Handle aquery build statements in a goroutine

Creation of build statements is largely parallelizable because each
action is independent apart from updates/reads to
depsetHashToArtifactPathsCache. Locally resulted in build statements
taking ~.45 seconds on staging mode to ~.02 seconds

Test: CI
Change-Id: Iab00c8394a9eab17353f71230885ff0870e17f24
This commit is contained in:
Liz Kammer 2023-02-10 17:17:28 -05:00
parent 00629db646
commit a4655a96c0
3 changed files with 88 additions and 49 deletions

View file

@ -188,7 +188,7 @@ type BazelContext interface {
OutputBase() string
// Returns build statements which should get registered to reflect Bazel's outputs.
BuildStatementsToRegister() []bazel.BuildStatement
BuildStatementsToRegister() []*bazel.BuildStatement
// Returns the depsets defined in Bazel's aquery response.
AqueryDepsets() []bazel.AqueryDepset
@ -222,7 +222,7 @@ type mixedBuildBazelContext struct {
results map[cqueryKey]string // Results of cquery requests after Bazel invocations
// Build statements which should get registered to reflect Bazel's outputs.
buildStatements []bazel.BuildStatement
buildStatements []*bazel.BuildStatement
// Depsets which should be used for Bazel's build statements.
depsets []bazel.AqueryDepset
@ -314,8 +314,8 @@ func (m MockBazelContext) IsModuleNameAllowed(_ string) bool {
func (m MockBazelContext) OutputBase() string { return m.OutputBaseDir }
func (m MockBazelContext) BuildStatementsToRegister() []bazel.BuildStatement {
return []bazel.BuildStatement{}
func (m MockBazelContext) BuildStatementsToRegister() []*bazel.BuildStatement {
return []*bazel.BuildStatement{}
}
func (m MockBazelContext) AqueryDepsets() []bazel.AqueryDepset {
@ -434,8 +434,8 @@ func (n noopBazelContext) IsModuleNameAllowed(_ string) bool {
return false
}
func (m noopBazelContext) BuildStatementsToRegister() []bazel.BuildStatement {
return []bazel.BuildStatement{}
func (m noopBazelContext) BuildStatementsToRegister() []*bazel.BuildStatement {
return []*bazel.BuildStatement{}
}
func (m noopBazelContext) AqueryDepsets() []bazel.AqueryDepset {
@ -1128,7 +1128,7 @@ func (context *mixedBuildBazelContext) generateBazelSymlinks(config Config, ctx
return err
}
func (context *mixedBuildBazelContext) BuildStatementsToRegister() []bazel.BuildStatement {
func (context *mixedBuildBazelContext) BuildStatementsToRegister() []*bazel.BuildStatement {
return context.buildStatements
}
@ -1196,6 +1196,11 @@ func (c *bazelSingleton) GenerateBuildActions(ctx SingletonContext) {
executionRoot := path.Join(ctx.Config().BazelContext.OutputBase(), "execroot", "__main__")
bazelOutDir := path.Join(executionRoot, "bazel-out")
for index, buildStatement := range ctx.Config().BazelContext.BuildStatementsToRegister() {
// nil build statements are a valid case where we do not create an action because it is
// unnecessary or handled by other processing
if buildStatement == nil {
continue
}
if len(buildStatement.Command) > 0 {
rule := NewRuleBuilder(pctx, ctx)
createCommand(rule.Command(), buildStatement, executionRoot, bazelOutDir, ctx)
@ -1240,7 +1245,7 @@ func (c *bazelSingleton) GenerateBuildActions(ctx SingletonContext) {
}
// Register bazel-owned build statements (obtained from the aquery invocation).
func createCommand(cmd *RuleBuilderCommand, buildStatement bazel.BuildStatement, executionRoot string, bazelOutDir string, ctx BuilderContext) {
func createCommand(cmd *RuleBuilderCommand, buildStatement *bazel.BuildStatement, executionRoot string, bazelOutDir string, ctx BuilderContext) {
// executionRoot is the action cwd.
cmd.Text(fmt.Sprintf("cd '%s' &&", executionRoot))

View file

@ -22,6 +22,7 @@ import (
"reflect"
"sort"
"strings"
"sync"
analysis_v2_proto "prebuilts/bazel/common/proto/analysis_v2"
@ -130,7 +131,7 @@ type aqueryArtifactHandler struct {
// depsetIdToArtifactIdsCache is a memoization of depset flattening, because flattening
// may be an expensive operation.
depsetHashToArtifactPathsCache map[string][]string
depsetHashToArtifactPathsCache sync.Map
// Maps artifact ids to fully expanded paths.
artifactIdToPath map[artifactId]string
}
@ -192,7 +193,7 @@ func newAqueryHandler(aqueryResult *analysis_v2_proto.ActionGraphContainer) (*aq
aqueryHandler := aqueryArtifactHandler{
depsetIdToAqueryDepset: map[depsetId]AqueryDepset{},
depsetHashToAqueryDepset: map[string]AqueryDepset{},
depsetHashToArtifactPathsCache: map[string][]string{},
depsetHashToArtifactPathsCache: sync.Map{},
emptyDepsetIds: make(map[depsetId]struct{}, 0),
artifactIdToPath: artifactIdToPath,
}
@ -297,8 +298,8 @@ func (a *aqueryArtifactHandler) getInputPaths(depsetIds []uint32) ([]string, err
}
func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) ([]string, error) {
if result, exists := a.depsetHashToArtifactPathsCache[depsetHash]; exists {
return result, nil
if result, exists := a.depsetHashToArtifactPathsCache.Load(depsetHash); exists {
return result.([]string), nil
}
if depset, exists := a.depsetHashToAqueryDepset[depsetHash]; exists {
result := depset.DirectArtifacts
@ -309,7 +310,7 @@ func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) (
}
result = append(result, childArtifactIds...)
}
a.depsetHashToArtifactPathsCache[depsetHash] = result
a.depsetHashToArtifactPathsCache.Store(depsetHash, result)
return result, nil
} else {
return nil, fmt.Errorf("undefined input depset hash %s", depsetHash)
@ -321,7 +322,7 @@ func (a *aqueryArtifactHandler) artifactPathsFromDepsetHash(depsetHash string) (
// action graph, as described by the given action graph json proto.
// BuildStatements are one-to-one with actions in the given action graph, and AqueryDepsets
// are one-to-one with Bazel's depSetOfFiles objects.
func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]BuildStatement, []AqueryDepset, error) {
func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHandler) ([]*BuildStatement, []AqueryDepset, error) {
aqueryProto := &analysis_v2_proto.ActionGraphContainer{}
err := proto.Unmarshal(aqueryJsonProto, aqueryProto)
if err != nil {
@ -338,21 +339,35 @@ func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHa
}
}
buildStatements := make([]BuildStatement, 0, len(aqueryProto.Actions))
// allocate both length and capacity so each goroutine can write to an index independently without
// any need for synchronization for slice access.
buildStatements := make([]*BuildStatement, len(aqueryProto.Actions))
{
eventHandler.Begin("build_statements")
defer eventHandler.End("build_statements")
var buildStatement *BuildStatement
for _, actionEntry := range aqueryProto.Actions {
buildStatement, err = aqueryHandler.actionToBuildStatement(actionEntry)
if err != nil {
return nil, nil, err
}
if buildStatement == nil {
continue
}
buildStatements = append(buildStatements, *buildStatement)
wg := sync.WaitGroup{}
var errOnce sync.Once
for i, actionEntry := range aqueryProto.Actions {
wg.Add(1)
go func(i int, actionEntry *analysis_v2_proto.Action) {
buildStatement, aErr := aqueryHandler.actionToBuildStatement(actionEntry)
if aErr != nil {
errOnce.Do(func() {
err = aErr
})
} else {
// set build statement at an index rather than appending such that each goroutine does not
// impact other goroutines
buildStatements[i] = buildStatement
}
wg.Done()
}(i, actionEntry)
}
wg.Wait()
}
if err != nil {
return nil, nil, err
}
depsetsByHash := map[string]AqueryDepset{}
@ -379,7 +394,13 @@ func AqueryBuildStatements(aqueryJsonProto []byte, eventHandler *metrics.EventHa
// output). Note they are not sorted by their original IDs nor their Bazel ordering,
// as Bazel gives nondeterministic ordering / identifiers in aquery responses.
sort.Slice(buildStatements, func(i, j int) bool {
// For build statements, compare output lists. In Bazel, each output file
// Sort all nil statements to the end of the slice
if buildStatements[i] == nil {
return false
} else if buildStatements[j] == nil {
return true
}
//For build statements, compare output lists. In Bazel, each output file
// may only have one action which generates it, so this will provide
// a deterministic ordering.
outputs_i := buildStatements[i].OutputPaths

View file

@ -139,10 +139,10 @@ func TestAqueryMultiArchGenrule(t *testing.T) {
return
}
actualbuildStatements, actualDepsets, _ := AqueryBuildStatements(data, &metrics.EventHandler{})
var expectedBuildStatements []BuildStatement
var expectedBuildStatements []*BuildStatement
for _, arch := range []string{"arm", "arm64", "x86", "x86_64"} {
expectedBuildStatements = append(expectedBuildStatements,
BuildStatement{
&BuildStatement{
Command: fmt.Sprintf(
"/bin/bash -c 'source ../bazel_tools/tools/genrule/genrule-setup.sh; ../sourceroot/bionic/libc/tools/gensyscalls.py %s ../sourceroot/bionic/libc/SYSCALLS.TXT > bazel-out/sourceroot/k8-fastbuild/bin/bionic/libc/syscalls-%s.S'",
arch, arch),
@ -487,8 +487,8 @@ func TestTransitiveInputDepsets(t *testing.T) {
}
actualbuildStatements, actualDepsets, _ := AqueryBuildStatements(data, &metrics.EventHandler{})
expectedBuildStatements := []BuildStatement{
{
expectedBuildStatements := []*BuildStatement{
&BuildStatement{
Command: "/bin/bash -c 'touch bazel-out/sourceroot/k8-fastbuild/bin/testpkg/test_out'",
OutputPaths: []string{"bazel-out/sourceroot/k8-fastbuild/bin/testpkg/test_out"},
Mnemonic: "Action",
@ -545,8 +545,8 @@ func TestSymlinkTree(t *testing.T) {
if err != nil {
t.Errorf("Unexpected error %q", err)
}
assertBuildStatements(t, []BuildStatement{
{
assertBuildStatements(t, []*BuildStatement{
&BuildStatement{
Command: "",
OutputPaths: []string{"foo.runfiles/MANIFEST"},
Mnemonic: "SymlinkTree",
@ -615,7 +615,7 @@ func TestBazelOutRemovalFromInputDepsets(t *testing.T) {
t.Errorf("dependency ../dep2 expected but not found")
}
expectedBuildStatement := BuildStatement{
expectedBuildStatement := &BuildStatement{
Command: "bogus command",
OutputPaths: []string{"output"},
Mnemonic: "x",
@ -692,7 +692,7 @@ func TestBazelOutRemovalFromTransitiveInputDepsets(t *testing.T) {
return
}
expectedBuildStatement := BuildStatement{
expectedBuildStatement := &BuildStatement{
Command: "bogus command",
OutputPaths: []string{"output"},
Mnemonic: "x",
@ -757,8 +757,8 @@ func TestMiddlemenAction(t *testing.T) {
if err != nil {
t.Errorf("Unexpected error %q", err)
}
if expected := 1; len(actualBuildStatements) != expected {
t.Fatalf("Expected %d build statements, got %d", expected, len(actualBuildStatements))
if expected := 2; len(actualBuildStatements) != expected {
t.Fatalf("Expected %d build statements, got %d %#v", expected, len(actualBuildStatements), actualBuildStatements)
}
expectedDepsetFiles := [][]string{
@ -783,6 +783,11 @@ func TestMiddlemenAction(t *testing.T) {
if !reflect.DeepEqual(actualFlattenedInputs, expectedFlattenedInputs) {
t.Errorf("Expected flattened inputs %v, but got %v", expectedFlattenedInputs, actualFlattenedInputs)
}
bs = actualBuildStatements[1]
if bs != nil {
t.Errorf("Expected nil action for skipped")
}
}
// Returns the contents of given depsets in concatenated post order.
@ -856,8 +861,8 @@ func TestSimpleSymlink(t *testing.T) {
t.Errorf("Unexpected error %q", err)
}
expectedBuildStatements := []BuildStatement{
{
expectedBuildStatements := []*BuildStatement{
&BuildStatement{
Command: "mkdir -p one/symlink_subdir && " +
"rm -f one/symlink_subdir/symlink && " +
"ln -sf $PWD/one/file_subdir/file one/symlink_subdir/symlink",
@ -904,8 +909,8 @@ func TestSymlinkQuotesPaths(t *testing.T) {
t.Errorf("Unexpected error %q", err)
}
expectedBuildStatements := []BuildStatement{
{
expectedBuildStatements := []*BuildStatement{
&BuildStatement{
Command: "mkdir -p 'one/symlink subdir' && " +
"rm -f 'one/symlink subdir/symlink' && " +
"ln -sf $PWD/'one/file subdir/file' 'one/symlink subdir/symlink'",
@ -1014,8 +1019,8 @@ func TestTemplateExpandActionSubstitutions(t *testing.T) {
t.Errorf("Unexpected error %q", err)
}
expectedBuildStatements := []BuildStatement{
{
expectedBuildStatements := []*BuildStatement{
&BuildStatement{
Command: "/bin/bash -c 'echo \"Test template substitutions: abcd, python3\" | sed \"s/\\\\\\\\n/\\\\n/g\" > template_file && " +
"chmod a+x template_file'",
OutputPaths: []string{"template_file"},
@ -1084,8 +1089,8 @@ func TestFileWrite(t *testing.T) {
if err != nil {
t.Errorf("Unexpected error %q", err)
}
assertBuildStatements(t, []BuildStatement{
{
assertBuildStatements(t, []*BuildStatement{
&BuildStatement{
OutputPaths: []string{"foo.manifest"},
Mnemonic: "FileWrite",
FileContents: "file data\n",
@ -1122,8 +1127,8 @@ func TestSourceSymlinkManifest(t *testing.T) {
if err != nil {
t.Errorf("Unexpected error %q", err)
}
assertBuildStatements(t, []BuildStatement{
{
assertBuildStatements(t, []*BuildStatement{
&BuildStatement{
OutputPaths: []string{"foo.manifest"},
Mnemonic: "SourceSymlinkManifest",
SymlinkPaths: []string{},
@ -1142,7 +1147,7 @@ func assertError(t *testing.T, err error, expected string) {
// Asserts that the given actual build statements match the given expected build statements.
// Build statement equivalence is determined using buildStatementEquals.
func assertBuildStatements(t *testing.T, expected []BuildStatement, actual []BuildStatement) {
func assertBuildStatements(t *testing.T, expected []*BuildStatement, actual []*BuildStatement) {
t.Helper()
if len(expected) != len(actual) {
t.Errorf("expected %d build statements, but got %d,\n expected: %#v,\n actual: %#v",
@ -1150,8 +1155,13 @@ func assertBuildStatements(t *testing.T, expected []BuildStatement, actual []Bui
return
}
type compareFn = func(i int, j int) bool
byCommand := func(slice []BuildStatement) compareFn {
byCommand := func(slice []*BuildStatement) compareFn {
return func(i int, j int) bool {
if slice[i] == nil {
return false
} else if slice[j] == nil {
return false
}
return slice[i].Command < slice[j].Command
}
}
@ -1167,7 +1177,10 @@ func assertBuildStatements(t *testing.T, expected []BuildStatement, actual []Bui
}
}
func buildStatementEquals(first BuildStatement, second BuildStatement) string {
func buildStatementEquals(first *BuildStatement, second *BuildStatement) string {
if (first == nil) != (second == nil) {
return "Nil"
}
if first.Mnemonic != second.Mnemonic {
return "Mnemonic"
}