platform_build/tools/compliance/readgraph.go
Bob Badour 103eb0f9bc Performance and scale.
Defer edge creation.

Don't create edges until the count is known to avoid repeated allocate+
copy operations.

Limit resolutions.

Allow only a single resolution condition set per target, and overwrite
intermediate results. Reduces memory and obviates allocations.

Propagate fewer conditions.

Instead of propagating notice conditions to parents in graph during
initial resolve, leave them on leaf node, and attach to ancestors in
the final walk. Reduces copies.

Parallelize resolutions.

Use goroutines, mutexes, and waitgroups to resolve branches of the
graph in parallel. Makes better use of available cores.

Don't accumulate resolutions inside non-containers.

During the final resolution walk, only attach actions to ancestors from
the root down until the 1st non-aggregate. Prevents an explosion of
copies in the lower levels of the graph.

Drop origin for scale.

Tracking the origin of every potential origin for every restricted
condition does not scale. By dropping origin, propagating from top
to bottom can prune many redundant paths avoiding an exponential
explosion.

Conditions as bitmask.

Use bit masks for license conditions and condition sets. Reduces maps
and allocations.

Bug: 68860345
Bug: 151177513
Bug: 151953481

Test: m all
Test: m systemlicense
Test: m listshare; out/soong/host/linux-x86/bin/listshare ...
Test: m checkshare; out/soong/host/linux-x86/bin/checkshare ...
Test: m dumpgraph; out/soong/host/linux-x86/dumpgraph ...
Test: m dumpresolutions; out/soong/host/linux-x86/dumpresolutions ...

where ... is the path to the .meta_lic file for the system image. In my
case if

$ export PRODUCT=$(realpath $ANDROID_PRODUCT_OUT --relative-to=$PWD)

... can be expressed as:

${PRODUCT}/gen/META/lic_intermediates/${PRODUCT}/system.img.meta_lic

Change-Id: Ia2ec1b818de6122c239fbd0824754f1d65daffd3
2022-01-11 10:40:50 -08:00

268 lines
7.3 KiB
Go

// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package compliance
import (
"fmt"
"io"
"io/fs"
"strings"
"sync"
"android/soong/compliance/license_metadata_proto"
"google.golang.org/protobuf/encoding/prototext"
)
// ConcurrentReaders caps how many license metadata reads may be in flight at once; it is
// the size of the task pool that limits resource usage such as open files.
var ConcurrentReaders = 5
// result describes the outcome of reading and parsing a single license metadata file.
//
// readFile sends one result per file: either `target` is set and `err` is nil, or `target`
// is nil and `err` describes the failure. Values are built with positional composite
// literals (file, target, err), so the field order below must not change.
type result struct {
// file identifies the path to the license metadata file
file string
// target contains the parsed metadata or nil if an error
target *TargetNode
// err is nil unless an error occurs
err error
}
// receiver coordinates the tasks for reading and parsing license metadata files.
//
// A single receiver is created by ReadLicenseGraph and shared by every readFile task.
type receiver struct {
// lg accumulates the read metadata and becomes the final resulting LicenseGraph.
// Its `targets` map is accessed under lg.mu while reads are in flight.
lg *LicenseGraph
// rootFS locates the root of the file system from which to read the files.
rootFS fs.FS
// stderr identifies the error output writer.
stderr io.Writer
// task provides a fixed-size task pool to limit concurrent open files etc.
// It is pre-filled with ConcurrentReaders tokens; readFile takes one token before it
// starts reading and puts it back once the file has been parsed.
task chan bool
// results returns one license metadata file result at a time.
// It is buffered with room for ConcurrentReaders entries.
results chan *result
// wg detects when done
// readFile adds one per scheduled file and signals done after that file's
// dependencies have been scheduled.
wg sync.WaitGroup
}
// ReadLicenseGraph reads and parses `files` and their dependencies into a LicenseGraph.
//
// `files` become the root files of the graph for top-down walks of the graph. Any name
// that does not already end in "meta_lic" gets ".meta_lic" appended.
//
// Reading is spread over a pool of ConcurrentReaders tasks. The first file that fails to
// open, read or parse aborts the whole operation: its error is written to `stderr` and
// returned. An error is also returned when `files` is empty, when ConcurrentReaders is
// less than 1, or when a dependency cannot be indexed into an edge. The returned graph is
// nil whenever the returned error is non-nil.
func ReadLicenseGraph(rootFS fs.FS, stderr io.Writer, files []string) (*LicenseGraph, error) {
	if len(files) == 0 {
		return nil, fmt.Errorf("no license metadata to analyze")
	}
	if ConcurrentReaders < 1 {
		return nil, fmt.Errorf("need at least one task in pool")
	}

	lg := newLicenseGraph()
	for _, f := range files {
		if strings.HasSuffix(f, "meta_lic") {
			lg.rootFiles = append(lg.rootFiles, f)
		} else {
			lg.rootFiles = append(lg.rootFiles, f+".meta_lic")
		}
	}

	recv := &receiver{
		lg:      lg,
		rootFS:  rootFS,
		stderr:  stderr,
		task:    make(chan bool, ConcurrentReaders),
		results: make(chan *result, ConcurrentReaders),
		wg:      sync.WaitGroup{},
	}

	// fill the task pool: one token per permitted concurrent reader
	for i := 0; i < ConcurrentReaders; i++ {
		recv.task <- true
	}

	readFiles := func() {
		lg.mu.Lock()
		// identify the metadata files to schedule reading tasks for
		for _, f := range lg.rootFiles {
			lg.targets[f] = nil
		}
		lg.mu.Unlock()

		// schedule tasks to read the files
		for _, f := range lg.rootFiles {
			readFile(recv, f)
		}

		// schedule a task to wait until finished and close the channel.
		go func() {
			recv.wg.Wait()
			close(recv.task)
			close(recv.results)
		}()
	}
	go readFiles()

	// tasks to read license metadata files are scheduled; read and process results until
	// the channel is closed or the first error arrives.
	//
	// The recv.results field is deliberately never reassigned here: reader goroutines load
	// that field every time they send, so writing to it from this goroutine would be an
	// unsynchronized concurrent access.
	for r := range recv.results {
		if r.err != nil {
			fmt.Fprintf(recv.stderr, "%s\n", r.err.Error())
			return nil, r.err
		}
		// record the parsed metadata (guarded by mutex)
		recv.lg.mu.Lock()
		lg.targets[r.target.name] = r.target
		recv.lg.mu.Unlock()
	}

	// every file was read successfully and all readers are done, so the graph can be
	// indexed without locking. Count the edges first so the edge list is allocated once.
	esize := 0
	for _, tn := range lg.targets {
		esize += len(tn.proto.Deps)
	}
	lg.edges = make(TargetEdgeList, 0, esize)
	for _, tn := range lg.targets {
		tn.licenseConditions = LicenseConditionSetFromNames(tn, tn.proto.LicenseConditions...)
		if err := addDependencies(lg, tn); err != nil {
			return nil, fmt.Errorf("error indexing dependencies for %q: %w", tn.name, err)
		}
		// the dependencies now live in the edges; replace the proto copy with an empty list
		tn.proto.Deps = []*license_metadata_proto.AnnotatedDependency{}
	}
	return lg, nil
}
// targetNode contains the license metadata for a node in the license graph.
//
// NOTE(review): the functions in this file refer to the type as TargetNode; presumably
// TargetNode is declared elsewhere in terms of targetNode -- confirm.
type targetNode struct {
// proto holds the license metadata message for the target; it is embedded by value and
// read through its Deps and LicenseConditions fields when the graph is indexed.
proto license_metadata_proto.LicenseMetadata
// name is the path to the metadata file.
name string
// lg is the license graph the node belongs to.
lg *LicenseGraph
// edges identifies the dependencies of the target.
edges TargetEdgeList
// licenseConditions identifies the set of license conditions originating at the target node.
licenseConditions LicenseConditionSet
// resolution identifies the set of conditions resolved by acting on the target node.
resolution LicenseConditionSet
}
// addDependencies turns the proto AnnotatedDependencies of `tn` into edges, appending each
// new edge both to the node's own `edges` and to the graph-wide `lg.edges`.
//
// It fails on the first dependency whose file name is empty, is not a key of `lg.targets`,
// or maps to a nil target. Edges appended before the failure are left in place.
func addDependencies(lg *LicenseGraph, tn *TargetNode) error {
	deps := tn.proto.Deps
	tn.edges = make(TargetEdgeList, 0, len(deps))

	for _, dep := range deps {
		depName := dep.GetFile()
		if len(depName) == 0 {
			return fmt.Errorf("missing dependency name")
		}

		depNode, known := lg.targets[depName]
		switch {
		case !known:
			return fmt.Errorf("unknown dependency name %q", depName)
		case depNode == nil:
			return fmt.Errorf("nil dependency for name %q", depName)
		}

		// Store the shared constant from the RecognizedAnnotations table rather than the
		// string from the proto, so thousands of edges do not each keep their own copy of
		// the same few strings. Annotations missing from the table are dropped.
		edgeAnnotations := newEdgeAnnotations()
		for _, raw := range dep.Annotations {
			canonical, recognized := RecognizedAnnotations[raw]
			if !recognized {
				continue
			}
			edgeAnnotations.annotations[canonical] = struct{}{}
		}

		e := &TargetEdge{tn, depNode, edgeAnnotations}
		tn.edges = append(tn.edges, e)
		lg.edges = append(lg.edges, e)
	}
	return nil
}
// readFile is a task to read and parse a single license metadata file, and to schedule
// additional tasks for reading and parsing dependencies as necessary.
//
// The call registers the file with recv.wg and then blocks until a token is available in
// the recv.task pool. The actual open/read/parse runs in a new goroutine, which sends
// exactly one result for `file` on recv.results.
//
// NOTE(review): the error paths return without putting the token back into the pool and
// without signalling the WaitGroup, so after a failure the results channel is never
// closed. The caller is expected to stop reading results at the first error.
func readFile(recv *receiver, file string) {
	recv.wg.Add(1)
	<-recv.task
	go func() {
		f, err := recv.rootFS.Open(file)
		if err != nil {
			recv.results <- &result{file, nil, fmt.Errorf("error opening license metadata %q: %w", file, err)}
			return
		}

		// read the file, then close it right away whether or not the read succeeded so a
		// failed read does not leak the open file. The Close error is ignored: the file
		// was only read from, so there is nothing left to flush or lose.
		data, err := io.ReadAll(f)
		f.Close()
		if err != nil {
			recv.results <- &result{file, nil, fmt.Errorf("error reading license metadata %q: %w", file, err)}
			return
		}

		tn := &TargetNode{lg: recv.lg, name: file}

		err = prototext.Unmarshal(data, &tn.proto)
		if err != nil {
			recv.results <- &result{file, nil, fmt.Errorf("error license metadata %q: %w", file, err)}
			return
		}

		// send result for this file and release task before scheduling dependencies,
		// but do not signal done to WaitGroup until dependencies are scheduled.
		recv.results <- &result{file, tn, nil}
		recv.task <- true

		// schedule tasks as necessary to read dependencies
		for _, ad := range tn.proto.Deps {
			dependency := ad.GetFile()
			// decide, signal and record whether to schedule task in critical section
			recv.lg.mu.Lock()
			_, alreadyScheduled := recv.lg.targets[dependency]
			if !alreadyScheduled {
				// a nil entry marks the dependency as claimed so no other task schedules it
				recv.lg.targets[dependency] = nil
			}
			recv.lg.mu.Unlock()
			// schedule task to read dependency file outside critical section
			if !alreadyScheduled {
				readFile(recv, dependency)
			}
		}

		// signal task done after scheduling dependencies
		recv.wg.Done()
	}()
}