platform_build_blueprint/ninja_strings.go
Colin Cross 6126fe8067 Optimize memory usage of ninjaString
ninjaString is an interface, which uses 16 bytes of memory on top
of the size of the concrete type.  A literalNinjaString is a string,
which is another 16 bytes for the string header for a total of 32
bytes.  A varNinjaString is two slices, which are 24 bytes each
for the slice headers, for a total of 64 bytes.  The slices contain
the first constant string, and then altenrating variable and string
parts of the ninjaString, resulting in 16 bytes plus 32 bytes per
variable.

This patch replaces the ninjaString interface with a *ninjaString
concrete struct type.  The ninjaString struct is a string and a
pointer to a slice of variable references, for a total of 24 bytes.

ninjaStrings with no variable references (the equivalent of the old
literalNinjaString) have a nil slice, and now use 24 bytes instead
of 32 bytes.

ninjaStrings with variable references allocate a slice of variable
references that contain 32-bit start and end offsets and a Variable
interface, but reuse the original string and so avoid the extra
string headers, resulting in 24 bytes for the slice header, and
24 bytes per variable.

These savings reduce the peak memory usage averaged across 10 runs of
/bin/time -v build/soong/soong_ui.bash --make-mode nothing
on the internal master branch cf_x86_64_phone-userdebug build
from 50114842kB to 45577638kB, a savings of 4537204kB or 9%.

The new Benchmark_parseNinjaString shows savings in both time and
memory.  Before:
Benchmark_parseNinjaString/constant/1-128       	594251787	         2.006 ns/op	       0 B/op	       0 allocs/op
Benchmark_parseNinjaString/constant/10-128      	21191347	        65.57 ns/op	      16 B/op	       1 allocs/op
Benchmark_parseNinjaString/constant/100-128     	 9983748	       130.2 ns/op	     112 B/op	       1 allocs/op
Benchmark_parseNinjaString/constant/1000-128    	 2632527	       445.1 ns/op	    1024 B/op	       1 allocs/op
Benchmark_parseNinjaString/variable/1-128       	 2964896	       419.4 ns/op	     176 B/op	       4 allocs/op
Benchmark_parseNinjaString/variable/10-128      	 1807341	       670.6 ns/op	     192 B/op	       7 allocs/op
Benchmark_parseNinjaString/variable/100-128     	 1000000	      1092 ns/op	     352 B/op	       7 allocs/op
Benchmark_parseNinjaString/variable/1000-128    	  300649	      3773 ns/op	    1584 B/op	       7 allocs/op
Benchmark_parseNinjaString/variables/1-128      	 2858432	       441.6 ns/op	     176 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/2-128      	 2360505	       513.4 ns/op	     208 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/3-128      	 1867136	       635.6 ns/op	     240 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/4-128      	 1584045	       752.1 ns/op	     272 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/5-128      	 1338189	       885.8 ns/op	     304 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/10-128     	 1000000	      1468 ns/op	     464 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/100-128    	   88768	     12895 ns/op	    3712 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/1000-128   	    8972	    133627 ns/op	   32896 B/op	       4 allocs/op

After:
Benchmark_parseNinjaString/constant/1-128       	584600864	         2.004 ns/op	       0 B/op	       0 allocs/op
Benchmark_parseNinjaString/constant/10-128      	19274581	        64.84 ns/op	      16 B/op	       1 allocs/op
Benchmark_parseNinjaString/constant/100-128     	 9017640	       127.6 ns/op	     112 B/op	       1 allocs/op
Benchmark_parseNinjaString/constant/1000-128    	 2630797	       453.0 ns/op	    1024 B/op	       1 allocs/op
Benchmark_parseNinjaString/variable/1-128       	 3460422	       347.0 ns/op	     136 B/op	       4 allocs/op
Benchmark_parseNinjaString/variable/10-128      	 2103404	       519.9 ns/op	     152 B/op	       7 allocs/op
Benchmark_parseNinjaString/variable/100-128     	 1315778	       906.5 ns/op	     312 B/op	       7 allocs/op
Benchmark_parseNinjaString/variable/1000-128    	  354812	      3284 ns/op	    1544 B/op	       7 allocs/op
Benchmark_parseNinjaString/variables/1-128      	 3386868	       361.5 ns/op	     136 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/2-128      	 2675594	       456.9 ns/op	     160 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/3-128      	 2344670	       520.0 ns/op	     192 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/4-128      	 1919482	       648.1 ns/op	     208 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/5-128      	 1560556	       723.9 ns/op	     240 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/10-128     	 1000000	      1169 ns/op	     352 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/100-128    	  116738	     10168 ns/op	    2800 B/op	       4 allocs/op
Benchmark_parseNinjaString/variables/1000-128   	   10000	    105646 ns/op	   24688 B/op	       4 allocs/op

Bug: 286423944
Test: ninja_strings_test.go
Test: out/soong/build*.ninja is the same before and after this change
Change-Id: I1ecffbaccb0d0469a41fa31255c1b17311e01687
2023-06-15 21:53:56 -07:00

415 lines
11 KiB
Go

// Copyright 2014 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package blueprint
import (
"bytes"
"fmt"
"io"
"strings"
)
const eof = -1
var (
defaultEscaper = strings.NewReplacer(
"\n", "$\n")
inputEscaper = strings.NewReplacer(
"\n", "$\n",
" ", "$ ")
outputEscaper = strings.NewReplacer(
"\n", "$\n",
" ", "$ ",
":", "$:")
)
// ninjaString contains the parsed result of a string that can contain references to variables (e.g. $cflags) that will
// be propagated to the build.ninja file. For literal strings with no variable references, the variables field will be
// nil. For strings with variable references str contains the original, unparsed string, and variables contains a
// pointer to a list of references, each with a span of bytes they should replace and a Variable interface.
type ninjaString struct {
str string
variables *[]variableReference
}
// variableReference contains information about a single reference to a variable (e.g. $cflags) inside a parsed
// ninjaString. start and end are int32 to reduce memory usage. A nil variable is a special case of an inserted '$'
// at the beginning of the string to handle leading whitespace that must not be stripped by ninja.
type variableReference struct {
// start is the offset of the '$' character from the beginning of the unparsed string.
start int32
// end is the offset of the character _after_ the final character of the variable name (or '}' if using the
//'${}' syntax)
end int32
variable Variable
}
type scope interface {
LookupVariable(name string) (Variable, error)
IsRuleVisible(rule Rule) bool
IsPoolVisible(pool Pool) bool
}
func simpleNinjaString(str string) *ninjaString {
return &ninjaString{str: str}
}
type parseState struct {
scope scope
str string
varStart int
varNameStart int
result *ninjaString
}
func (ps *parseState) pushVariable(start, end int, v Variable) {
if ps.result.variables == nil {
ps.result.variables = &[]variableReference{{start: int32(start), end: int32(end), variable: v}}
} else {
*ps.result.variables = append(*ps.result.variables, variableReference{start: int32(start), end: int32(end), variable: v})
}
}
type stateFunc func(*parseState, int, rune) (stateFunc, error)
// parseNinjaString parses an unescaped ninja string (i.e. all $<something>
// occurrences are expected to be variables or $$) and returns a list of the
// variable names that the string references.
func parseNinjaString(scope scope, str string) (*ninjaString, error) {
// naively pre-allocate slice by counting $ signs
n := strings.Count(str, "$")
if n == 0 {
if len(str) > 0 && str[0] == ' ' {
str = "$" + str
}
return simpleNinjaString(str), nil
}
variableReferences := make([]variableReference, 0, n)
result := &ninjaString{
str: str,
variables: &variableReferences,
}
parseState := &parseState{
scope: scope,
str: str,
result: result,
}
state := parseFirstRuneState
var err error
for i := 0; i < len(str); i++ {
r := rune(str[i])
state, err = state(parseState, i, r)
if err != nil {
return nil, fmt.Errorf("error parsing ninja string %q: %s", str, err)
}
}
_, err = state(parseState, len(parseState.str), eof)
if err != nil {
return nil, err
}
// All the '$' characters counted initially could have been "$$" escapes, leaving no
// variable references. Deallocate the variables slice if so.
if len(*result.variables) == 0 {
result.variables = nil
}
return result, nil
}
func parseFirstRuneState(state *parseState, i int, r rune) (stateFunc, error) {
if r == ' ' {
state.pushVariable(0, 1, nil)
}
return parseStringState(state, i, r)
}
func parseStringState(state *parseState, i int, r rune) (stateFunc, error) {
switch {
case r == '$':
state.varStart = i
return parseDollarStartState, nil
case r == eof:
return nil, nil
default:
return parseStringState, nil
}
}
func parseDollarStartState(state *parseState, i int, r rune) (stateFunc, error) {
switch {
case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z',
r >= '0' && r <= '9', r == '_', r == '-':
// The beginning of a of the variable name.
state.varNameStart = i
return parseDollarState, nil
case r == '$':
// Just a "$$". Go back to parseStringState.
return parseStringState, nil
case r == '{':
// This is a bracketted variable name (e.g. "${blah.blah}").
state.varNameStart = i + 1
return parseBracketsState, nil
case r == eof:
return nil, fmt.Errorf("unexpected end of string after '$'")
default:
// This was some arbitrary character following a dollar sign,
// which is not allowed.
return nil, fmt.Errorf("invalid character after '$' at byte "+
"offset %d", i)
}
}
func parseDollarState(state *parseState, i int, r rune) (stateFunc, error) {
switch {
case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z',
r >= '0' && r <= '9', r == '_', r == '-':
// A part of the variable name. Keep going.
return parseDollarState, nil
}
// The variable name has ended, output what we have.
v, err := state.scope.LookupVariable(state.str[state.varNameStart:i])
if err != nil {
return nil, err
}
state.pushVariable(state.varStart, i, v)
switch {
case r == '$':
// A dollar after the variable name (e.g. "$blah$"). Start a new one.
state.varStart = i
return parseDollarStartState, nil
case r == eof:
return nil, nil
default:
return parseStringState, nil
}
}
func parseBracketsState(state *parseState, i int, r rune) (stateFunc, error) {
switch {
case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z',
r >= '0' && r <= '9', r == '_', r == '-', r == '.':
// A part of the variable name. Keep going.
return parseBracketsState, nil
case r == '}':
if state.varNameStart == i {
// The brackets were immediately closed. That's no good.
return nil, fmt.Errorf("empty variable name at byte offset %d",
i)
}
// This is the end of the variable name.
v, err := state.scope.LookupVariable(state.str[state.varNameStart:i])
if err != nil {
return nil, err
}
state.pushVariable(state.varStart, i+1, v)
return parseStringState, nil
case r == eof:
return nil, fmt.Errorf("unexpected end of string in variable name")
default:
// This character isn't allowed in a variable name.
return nil, fmt.Errorf("invalid character in variable name at "+
"byte offset %d", i)
}
}
func parseNinjaStrings(scope scope, strs []string) ([]*ninjaString,
error) {
if len(strs) == 0 {
return nil, nil
}
result := make([]*ninjaString, len(strs))
for i, str := range strs {
ninjaStr, err := parseNinjaString(scope, str)
if err != nil {
return nil, fmt.Errorf("error parsing element %d: %s", i, err)
}
result[i] = ninjaStr
}
return result, nil
}
func (n *ninjaString) Value(pkgNames map[*packageContext]string) string {
if n.variables == nil || len(*n.variables) == 0 {
return defaultEscaper.Replace(n.str)
}
str := &strings.Builder{}
n.ValueWithEscaper(str, pkgNames, defaultEscaper)
return str.String()
}
func (n *ninjaString) ValueWithEscaper(w io.StringWriter, pkgNames map[*packageContext]string,
escaper *strings.Replacer) {
if n.variables == nil || len(*n.variables) == 0 {
w.WriteString(escaper.Replace(n.str))
return
}
i := 0
for _, v := range *n.variables {
w.WriteString(escaper.Replace(n.str[i:v.start]))
if v.variable == nil {
w.WriteString("$ ")
} else {
w.WriteString("${")
w.WriteString(v.variable.fullName(pkgNames))
w.WriteString("}")
}
i = int(v.end)
}
w.WriteString(escaper.Replace(n.str[i:len(n.str)]))
}
func (n *ninjaString) Eval(variables map[Variable]*ninjaString) (string, error) {
if n.variables == nil || len(*n.variables) == 0 {
return n.str, nil
}
w := &strings.Builder{}
i := 0
for _, v := range *n.variables {
w.WriteString(n.str[i:v.start])
if v.variable == nil {
w.WriteString(" ")
} else {
variable, ok := variables[v.variable]
if !ok {
return "", fmt.Errorf("no such global variable: %s", v.variable)
}
value, err := variable.Eval(variables)
if err != nil {
return "", err
}
w.WriteString(value)
}
i = int(v.end)
}
w.WriteString(n.str[i:len(n.str)])
return w.String(), nil
}
func (n *ninjaString) Variables() []Variable {
if n.variables == nil || len(*n.variables) == 0 {
return nil
}
variables := make([]Variable, 0, len(*n.variables))
for _, v := range *n.variables {
if v.variable != nil {
variables = append(variables, v.variable)
}
}
return variables
}
func validateNinjaName(name string) error {
for i, r := range name {
valid := (r >= 'a' && r <= 'z') ||
(r >= 'A' && r <= 'Z') ||
(r >= '0' && r <= '9') ||
(r == '_') ||
(r == '-') ||
(r == '.')
if !valid {
return fmt.Errorf("%q contains an invalid Ninja name character "+
"%q at byte offset %d", name, r, i)
}
}
return nil
}
func toNinjaName(name string) string {
ret := bytes.Buffer{}
ret.Grow(len(name))
for _, r := range name {
valid := (r >= 'a' && r <= 'z') ||
(r >= 'A' && r <= 'Z') ||
(r >= '0' && r <= '9') ||
(r == '_') ||
(r == '-') ||
(r == '.')
if valid {
ret.WriteRune(r)
} else {
// TODO(jeffrygaston): do escaping so that toNinjaName won't ever output duplicate
// names for two different input names
ret.WriteRune('_')
}
}
return ret.String()
}
var builtinRuleArgs = []string{"out", "in"}
func validateArgName(argName string) error {
err := validateNinjaName(argName)
if err != nil {
return err
}
// We only allow globals within the rule's package to be used as rule
// arguments. A global in another package can always be mirrored into
// the rule's package by defining a new variable, so this doesn't limit
// what's possible. This limitation prevents situations where a Build
// invocation in another package must use the rule-defining package's
// import name for a 3rd package in order to set the rule's arguments.
if strings.ContainsRune(argName, '.') {
return fmt.Errorf("%q contains a '.' character", argName)
}
for _, builtin := range builtinRuleArgs {
if argName == builtin {
return fmt.Errorf("%q conflicts with Ninja built-in", argName)
}
}
return nil
}
func validateArgNames(argNames []string) error {
for _, argName := range argNames {
err := validateArgName(argName)
if err != nil {
return err
}
}
return nil
}