platform_build_blueprint/blueprint/ninja_strings.go
Colin Cross e4cfdf93a9 Optimize parseNinjaString
The closures in parseNinjaString seem to confuse go's escape
analysis, causing it to allocate a new stateFunc object on the
heap for every character in the string.  Convert the state
functions to normal functions, and pass a pointer to a state
object to provide access to the variables that were in the
closure.

The range iteration on the input string was iterating over
UTF-8 runes, when all of the rune comparisons for state
changes only look at ASCII bytes.  Switch to iterating
over bytes, stringState will pass through multi-byte
characters in strings untouched.

Together this reduces the time spent in parseNinjaString by
>50%, and the overall execution time by >20%.

Change-Id: I4c926b52b883d51e2f12a1673d03fcc2cfe83445
2015-01-23 13:41:49 -08:00

313 lines
7.7 KiB
Go

package blueprint
import (
"fmt"
"strings"
)
const eof = -1
var (
defaultEscaper = strings.NewReplacer(
"\n", "$\n")
inputEscaper = strings.NewReplacer(
"\n", "$\n",
" ", "$ ")
outputEscaper = strings.NewReplacer(
"\n", "$\n",
" ", "$ ",
":", "$:")
)
type ninjaString struct {
strings []string
variables []Variable
}
type scope interface {
LookupVariable(name string) (Variable, error)
IsRuleVisible(rule Rule) bool
IsPoolVisible(pool Pool) bool
}
func simpleNinjaString(str string) *ninjaString {
return &ninjaString{
strings: []string{str},
}
}
type parseState struct {
scope scope
str string
stringStart int
varStart int
result *ninjaString
}
type stateFunc func(*parseState, int, rune) (stateFunc, error)
// parseNinjaString parses an unescaped ninja string (i.e. all $<something>
// occurrences are expected to be variables or $$) and returns a list of the
// variable names that the string references.
func parseNinjaString(scope scope, str string) (*ninjaString, error) {
result := &ninjaString{}
parseState := &parseState{
scope: scope,
str: str,
result: result,
}
state := parseStringState
var err error
for i := 0; i < len(str); i++ {
r := rune(str[i])
state, err = state(parseState, i, r)
if err != nil {
return nil, err
}
}
_, err = state(parseState, len(parseState.str), eof)
if err != nil {
return nil, err
}
return result, nil
}
func parseStringState(state *parseState, i int, r rune) (stateFunc, error) {
switch {
case r == '$':
state.varStart = i + 1
return parseDollarStartState, nil
case r == eof:
state.result.strings = append(state.result.strings, state.str[state.stringStart:i])
return nil, nil
default:
return parseStringState, nil
}
}
func parseDollarStartState(state *parseState, i int, r rune) (stateFunc, error) {
switch {
case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z',
r >= '0' && r <= '9', r == '_', r == '-':
// The beginning of a of the variable name. Output the string and
// keep going.
state.result.strings = append(state.result.strings, state.str[state.stringStart:i-1])
return parseDollarState, nil
case r == '$':
// Just a "$$". Go back to parseStringState without changing
// state.stringStart.
return parseStringState, nil
case r == '{':
// This is a bracketted variable name (e.g. "${blah.blah}"). Output
// the string and keep going.
state.result.strings = append(state.result.strings, state.str[state.stringStart:i-1])
state.varStart = i + 1
return parseBracketsState, nil
case r == eof:
return nil, fmt.Errorf("unexpected end of string after '$'")
default:
// This was some arbitrary character following a dollar sign,
// which is not allowed.
return nil, fmt.Errorf("invalid character after '$' at byte "+
"offset %d", i)
}
}
func parseDollarState(state *parseState, i int, r rune) (stateFunc, error) {
switch {
case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z',
r >= '0' && r <= '9', r == '_', r == '-':
// A part of the variable name. Keep going.
return parseDollarState, nil
case r == '$':
// A dollar after the variable name (e.g. "$blah$"). Output the
// variable we have and start a new one.
v, err := state.scope.LookupVariable(state.str[state.varStart:i])
if err != nil {
return nil, err
}
state.result.variables = append(state.result.variables, v)
state.varStart = i + 1
// We always have a string in between variables, even if it's an
// empty one.
state.result.strings = append(state.result.strings, "")
return parseDollarState, nil
case r == eof:
// This is the end of the variable name.
v, err := state.scope.LookupVariable(state.str[state.varStart:i])
if err != nil {
return nil, err
}
state.result.variables = append(state.result.variables, v)
// We always end with a string, even if it's an empty one.
state.result.strings = append(state.result.strings, "")
return nil, nil
default:
// We've just gone past the end of the variable name, so record what
// we have.
v, err := state.scope.LookupVariable(state.str[state.varStart:i])
if err != nil {
return nil, err
}
state.result.variables = append(state.result.variables, v)
state.stringStart = i
return parseStringState, nil
}
}
func parseBracketsState(state *parseState, i int, r rune) (stateFunc, error) {
switch {
case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z',
r >= '0' && r <= '9', r == '_', r == '-', r == '.':
// A part of the variable name. Keep going.
return parseBracketsState, nil
case r == '}':
if state.varStart == i {
// The brackets were immediately closed. That's no good.
return nil, fmt.Errorf("empty variable name at byte offset %d",
i)
}
// This is the end of the variable name.
v, err := state.scope.LookupVariable(state.str[state.varStart:i])
if err != nil {
return nil, err
}
state.result.variables = append(state.result.variables, v)
state.stringStart = i + 1
return parseStringState, nil
case r == eof:
return nil, fmt.Errorf("unexpected end of string in variable name")
default:
// This character isn't allowed in a variable name.
return nil, fmt.Errorf("invalid character in variable name at "+
"byte offset %d", i)
}
}
func parseNinjaStrings(scope scope, strs []string) ([]*ninjaString,
error) {
if len(strs) == 0 {
return nil, nil
}
result := make([]*ninjaString, len(strs))
for i, str := range strs {
ninjaStr, err := parseNinjaString(scope, str)
if err != nil {
return nil, fmt.Errorf("error parsing element %d: %s", i, err)
}
result[i] = ninjaStr
}
return result, nil
}
func (n *ninjaString) Value(pkgNames map[*PackageContext]string) string {
return n.ValueWithEscaper(pkgNames, defaultEscaper)
}
func (n *ninjaString) ValueWithEscaper(pkgNames map[*PackageContext]string,
escaper *strings.Replacer) string {
str := escaper.Replace(n.strings[0])
for i, v := range n.variables {
str += "${" + v.fullName(pkgNames) + "}"
str += escaper.Replace(n.strings[i+1])
}
return str
}
func (n *ninjaString) Eval(variables map[Variable]*ninjaString) (string, error) {
str := n.strings[0]
for i, v := range n.variables {
variable, ok := variables[v]
if !ok {
return "", fmt.Errorf("no such global variable: %s", v)
}
value, err := variable.Eval(variables)
if err != nil {
return "", err
}
str += value + n.strings[i+1]
}
return str, nil
}
func validateNinjaName(name string) error {
for i, r := range name {
valid := (r >= 'a' && r <= 'z') ||
(r >= 'A' && r <= 'Z') ||
(r >= '0' && r <= '9') ||
(r == '_') ||
(r == '-') ||
(r == '.')
if !valid {
return fmt.Errorf("%q contains an invalid Ninja name character "+
"%q at byte offset %d", name, r, i)
}
}
return nil
}
var builtinRuleArgs = []string{"out", "in"}
func validateArgName(argName string) error {
err := validateNinjaName(argName)
if err != nil {
return err
}
// We only allow globals within the rule's package to be used as rule
// arguments. A global in another package can always be mirrored into
// the rule's package by defining a new variable, so this doesn't limit
// what's possible. This limitation prevents situations where a Build
// invocation in another package must use the rule-defining package's
// import name for a 3rd package in order to set the rule's arguments.
if strings.ContainsRune(argName, '.') {
return fmt.Errorf("%q contains a '.' character", argName)
}
for _, builtin := range builtinRuleArgs {
if argName == builtin {
return fmt.Errorf("%q conflicts with Ninja built-in", argName)
}
}
return nil
}
func validateArgNames(argNames []string) error {
for _, argName := range argNames {
err := validateArgName(argName)
if err != nil {
return err
}
}
return nil
}