platform_build_blueprint/parser/sort.go
Colin Cross eb15c126c3 Fix numericStringLess and add tests
numericStringLess("1a", "11a") would strip the equal prefix "1" and
then compare the bytes "a" and "1", when it should have compared the
numbers 1 and 11.  Fix it by handling the case where the last equal
byte was numeric and the first differing byte is numeric in one
string and non-numeric in the other.

numericStringLess("12", "101") would strip the equal prefix "1" and
then compare the numbers 2 and 01, when it should have compared the
numbers 12 and 101.  Fix it by tracking the beginning of the sequence
of numeric bytes containing the differing byte.

Test: sort_test.go
Change-Id: I8d9252a64625ba6a3c75d09bb1429dcb1115e3e1
2021-03-08 17:59:55 -08:00

290 lines
7.4 KiB
Go

// Copyright 2014 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package parser
import (
"fmt"
"sort"
"strconv"
"strings"
"text/scanner"
)
// numericStringLess reports whether a sorts before b. The comparison is lexicographical by byte
// unless the first difference falls inside a sequence of 1 or more ASCII digits, in which case
// the two digit sequences containing the difference are compared as numbers (so "a2" sorts
// before "a10"). Digit sequences with the same numeric value, such as "01" and "1", compare as
// equal: neither is less than the other.
//
// Digit sequences of any length are supported; values that do not fit in an int are compared
// digit by digit instead of being converted.
func numericStringLess(a, b string) bool {
	isNumeric := func(r rune) bool { return r >= '0' && r <= '9' }
	isNotNumeric := func(r rune) bool { return !isNumeric(r) }

	minLength := len(a)
	if len(b) < minLength {
		minLength = len(b)
	}

	byteIndex := 0
	// numberStartIndex is the start of the digit sequence the scan is currently inside, or -1
	// when the last equal byte was not a digit.
	numberStartIndex := -1

	var aByte, bByte byte

	// Start with a byte comparison to find where the strings differ.
	for ; byteIndex < minLength; byteIndex++ {
		aByte, bByte = a[byteIndex], b[byteIndex]
		if aByte != bByte {
			break
		}
		byteIsNumeric := isNumeric(rune(aByte))
		if numberStartIndex != -1 && !byteIsNumeric {
			numberStartIndex = -1
		} else if numberStartIndex == -1 && byteIsNumeric {
			numberStartIndex = byteIndex
		}
	}

	// Reached the end of one or both strings without finding a difference: a is less only if it
	// is a strict prefix of b.
	if byteIndex == minLength {
		return len(a) < len(b)
	}

	aByteNumeric := isNumeric(rune(aByte))
	bByteNumeric := isNumeric(rune(bByte))

	if aByteNumeric != bByteNumeric && numberStartIndex != -1 {
		// Only one of aByte and bByte is a digit, but the previous byte was a digit. That means
		// one is a longer number with the same prefix, which must be numerically larger. If bByte
		// is a digit then the number in b is numerically larger than the number in a.
		return bByteNumeric
	}

	// At least one is not a digit, do a byte comparison.
	if !aByteNumeric || !bByteNumeric {
		return aByte < bByte
	}

	// Both bytes are digits: compare the whole digit sequences, starting from the first digit
	// after the last non-digit. Slicing here cannot split a UTF-8 sequence because the bytes
	// '0'-'9' only occur as single-byte runes.
	if numberStartIndex == -1 {
		numberStartIndex = byteIndex
	}

	// leadingDigits returns the run of digits at the start of s.
	leadingDigits := func(s string) string {
		if end := strings.IndexFunc(s, isNotNumeric); end != -1 {
			return s[:end]
		}
		return s
	}
	aDigits := leadingDigits(a[numberStartIndex:])
	bDigits := leadingDigits(b[numberStartIndex:])

	// Fast path: both numbers fit in an int.
	aNumber, aErr := strconv.Atoi(aDigits)
	bNumber, bErr := strconv.Atoi(bDigits)
	if aErr == nil && bErr == nil {
		return aNumber < bNumber
	}

	// A non-empty run of digits can only fail to convert by being out of range; any other error
	// means the digit extraction above is broken.
	mustBeRangeError := func(digits, whole string, err error) {
		if err == nil {
			return
		}
		if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
			return
		}
		panic(fmt.Errorf("failed to convert %q from %q to number: %w", digits, whole, err))
	}
	mustBeRangeError(aDigits, a, aErr)
	mustBeRangeError(bDigits, b, bErr)

	// At least one number is too large for an int. Without leading zeros, the number with more
	// digits is larger, and numbers with the same digit count order like their strings.
	aSignificant := strings.TrimLeft(aDigits, "0")
	bSignificant := strings.TrimLeft(bDigits, "0")
	if len(aSignificant) != len(bSignificant) {
		return len(aSignificant) < len(bSignificant)
	}
	return aSignificant < bSignificant
}
// SortLists walks every definition in file and sorts the lists found in it: the value of each
// assignment and the value of each property of each module are handed to sortListsInValue.
// Afterwards file.Comments is re-sorted by offset, since sorting list entries may have moved
// comments.
func SortLists(file *File) {
	for _, def := range file.Defs {
		switch d := def.(type) {
		case *Assignment:
			sortListsInValue(d.Value, file)
		case *Module:
			for _, property := range d.Properties {
				sortListsInValue(property.Value, file)
			}
		}
	}

	sort.Sort(commentsByOffset(file.Comments))
}
// SortList sorts the values of list in place. Values are sorted in independent runs: a run is a
// maximal sequence of values in which each value starts at most one line after the previous one,
// so a gap of more than one line starts a new run. Lists that isListOfPrimitives rejects are left
// untouched.
func SortList(file *File, list *List) {
	if !isListOfPrimitives(list.Values) {
		return
	}

	for start := 0; start < len(list.Values); {
		// Extend the run [start, end) while values stay on contiguous lines.
		end := start + 1
		lastLine := list.Values[start].Pos().Line
		for ; end < len(list.Values); end++ {
			if list.Values[end].Pos().Line > lastLine+1 {
				break
			}
			lastLine = list.Values[end].Pos().Line
		}

		// The run is bounded by the next value, or by the end of the list for the final run.
		boundary := list.End()
		if end < len(list.Values) {
			boundary = list.Values[end].Pos()
		}

		sortSubList(list.Values[start:end], boundary, file)
		start = end
	}
}
// ListIsSorted reports whether every run of values in list is already sorted according to
// subListIsSorted. A run is a maximal sequence of values in which each value starts at most one
// line after the previous one.
func ListIsSorted(list *List) bool {
	for start := 0; start < len(list.Values); {
		// Extend the run [start, end) while values stay on contiguous lines.
		end := start + 1
		lastLine := list.Values[start].Pos().Line
		for ; end < len(list.Values); end++ {
			if list.Values[end].Pos().Line > lastLine+1 {
				break
			}
			lastLine = list.Values[end].Pos().Line
		}

		if !subListIsSorted(list.Values[start:end]) {
			return false
		}
		start = end
	}

	return true
}
// sortListsInValue sorts the lists reachable from value: a list is sorted directly with
// SortList, a map is searched through the value of each of its properties, and an operator is
// searched through its first two arguments. Variables contain nothing to sort, and any other
// expression type is ignored.
func sortListsInValue(value Expression, file *File) {
	switch v := value.(type) {
	case *List:
		SortList(file, v)
	case *Map:
		for _, property := range v.Properties {
			sortListsInValue(property.Value, file)
		}
	case *Operator:
		// Visit the two operands in order.
		for arg := 0; arg < 2; arg++ {
			sortListsInValue(v.Args[arg], file)
		}
	case *Variable:
		// Nothing to sort.
	}
}
// sortSubList sorts values in place using numericStringLess and rewrites positions to match the
// new order. nextPos is the position of whatever follows the last value. Each value's LiteralPos
// is reassigned so the sorted values occupy consecutive lines starting at the original position
// of values[0], and the first comment of every comment group that sat between a value and the
// entry after it is shifted along with that value.
//
// Nothing happens if isListOfPrimitives rejects values. Panics if any element is not a *String.
func sortSubList(values []Expression, nextPos scanner.Position, file *File) {
	if !isListOfPrimitives(values) {
		return
	}

	// Record each value's sort key, original index, and the span up to the next entry.
	entries := make([]elem, len(values))
	for idx, value := range values {
		str, isString := value.(*String)
		if !isString {
			panic("list contains non-string element")
		}
		following := nextPos
		if idx+1 < len(values) {
			following = values[idx+1].Pos()
		}
		entries[idx] = elem{s: str.Value, i: idx, pos: value.Pos(), nextPos: following}
	}

	sort.SliceStable(entries, func(x, y int) bool {
		return numericStringLess(entries[x].s, entries[y].s)
	})

	// Snapshot the values and comments so lookups below see the pre-sort state while values and
	// file.Comments are being modified.
	unsorted := make([]Expression, len(values))
	copy(unsorted, values)

	snapshot := make([]*CommentGroup, len(file.Comments))
	for g, group := range file.Comments {
		groupCopy := *group
		groupCopy.Comments = make([]*Comment, len(groupCopy.Comments))
		for c, comment := range group.Comments {
			commentCopy := *comment
			groupCopy.Comments[c] = &commentCopy
		}
		snapshot[g] = &groupCopy
	}

	// Lay the sorted values out one per line, starting where the first value used to be.
	cursor := values[0].Pos()
	for slot, entry := range entries {
		values[slot] = unsorted[entry.i]
		values[slot].(*String).LiteralPos = cursor

		for g, group := range snapshot {
			offset := group.Pos().Offset
			if offset > entry.pos.Offset && offset < entry.nextPos.Offset {
				// The comment originally trailed this entry; move it with the entry.
				first := file.Comments[g].Comments[0]
				first.Slash.Line = cursor.Line
				first.Slash.Offset += values[slot].Pos().Offset - entry.pos.Offset
			}
		}

		cursor.Offset += entry.nextPos.Offset - entry.pos.Offset
		cursor.Line++
	}
}
// subListIsSorted reports whether values are in non-decreasing order according to
// numericStringLess. Slices that isListOfPrimitives rejects are reported as sorted. Panics if an
// element is not a *String.
func subListIsSorted(values []Expression) bool {
	if !isListOfPrimitives(values) {
		return true
	}

	var previous string
	for _, value := range values {
		current, isString := value.(*String)
		if !isString {
			panic("list contains non-string element")
		}

		switch {
		case previous == "":
			// Nothing to compare against: either the first element or an empty predecessor.
		case numericStringLess(current.Value, previous):
			return false
		}
		previous = current.Value
	}

	return true
}
// elem describes one list entry while a sub-list is being sorted.
type elem struct {
	// s is the entry's string value, used as the sort key.
	s string
	// i is the entry's index in the sub-list before sorting.
	i int
	// pos is where the entry started before sorting; nextPos is where the entry after it
	// (or whatever follows the sub-list) started.
	pos, nextPos scanner.Position
}
// commentsByOffset implements sort.Interface, ordering comment groups by the offset of their
// position.
type commentsByOffset []*CommentGroup

// Len returns the number of comment groups.
func (c commentsByOffset) Len() int { return len(c) }

// Less reports whether the group at index i starts at a smaller offset than the group at index j.
func (c commentsByOffset) Less(i, j int) bool {
	left, right := c[i].Pos(), c[j].Pos()
	return left.Offset < right.Offset
}

// Swap exchanges the groups at indexes i and j.
func (c commentsByOffset) Swap(i, j int) {
	tmp := c[i]
	c[i] = c[j]
	c[j] = tmp
}
// isListOfPrimitives reports whether values looks like a list of bools, strings or int64s. Only
// the type of the first element is inspected; an empty slice counts as a list of primitives.
func isListOfPrimitives(values []Expression) bool {
	if len(values) == 0 {
		return true
	}

	first := values[0].Type()
	return first == BoolType || first == StringType || first == Int64Type
}