Fix numericStringLess and add tests

numericStringLess("1a", "11a") would strip the equal prefix "1" and
then compare the bytes "a" and "1", when it should have compared the
numbers 1 and 11.  Fix it by handling the case where the last equal
byte was numeric and the first differing byte is numeric in one
string and non-numeric in the other.

numericStringLess("12", "101") would strip the equal prefix "1" and
then compare the numbers 2 and 01, when it should have compared the
numbers 12 and 101.  Fix it by tracking the beginning of the sequence
of numeric bytes containing the differing byte.

Test: sort_test.go
Change-Id: I8d9252a64625ba6a3c75d09bb1429dcb1115e3e1
This commit is contained in:
Colin Cross 2021-03-08 17:55:50 -08:00
parent 122b3ee153
commit eb15c126c3
3 changed files with 166 additions and 37 deletions

View file

@ -47,6 +47,7 @@ bootstrap_go_package {
"parser/modify_test.go",
"parser/parser_test.go",
"parser/printer_test.go",
"parser/sort_test.go",
],
}

View file

@ -22,64 +22,96 @@ import (
"text/scanner"
)
// numericStringLess compares two strings, returning a lexicographical comparison unless the first
// difference occurs in a sequence of 1 or more numeric characters, in which case it returns the
// numerical comparison of the two numbers.
func numericStringLess(a, b string) bool {
byteIndex := 0
// Start with a byte comparison to find where the strings differ
for ; byteIndex < len(a) && byteIndex < len(b); byteIndex++ {
if a[byteIndex] != b[byteIndex] {
break
}
}
if byteIndex == len(a) && byteIndex != len(b) {
// Reached the end of a. a is a prefix of b.
return true
} else if byteIndex == len(b) {
// Reached the end of b. b is a prefix of a or b is equal to a.
return false
}
// Save the first differing bytes in case we have to fall back to a byte comparison.
aDifferingByte := a[byteIndex]
bDifferingByte := b[byteIndex]
// Save the differing suffixes of the strings. This may be invalid utf8 if the first
// rune was cut, but that's fine because are only looking for the bytes '0'-'9', which
// can only occur as single-byte runes in utf8.
aDifference := a[byteIndex:]
bDifference := b[byteIndex:]
isNumeric := func(r rune) bool { return r >= '0' && r <= '9' }
isNotNumeric := func(r rune) bool { return !isNumeric(r) }
// If the first runes are both numbers do a numeric comparison.
if isNumeric(rune(aDifferingByte)) && isNumeric(rune(bDifferingByte)) {
minLength := len(a)
if len(b) < minLength {
minLength = len(b)
}
byteIndex := 0
numberStartIndex := -1
var aByte, bByte byte
// Start with a byte comparison to find where the strings differ.
for ; byteIndex < minLength; byteIndex++ {
aByte, bByte = a[byteIndex], b[byteIndex]
if aByte != bByte {
break
}
byteIsNumeric := isNumeric(rune(aByte))
if numberStartIndex != -1 && !byteIsNumeric {
numberStartIndex = -1
} else if numberStartIndex == -1 && byteIsNumeric {
numberStartIndex = byteIndex
}
}
// Handle the case where we reached the end of one or both strings without finding a difference.
if byteIndex == minLength {
if len(a) < len(b) {
// Reached the end of a. a is a prefix of b.
return true
} else {
// Reached the end of b. b is a prefix of a or b is equal to a.
return false
}
}
aByteNumeric := isNumeric(rune(aByte))
bByteNumeric := isNumeric(rune(bByte))
if (aByteNumeric || bByteNumeric) && !(aByteNumeric && bByteNumeric) && numberStartIndex != -1 {
// Only one of aByte and bByte is a number, but the previous byte was a number. That means
// one is a longer number with the same prefix, which must be numerically larger. If bByte
// is a number then the number in b is numerically larger than the number in a.
return bByteNumeric
}
// If the bytes are both numbers do a numeric comparison.
if aByteNumeric && bByteNumeric {
// Extract the numbers from each string, starting from the first number after the last
// non-number. This won't be invalid utf8 because we are only looking for the bytes
//'0'-'9', which can only occur as single-byte runes in utf8.
if numberStartIndex == -1 {
numberStartIndex = byteIndex
}
aNumberString := a[numberStartIndex:]
bNumberString := b[numberStartIndex:]
// Find the first non-number in each, using the full length if there isn't one.
endANumbers := strings.IndexFunc(aDifference, isNotNumeric)
endBNumbers := strings.IndexFunc(bDifference, isNotNumeric)
endANumbers := strings.IndexFunc(aNumberString, isNotNumeric)
endBNumbers := strings.IndexFunc(bNumberString, isNotNumeric)
if endANumbers == -1 {
endANumbers = len(aDifference)
endANumbers = len(aNumberString)
}
if endBNumbers == -1 {
endBNumbers = len(bDifference)
endBNumbers = len(bNumberString)
}
// Convert each to an int.
aNumber, err := strconv.Atoi(aDifference[:endANumbers])
aNumber, err := strconv.Atoi(aNumberString[:endANumbers])
if err != nil {
panic(fmt.Errorf("failed to convert %q from %q to number: %w",
aDifference[:endANumbers], a, err))
aNumberString[:endANumbers], a, err))
}
bNumber, err := strconv.Atoi(bDifference[:endBNumbers])
bNumber, err := strconv.Atoi(bNumberString[:endBNumbers])
if err != nil {
panic(fmt.Errorf("failed to convert %q from %q to number: %w",
bDifference[:endBNumbers], b, err))
bNumberString[:endBNumbers], b, err))
}
// Do a numeric comparison.
return aNumber < bNumber
}
// At least one is not a number, do a byte comparison.
return aDifferingByte < bDifferingByte
return aByte < bByte
}
func SortLists(file *File) {

96
parser/sort_test.go Normal file
View file

@ -0,0 +1,96 @@
// Copyright 2021 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package parser
import "testing"
func Test_numericStringLess(t *testing.T) {
type args struct {
a string
b string
}
tests := []struct {
a, b string
}{
{"a", "b"},
{"aa", "ab"},
{"aaa", "aba"},
{"1", "2"},
{"1", "11"},
{"2", "11"},
{"1", "12"},
{"12", "101"},
{"11", "102"},
{"0", "1"},
{"0", "01"},
{"1", "02"},
{"01", "002"},
{"001", "02"},
}
oneTest := func(a, b string, want bool) {
t.Helper()
if got := numericStringLess(a, b); got != want {
t.Errorf("want numericStringLess(%v, %v) = %v, got %v", a, b, want, got)
}
}
for _, tt := range tests {
t.Run(tt.a+"<"+tt.b, func(t *testing.T) {
// a should be less than b
oneTest(tt.a, tt.b, true)
// b should not be less than a
oneTest(tt.b, tt.a, false)
// a should not be less than a
oneTest(tt.a, tt.a, false)
// b should not be less than b
oneTest(tt.b, tt.b, false)
// The same should be true both strings are prefixed with an "a"
oneTest("a"+tt.a, "a"+tt.b, true)
oneTest("a"+tt.b, "a"+tt.a, false)
oneTest("a"+tt.a, "a"+tt.a, false)
oneTest("a"+tt.b, "a"+tt.b, false)
// The same should be true both strings are suffixed with an "a"
oneTest(tt.a+"a", tt.b+"a", true)
oneTest(tt.b+"a", tt.a+"a", false)
oneTest(tt.a+"a", tt.a+"a", false)
oneTest(tt.b+"a", tt.b+"a", false)
// The same should be true both strings are suffixed with a "1"
oneTest(tt.a+"1", tt.b+"1", true)
oneTest(tt.b+"1", tt.a+"1", false)
oneTest(tt.a+"1", tt.a+"1", false)
oneTest(tt.b+"1", tt.b+"1", false)
// The same should be true both strings are prefixed with a "0"
oneTest("0"+tt.a, "0"+tt.b, true)
oneTest("0"+tt.b, "0"+tt.a, false)
oneTest("0"+tt.a, "0"+tt.a, false)
oneTest("0"+tt.b, "0"+tt.b, false)
// The same should be true both strings are suffixed with a "0"
oneTest(tt.a+"0", tt.b+"0", true)
oneTest(tt.b+"0", tt.a+"0", false)
oneTest(tt.a+"0", tt.a+"0", false)
oneTest(tt.b+"0", tt.b+"0", false)
})
}
}