Fix numericStringLess and add tests
numericStringLess("1a", "11a") would strip the equal prefix "1" and then compare the bytes "a" and "1", when it should have compared the numbers 1 and 11. Fix it by handling the case where the last equal byte was numeric and the first differing byte is numeric in one string and non-numeric in the other. numericStringLess("12", "101") would strip the equal prefix "1" and then compare the numbers 2 and 01, when it should have compared the numbers 12 and 101. Fix it by tracking the beginning of the sequence of numeric bytes containing the differing byte. Test: sort_test.go Change-Id: I8d9252a64625ba6a3c75d09bb1429dcb1115e3e1
This commit is contained in:
parent
122b3ee153
commit
eb15c126c3
3 changed files with 166 additions and 37 deletions
|
@ -47,6 +47,7 @@ bootstrap_go_package {
|
|||
"parser/modify_test.go",
|
||||
"parser/parser_test.go",
|
||||
"parser/printer_test.go",
|
||||
"parser/sort_test.go",
|
||||
],
|
||||
}
|
||||
|
||||
|
|
106
parser/sort.go
106
parser/sort.go
|
@ -22,64 +22,96 @@ import (
|
|||
"text/scanner"
|
||||
)
|
||||
|
||||
// numericStringLess compares two strings, returning a lexicographical comparison unless the first
|
||||
// difference occurs in a sequence of 1 or more numeric characters, in which case it returns the
|
||||
// numerical comparison of the two numbers.
|
||||
func numericStringLess(a, b string) bool {
|
||||
byteIndex := 0
|
||||
// Start with a byte comparison to find where the strings differ
|
||||
for ; byteIndex < len(a) && byteIndex < len(b); byteIndex++ {
|
||||
if a[byteIndex] != b[byteIndex] {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if byteIndex == len(a) && byteIndex != len(b) {
|
||||
// Reached the end of a. a is a prefix of b.
|
||||
return true
|
||||
} else if byteIndex == len(b) {
|
||||
// Reached the end of b. b is a prefix of a or b is equal to a.
|
||||
return false
|
||||
}
|
||||
|
||||
// Save the first differing bytes in case we have to fall back to a byte comparison.
|
||||
aDifferingByte := a[byteIndex]
|
||||
bDifferingByte := b[byteIndex]
|
||||
|
||||
// Save the differing suffixes of the strings. This may be invalid utf8 if the first
|
||||
// rune was cut, but that's fine because are only looking for the bytes '0'-'9', which
|
||||
// can only occur as single-byte runes in utf8.
|
||||
aDifference := a[byteIndex:]
|
||||
bDifference := b[byteIndex:]
|
||||
|
||||
isNumeric := func(r rune) bool { return r >= '0' && r <= '9' }
|
||||
isNotNumeric := func(r rune) bool { return !isNumeric(r) }
|
||||
|
||||
// If the first runes are both numbers do a numeric comparison.
|
||||
if isNumeric(rune(aDifferingByte)) && isNumeric(rune(bDifferingByte)) {
|
||||
minLength := len(a)
|
||||
if len(b) < minLength {
|
||||
minLength = len(b)
|
||||
}
|
||||
|
||||
byteIndex := 0
|
||||
numberStartIndex := -1
|
||||
|
||||
var aByte, bByte byte
|
||||
|
||||
// Start with a byte comparison to find where the strings differ.
|
||||
for ; byteIndex < minLength; byteIndex++ {
|
||||
aByte, bByte = a[byteIndex], b[byteIndex]
|
||||
if aByte != bByte {
|
||||
break
|
||||
}
|
||||
byteIsNumeric := isNumeric(rune(aByte))
|
||||
if numberStartIndex != -1 && !byteIsNumeric {
|
||||
numberStartIndex = -1
|
||||
} else if numberStartIndex == -1 && byteIsNumeric {
|
||||
numberStartIndex = byteIndex
|
||||
}
|
||||
}
|
||||
|
||||
// Handle the case where we reached the end of one or both strings without finding a difference.
|
||||
if byteIndex == minLength {
|
||||
if len(a) < len(b) {
|
||||
// Reached the end of a. a is a prefix of b.
|
||||
return true
|
||||
} else {
|
||||
// Reached the end of b. b is a prefix of a or b is equal to a.
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
aByteNumeric := isNumeric(rune(aByte))
|
||||
bByteNumeric := isNumeric(rune(bByte))
|
||||
|
||||
if (aByteNumeric || bByteNumeric) && !(aByteNumeric && bByteNumeric) && numberStartIndex != -1 {
|
||||
// Only one of aByte and bByte is a number, but the previous byte was a number. That means
|
||||
// one is a longer number with the same prefix, which must be numerically larger. If bByte
|
||||
// is a number then the number in b is numerically larger than the number in a.
|
||||
return bByteNumeric
|
||||
}
|
||||
|
||||
// If the bytes are both numbers do a numeric comparison.
|
||||
if aByteNumeric && bByteNumeric {
|
||||
// Extract the numbers from each string, starting from the first number after the last
|
||||
// non-number. This won't be invalid utf8 because we are only looking for the bytes
|
||||
//'0'-'9', which can only occur as single-byte runes in utf8.
|
||||
if numberStartIndex == -1 {
|
||||
numberStartIndex = byteIndex
|
||||
}
|
||||
aNumberString := a[numberStartIndex:]
|
||||
bNumberString := b[numberStartIndex:]
|
||||
|
||||
// Find the first non-number in each, using the full length if there isn't one.
|
||||
endANumbers := strings.IndexFunc(aDifference, isNotNumeric)
|
||||
endBNumbers := strings.IndexFunc(bDifference, isNotNumeric)
|
||||
endANumbers := strings.IndexFunc(aNumberString, isNotNumeric)
|
||||
endBNumbers := strings.IndexFunc(bNumberString, isNotNumeric)
|
||||
if endANumbers == -1 {
|
||||
endANumbers = len(aDifference)
|
||||
endANumbers = len(aNumberString)
|
||||
}
|
||||
if endBNumbers == -1 {
|
||||
endBNumbers = len(bDifference)
|
||||
endBNumbers = len(bNumberString)
|
||||
}
|
||||
|
||||
// Convert each to an int.
|
||||
aNumber, err := strconv.Atoi(aDifference[:endANumbers])
|
||||
aNumber, err := strconv.Atoi(aNumberString[:endANumbers])
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("failed to convert %q from %q to number: %w",
|
||||
aDifference[:endANumbers], a, err))
|
||||
aNumberString[:endANumbers], a, err))
|
||||
}
|
||||
bNumber, err := strconv.Atoi(bDifference[:endBNumbers])
|
||||
bNumber, err := strconv.Atoi(bNumberString[:endBNumbers])
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("failed to convert %q from %q to number: %w",
|
||||
bDifference[:endBNumbers], b, err))
|
||||
bNumberString[:endBNumbers], b, err))
|
||||
}
|
||||
// Do a numeric comparison.
|
||||
return aNumber < bNumber
|
||||
}
|
||||
|
||||
// At least one is not a number, do a byte comparison.
|
||||
return aDifferingByte < bDifferingByte
|
||||
return aByte < bByte
|
||||
}
|
||||
|
||||
func SortLists(file *File) {
|
||||
|
|
96
parser/sort_test.go
Normal file
96
parser/sort_test.go
Normal file
|
@ -0,0 +1,96 @@
|
|||
// Copyright 2021 Google Inc. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package parser
|
||||
|
||||
import "testing"
|
||||
|
||||
func Test_numericStringLess(t *testing.T) {
|
||||
type args struct {
|
||||
a string
|
||||
b string
|
||||
}
|
||||
tests := []struct {
|
||||
a, b string
|
||||
}{
|
||||
{"a", "b"},
|
||||
{"aa", "ab"},
|
||||
{"aaa", "aba"},
|
||||
|
||||
{"1", "2"},
|
||||
{"1", "11"},
|
||||
{"2", "11"},
|
||||
{"1", "12"},
|
||||
|
||||
{"12", "101"},
|
||||
{"11", "102"},
|
||||
|
||||
{"0", "1"},
|
||||
{"0", "01"},
|
||||
{"1", "02"},
|
||||
{"01", "002"},
|
||||
{"001", "02"},
|
||||
}
|
||||
|
||||
oneTest := func(a, b string, want bool) {
|
||||
t.Helper()
|
||||
if got := numericStringLess(a, b); got != want {
|
||||
t.Errorf("want numericStringLess(%v, %v) = %v, got %v", a, b, want, got)
|
||||
}
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.a+"<"+tt.b, func(t *testing.T) {
|
||||
// a should be less than b
|
||||
oneTest(tt.a, tt.b, true)
|
||||
// b should not be less than a
|
||||
oneTest(tt.b, tt.a, false)
|
||||
// a should not be less than a
|
||||
oneTest(tt.a, tt.a, false)
|
||||
// b should not be less than b
|
||||
oneTest(tt.b, tt.b, false)
|
||||
|
||||
// The same should be true both strings are prefixed with an "a"
|
||||
oneTest("a"+tt.a, "a"+tt.b, true)
|
||||
oneTest("a"+tt.b, "a"+tt.a, false)
|
||||
oneTest("a"+tt.a, "a"+tt.a, false)
|
||||
oneTest("a"+tt.b, "a"+tt.b, false)
|
||||
|
||||
// The same should be true both strings are suffixed with an "a"
|
||||
oneTest(tt.a+"a", tt.b+"a", true)
|
||||
oneTest(tt.b+"a", tt.a+"a", false)
|
||||
oneTest(tt.a+"a", tt.a+"a", false)
|
||||
oneTest(tt.b+"a", tt.b+"a", false)
|
||||
|
||||
// The same should be true both strings are suffixed with a "1"
|
||||
oneTest(tt.a+"1", tt.b+"1", true)
|
||||
oneTest(tt.b+"1", tt.a+"1", false)
|
||||
oneTest(tt.a+"1", tt.a+"1", false)
|
||||
oneTest(tt.b+"1", tt.b+"1", false)
|
||||
|
||||
// The same should be true both strings are prefixed with a "0"
|
||||
oneTest("0"+tt.a, "0"+tt.b, true)
|
||||
oneTest("0"+tt.b, "0"+tt.a, false)
|
||||
oneTest("0"+tt.a, "0"+tt.a, false)
|
||||
oneTest("0"+tt.b, "0"+tt.b, false)
|
||||
|
||||
// The same should be true both strings are suffixed with a "0"
|
||||
oneTest(tt.a+"0", tt.b+"0", true)
|
||||
oneTest(tt.b+"0", tt.a+"0", false)
|
||||
oneTest(tt.a+"0", tt.a+"0", false)
|
||||
oneTest(tt.b+"0", tt.b+"0", false)
|
||||
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue