Fix numericStringLess and add tests
numericStringLess("1a", "11a") would strip the equal prefix "1" and then compare the bytes "a" and "1", when it should have compared the numbers 1 and 11. Fix it by handling the case where the last equal byte was numeric and the first differing byte is numeric in one string and non-numeric in the other. numericStringLess("12", "101") would strip the equal prefix "1" and then compare the numbers 2 and 01, when it should have compared the numbers 12 and 101. Fix it by tracking the beginning of the sequence of numeric bytes containing the differing byte. Test: sort_test.go Change-Id: I8d9252a64625ba6a3c75d09bb1429dcb1115e3e1
This commit is contained in:
parent
122b3ee153
commit
eb15c126c3
3 changed files with 166 additions and 37 deletions
|
@ -47,6 +47,7 @@ bootstrap_go_package {
|
||||||
"parser/modify_test.go",
|
"parser/modify_test.go",
|
||||||
"parser/parser_test.go",
|
"parser/parser_test.go",
|
||||||
"parser/printer_test.go",
|
"parser/printer_test.go",
|
||||||
|
"parser/sort_test.go",
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
106
parser/sort.go
106
parser/sort.go
|
@ -22,64 +22,96 @@ import (
|
||||||
"text/scanner"
|
"text/scanner"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// numericStringLess compares two strings, returning a lexicographical comparison unless the first
|
||||||
|
// difference occurs in a sequence of 1 or more numeric characters, in which case it returns the
|
||||||
|
// numerical comparison of the two numbers.
|
||||||
func numericStringLess(a, b string) bool {
|
func numericStringLess(a, b string) bool {
|
||||||
byteIndex := 0
|
|
||||||
// Start with a byte comparison to find where the strings differ
|
|
||||||
for ; byteIndex < len(a) && byteIndex < len(b); byteIndex++ {
|
|
||||||
if a[byteIndex] != b[byteIndex] {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if byteIndex == len(a) && byteIndex != len(b) {
|
|
||||||
// Reached the end of a. a is a prefix of b.
|
|
||||||
return true
|
|
||||||
} else if byteIndex == len(b) {
|
|
||||||
// Reached the end of b. b is a prefix of a or b is equal to a.
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// Save the first differing bytes in case we have to fall back to a byte comparison.
|
|
||||||
aDifferingByte := a[byteIndex]
|
|
||||||
bDifferingByte := b[byteIndex]
|
|
||||||
|
|
||||||
// Save the differing suffixes of the strings. This may be invalid utf8 if the first
|
|
||||||
// rune was cut, but that's fine because are only looking for the bytes '0'-'9', which
|
|
||||||
// can only occur as single-byte runes in utf8.
|
|
||||||
aDifference := a[byteIndex:]
|
|
||||||
bDifference := b[byteIndex:]
|
|
||||||
|
|
||||||
isNumeric := func(r rune) bool { return r >= '0' && r <= '9' }
|
isNumeric := func(r rune) bool { return r >= '0' && r <= '9' }
|
||||||
isNotNumeric := func(r rune) bool { return !isNumeric(r) }
|
isNotNumeric := func(r rune) bool { return !isNumeric(r) }
|
||||||
|
|
||||||
// If the first runes are both numbers do a numeric comparison.
|
minLength := len(a)
|
||||||
if isNumeric(rune(aDifferingByte)) && isNumeric(rune(bDifferingByte)) {
|
if len(b) < minLength {
|
||||||
|
minLength = len(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
byteIndex := 0
|
||||||
|
numberStartIndex := -1
|
||||||
|
|
||||||
|
var aByte, bByte byte
|
||||||
|
|
||||||
|
// Start with a byte comparison to find where the strings differ.
|
||||||
|
for ; byteIndex < minLength; byteIndex++ {
|
||||||
|
aByte, bByte = a[byteIndex], b[byteIndex]
|
||||||
|
if aByte != bByte {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
byteIsNumeric := isNumeric(rune(aByte))
|
||||||
|
if numberStartIndex != -1 && !byteIsNumeric {
|
||||||
|
numberStartIndex = -1
|
||||||
|
} else if numberStartIndex == -1 && byteIsNumeric {
|
||||||
|
numberStartIndex = byteIndex
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle the case where we reached the end of one or both strings without finding a difference.
|
||||||
|
if byteIndex == minLength {
|
||||||
|
if len(a) < len(b) {
|
||||||
|
// Reached the end of a. a is a prefix of b.
|
||||||
|
return true
|
||||||
|
} else {
|
||||||
|
// Reached the end of b. b is a prefix of a or b is equal to a.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
aByteNumeric := isNumeric(rune(aByte))
|
||||||
|
bByteNumeric := isNumeric(rune(bByte))
|
||||||
|
|
||||||
|
if (aByteNumeric || bByteNumeric) && !(aByteNumeric && bByteNumeric) && numberStartIndex != -1 {
|
||||||
|
// Only one of aByte and bByte is a number, but the previous byte was a number. That means
|
||||||
|
// one is a longer number with the same prefix, which must be numerically larger. If bByte
|
||||||
|
// is a number then the number in b is numerically larger than the number in a.
|
||||||
|
return bByteNumeric
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the bytes are both numbers do a numeric comparison.
|
||||||
|
if aByteNumeric && bByteNumeric {
|
||||||
|
// Extract the numbers from each string, starting from the first number after the last
|
||||||
|
// non-number. This won't be invalid utf8 because we are only looking for the bytes
|
||||||
|
//'0'-'9', which can only occur as single-byte runes in utf8.
|
||||||
|
if numberStartIndex == -1 {
|
||||||
|
numberStartIndex = byteIndex
|
||||||
|
}
|
||||||
|
aNumberString := a[numberStartIndex:]
|
||||||
|
bNumberString := b[numberStartIndex:]
|
||||||
|
|
||||||
// Find the first non-number in each, using the full length if there isn't one.
|
// Find the first non-number in each, using the full length if there isn't one.
|
||||||
endANumbers := strings.IndexFunc(aDifference, isNotNumeric)
|
endANumbers := strings.IndexFunc(aNumberString, isNotNumeric)
|
||||||
endBNumbers := strings.IndexFunc(bDifference, isNotNumeric)
|
endBNumbers := strings.IndexFunc(bNumberString, isNotNumeric)
|
||||||
if endANumbers == -1 {
|
if endANumbers == -1 {
|
||||||
endANumbers = len(aDifference)
|
endANumbers = len(aNumberString)
|
||||||
}
|
}
|
||||||
if endBNumbers == -1 {
|
if endBNumbers == -1 {
|
||||||
endBNumbers = len(bDifference)
|
endBNumbers = len(bNumberString)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert each to an int.
|
// Convert each to an int.
|
||||||
aNumber, err := strconv.Atoi(aDifference[:endANumbers])
|
aNumber, err := strconv.Atoi(aNumberString[:endANumbers])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("failed to convert %q from %q to number: %w",
|
panic(fmt.Errorf("failed to convert %q from %q to number: %w",
|
||||||
aDifference[:endANumbers], a, err))
|
aNumberString[:endANumbers], a, err))
|
||||||
}
|
}
|
||||||
bNumber, err := strconv.Atoi(bDifference[:endBNumbers])
|
bNumber, err := strconv.Atoi(bNumberString[:endBNumbers])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(fmt.Errorf("failed to convert %q from %q to number: %w",
|
panic(fmt.Errorf("failed to convert %q from %q to number: %w",
|
||||||
bDifference[:endBNumbers], b, err))
|
bNumberString[:endBNumbers], b, err))
|
||||||
}
|
}
|
||||||
// Do a numeric comparison.
|
// Do a numeric comparison.
|
||||||
return aNumber < bNumber
|
return aNumber < bNumber
|
||||||
}
|
}
|
||||||
|
|
||||||
// At least one is not a number, do a byte comparison.
|
// At least one is not a number, do a byte comparison.
|
||||||
return aDifferingByte < bDifferingByte
|
return aByte < bByte
|
||||||
}
|
}
|
||||||
|
|
||||||
func SortLists(file *File) {
|
func SortLists(file *File) {
|
||||||
|
|
96
parser/sort_test.go
Normal file
96
parser/sort_test.go
Normal file
|
@ -0,0 +1,96 @@
|
||||||
|
// Copyright 2021 Google Inc. All rights reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package parser
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
func Test_numericStringLess(t *testing.T) {
|
||||||
|
type args struct {
|
||||||
|
a string
|
||||||
|
b string
|
||||||
|
}
|
||||||
|
tests := []struct {
|
||||||
|
a, b string
|
||||||
|
}{
|
||||||
|
{"a", "b"},
|
||||||
|
{"aa", "ab"},
|
||||||
|
{"aaa", "aba"},
|
||||||
|
|
||||||
|
{"1", "2"},
|
||||||
|
{"1", "11"},
|
||||||
|
{"2", "11"},
|
||||||
|
{"1", "12"},
|
||||||
|
|
||||||
|
{"12", "101"},
|
||||||
|
{"11", "102"},
|
||||||
|
|
||||||
|
{"0", "1"},
|
||||||
|
{"0", "01"},
|
||||||
|
{"1", "02"},
|
||||||
|
{"01", "002"},
|
||||||
|
{"001", "02"},
|
||||||
|
}
|
||||||
|
|
||||||
|
oneTest := func(a, b string, want bool) {
|
||||||
|
t.Helper()
|
||||||
|
if got := numericStringLess(a, b); got != want {
|
||||||
|
t.Errorf("want numericStringLess(%v, %v) = %v, got %v", a, b, want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.a+"<"+tt.b, func(t *testing.T) {
|
||||||
|
// a should be less than b
|
||||||
|
oneTest(tt.a, tt.b, true)
|
||||||
|
// b should not be less than a
|
||||||
|
oneTest(tt.b, tt.a, false)
|
||||||
|
// a should not be less than a
|
||||||
|
oneTest(tt.a, tt.a, false)
|
||||||
|
// b should not be less than b
|
||||||
|
oneTest(tt.b, tt.b, false)
|
||||||
|
|
||||||
|
// The same should be true both strings are prefixed with an "a"
|
||||||
|
oneTest("a"+tt.a, "a"+tt.b, true)
|
||||||
|
oneTest("a"+tt.b, "a"+tt.a, false)
|
||||||
|
oneTest("a"+tt.a, "a"+tt.a, false)
|
||||||
|
oneTest("a"+tt.b, "a"+tt.b, false)
|
||||||
|
|
||||||
|
// The same should be true both strings are suffixed with an "a"
|
||||||
|
oneTest(tt.a+"a", tt.b+"a", true)
|
||||||
|
oneTest(tt.b+"a", tt.a+"a", false)
|
||||||
|
oneTest(tt.a+"a", tt.a+"a", false)
|
||||||
|
oneTest(tt.b+"a", tt.b+"a", false)
|
||||||
|
|
||||||
|
// The same should be true both strings are suffixed with a "1"
|
||||||
|
oneTest(tt.a+"1", tt.b+"1", true)
|
||||||
|
oneTest(tt.b+"1", tt.a+"1", false)
|
||||||
|
oneTest(tt.a+"1", tt.a+"1", false)
|
||||||
|
oneTest(tt.b+"1", tt.b+"1", false)
|
||||||
|
|
||||||
|
// The same should be true both strings are prefixed with a "0"
|
||||||
|
oneTest("0"+tt.a, "0"+tt.b, true)
|
||||||
|
oneTest("0"+tt.b, "0"+tt.a, false)
|
||||||
|
oneTest("0"+tt.a, "0"+tt.a, false)
|
||||||
|
oneTest("0"+tt.b, "0"+tt.b, false)
|
||||||
|
|
||||||
|
// The same should be true both strings are suffixed with a "0"
|
||||||
|
oneTest(tt.a+"0", tt.b+"0", true)
|
||||||
|
oneTest(tt.b+"0", tt.a+"0", false)
|
||||||
|
oneTest(tt.a+"0", tt.a+"0", false)
|
||||||
|
oneTest(tt.b+"0", tt.b+"0", false)
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue