Fix non-deterministic output for merge_zips

A map iteration was causing a non-deterministic order
in output entries. The default behaviour is to preserve
the same order of the input zips, which is necessary
to ensure a deterministic output for deterministic inputs.

Bug: b/322788229
Test: Ran a couple of builds and confirmed no cache misses from
the output of merge_zips.

Change-Id: I3217e6887ab108d213a290b59da5b33d51b8241f
This commit is contained in:
Anas Sulaiman 2024-02-09 21:06:49 +00:00
parent b129b7cba2
commit 437e9470c0
2 changed files with 36 additions and 5 deletions

View file

@ -342,7 +342,8 @@ func (oz *OutputZip) writeEntries(entries []string) error {
func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]string, error) {
// the runfiles packages needs to be populated with "__init__.py".
// the runfiles dirs have been treated as packages.
allPackages := make(map[string]bool)
var allPackages []string // Using a slice to preserve input order.
seenPkgs := make(map[string]bool)
initedPackages := make(map[string]bool)
getPackage := func(path string) string {
ret := filepath.Dir(path)
@ -369,16 +370,17 @@ func (oz *OutputZip) getUninitializedPythonPackages(inputZips []InputZip) ([]str
initedPackages[pyPkg] = true
}
for pyPkg != "" {
if _, found := allPackages[pyPkg]; found {
if _, found := seenPkgs[pyPkg]; found {
break
}
allPackages[pyPkg] = true
seenPkgs[pyPkg] = true
allPackages = append(allPackages, pyPkg)
pyPkg = getPackage(pyPkg)
}
}
}
noInitPackages := make([]string, 0)
for pyPkg := range allPackages {
for _, pyPkg := range allPackages {
if _, found := initedPackages[pyPkg]; !found {
noInitPackages = append(noInitPackages, pyPkg)
}

View file

@ -103,6 +103,7 @@ func TestMergeZips(t *testing.T) {
stripFiles []string
stripDirs []string
jar bool
par bool
sort bool
ignoreDuplicates bool
stripDirEntries bool
@ -265,6 +266,34 @@ func TestMergeZips(t *testing.T) {
},
out: []testZipEntry{withoutTimestamp, a},
},
{
name: "emulate par",
in: [][]testZipEntry{
{
testZipEntry{name: "3/main.py"},
testZipEntry{name: "c/main.py"},
testZipEntry{name: "a/main.py"},
testZipEntry{name: "2/main.py"},
testZipEntry{name: "b/main.py"},
testZipEntry{name: "1/main.py"},
},
},
out: []testZipEntry{
testZipEntry{name: "3/__init__.py", mode: 0700, timestamp: jar.DefaultTime},
testZipEntry{name: "c/__init__.py", mode: 0700, timestamp: jar.DefaultTime},
testZipEntry{name: "a/__init__.py", mode: 0700, timestamp: jar.DefaultTime},
testZipEntry{name: "2/__init__.py", mode: 0700, timestamp: jar.DefaultTime},
testZipEntry{name: "b/__init__.py", mode: 0700, timestamp: jar.DefaultTime},
testZipEntry{name: "1/__init__.py", mode: 0700, timestamp: jar.DefaultTime},
testZipEntry{name: "3/main.py", timestamp: jar.DefaultTime},
testZipEntry{name: "c/main.py", timestamp: jar.DefaultTime},
testZipEntry{name: "a/main.py", timestamp: jar.DefaultTime},
testZipEntry{name: "2/main.py", timestamp: jar.DefaultTime},
testZipEntry{name: "b/main.py", timestamp: jar.DefaultTime},
testZipEntry{name: "1/main.py", timestamp: jar.DefaultTime},
},
par: true,
},
}
for _, test := range testCases {
@ -280,7 +309,7 @@ func TestMergeZips(t *testing.T) {
writer := zip.NewWriter(out)
err := mergeZips(inputZips, writer, "", "",
test.sort, test.jar, false, test.stripDirEntries, test.ignoreDuplicates,
test.sort, test.jar, test.par, test.stripDirEntries, test.ignoreDuplicates,
test.stripFiles, test.stripDirs, test.zipsToNotStrip)
closeErr := writer.Close()