From ae7fd6baf3ffdddf498eedfe598d403f8570b31a Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 21 Dec 2017 16:44:26 -0800 Subject: [PATCH] Reimplement ioutil.ReadDir with a version that avoids calling lstat ioutil.ReadDir returns []os.FileInfo, which contains information on each entry in the directory that is only available by calling os.Lstat on the entry. Finder only needs the name and type (regular, directory or symlink) of the files, which on Linux kernels >= 2.6.4 is available in the return values of syscall.Getdents. Replace ioutil.ReadDir with a call that uses syscall.Getdents directly and collects the type information from the result. Testing with: rm -f /tmp/db && strace -fc finder -names Android.mk,Android.bp,Blueprints,CleanSpec.mk,TEST_MAPPING -exclude-dirs .git,.repo -prune-files .out-dir,.find-ignore -db /tmp/db . Before: 7.01 52.688304 63 833398 1 lstat 1.90 14.246644 68 210523 getdents64 1.25 9.370471 90 104286 1 openat After: 3.48 12.201385 117 104286 1 openat 3.06 10.729138 51 210523 getdents64 1.70 5.951892 57 104283 1 lstat Pros: Avoids 729115 calls to lstat. Cons: Requires copying ~200 lines of finicky buffer parsing code. Puts all getdents calls (and possibly fallback lstat calls) onto a blocking file descriptor, which will cause it to block a thread and not just a goroutine. Only works on Linux and Darwin. 
Bug: 70897635 Test: m checkbuild Change-Id: Iab9f82c38c8675d0b73b4e90540bb9e4d2ee52c1 --- finder/fs/Android.bp | 4 + finder/fs/fs.go | 13 +- finder/fs/readdir.go | 219 ++++++++++++++++++++++++++ finder/fs/readdir_test.go | 312 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 547 insertions(+), 1 deletion(-) create mode 100644 finder/fs/readdir.go create mode 100644 finder/fs/readdir_test.go diff --git a/finder/fs/Android.bp b/finder/fs/Android.bp index fe0a0d36c..27e3c7d71 100644 --- a/finder/fs/Android.bp +++ b/finder/fs/Android.bp @@ -21,6 +21,10 @@ bootstrap_go_package { pkgPath: "android/soong/finder/fs", srcs: [ "fs.go", + "readdir.go", + ], + testSrcs: [ + "readdir_test.go", ], darwin: { srcs: [ diff --git a/finder/fs/fs.go b/finder/fs/fs.go index 3de548659..9c138cd10 100644 --- a/finder/fs/fs.go +++ b/finder/fs/fs.go @@ -75,8 +75,19 @@ type DirEntryInfo interface { IsDir() bool } +type dirEntryInfo struct { + name string + mode os.FileMode + modeExists bool +} + var _ DirEntryInfo = os.FileInfo(nil) +func (d *dirEntryInfo) Name() string { return d.name } +func (d *dirEntryInfo) Mode() os.FileMode { return d.mode } +func (d *dirEntryInfo) IsDir() bool { return d.mode.IsDir() } +func (d *dirEntryInfo) String() string { return d.name + ": " + d.mode.String() } + // osFs implements FileSystem using the local disk. type osFs struct{} @@ -89,7 +100,7 @@ func (osFs) Lstat(path string) (stats os.FileInfo, err error) { } func (osFs) ReadDir(path string) (contents []DirEntryInfo, err error) { - entries, err := ioutil.ReadDir(path) + entries, err := readdir(path) if err != nil { return nil, err } diff --git a/finder/fs/readdir.go b/finder/fs/readdir.go new file mode 100644 index 000000000..f6d7813e1 --- /dev/null +++ b/finder/fs/readdir.go @@ -0,0 +1,219 @@ +// Copyright 2017 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +// This is based on the readdir implementation from Go 1.9: +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +import ( + "os" + "syscall" + "unsafe" +) + +const ( + blockSize = 4096 +) + +func readdir(path string) ([]DirEntryInfo, error) { + f, err := os.Open(path) + defer f.Close() + + if err != nil { + return nil, err + } + // This implicitly switches the fd to non-blocking mode, which is less efficient than what + // file.ReadDir does since it will keep a thread blocked and not just a goroutine. + fd := int(f.Fd()) + + buf := make([]byte, blockSize) + entries := make([]*dirEntryInfo, 0, 100) + + for { + n, errno := syscall.ReadDirent(fd, buf) + if errno != nil { + err = os.NewSyscallError("readdirent", errno) + break + } + if n <= 0 { + break // EOF + } + + entries = parseDirent(buf[:n], entries) + } + + ret := make([]DirEntryInfo, 0, len(entries)) + + for _, entry := range entries { + if !entry.modeExists { + mode, lerr := lstatFileMode(path + "/" + entry.name) + if os.IsNotExist(lerr) { + // File disappeared between readdir + stat. + // Just treat it as if it didn't exist. 
+ continue + } + if lerr != nil { + return ret, lerr + } + entry.mode = mode + entry.modeExists = true + } + ret = append(ret, entry) + } + + return ret, err +} + +func parseDirent(buf []byte, entries []*dirEntryInfo) []*dirEntryInfo { + for len(buf) > 0 { + reclen, ok := direntReclen(buf) + if !ok || reclen > uint64(len(buf)) { + return entries + } + rec := buf[:reclen] + buf = buf[reclen:] + ino, ok := direntIno(rec) + if !ok { + break + } + if ino == 0 { // File absent in directory. + continue + } + typ, ok := direntType(rec) + if !ok { + break + } + const namoff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name)) + namlen, ok := direntNamlen(rec) + if !ok || namoff+namlen > uint64(len(rec)) { + break + } + name := rec[namoff : namoff+namlen] + + for i, c := range name { + if c == 0 { + name = name[:i] + break + } + } + // Check for useless names before allocating a string. + if string(name) == "." || string(name) == ".." { + continue + } + + mode, modeExists := direntTypeToFileMode(typ) + + entries = append(entries, &dirEntryInfo{string(name), mode, modeExists}) + } + return entries +} + +func direntIno(buf []byte) (uint64, bool) { + return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Ino), unsafe.Sizeof(syscall.Dirent{}.Ino)) +} + +func direntType(buf []byte) (uint64, bool) { + return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Type), unsafe.Sizeof(syscall.Dirent{}.Type)) +} + +func direntReclen(buf []byte) (uint64, bool) { + return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen)) +} + +func direntNamlen(buf []byte) (uint64, bool) { + reclen, ok := direntReclen(buf) + if !ok { + return 0, false + } + return reclen - uint64(unsafe.Offsetof(syscall.Dirent{}.Name)), true +} + +// readInt returns the size-bytes unsigned integer in native byte order at offset off. 
+func readInt(b []byte, off, size uintptr) (u uint64, ok bool) { + if len(b) < int(off+size) { + return 0, false + } + return readIntLE(b[off:], size), true +} + +func readIntLE(b []byte, size uintptr) uint64 { + switch size { + case 1: + return uint64(b[0]) + case 2: + _ = b[1] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[0]) | uint64(b[1])<<8 + case 4: + _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 + case 8: + _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 + default: + panic("syscall: readInt with unsupported size") + } +} + +// If the directory entry doesn't specify the type, fall back to using lstat to get the type. +func lstatFileMode(name string) (os.FileMode, error) { + stat, err := os.Lstat(name) + if err != nil { + return 0, err + } + + return stat.Mode() & (os.ModeType | os.ModeCharDevice), nil +} + +// from Linux and Darwin dirent.h +const ( + DT_UNKNOWN = 0 + DT_FIFO = 1 + DT_CHR = 2 + DT_DIR = 4 + DT_BLK = 6 + DT_REG = 8 + DT_LNK = 10 + DT_SOCK = 12 +) + +func direntTypeToFileMode(typ uint64) (os.FileMode, bool) { + exists := true + var mode os.FileMode + switch typ { + case DT_UNKNOWN: + exists = false + case DT_FIFO: + mode = os.ModeNamedPipe + case DT_CHR: + mode = os.ModeDevice | os.ModeCharDevice + case DT_DIR: + mode = os.ModeDir + case DT_BLK: + mode = os.ModeDevice + case DT_REG: + mode = 0 + case DT_LNK: + mode = os.ModeSymlink + case DT_SOCK: + mode = os.ModeSocket + default: + exists = false + } + + return mode, exists +} diff --git a/finder/fs/readdir_test.go b/finder/fs/readdir_test.go new file mode 100644 index 000000000..24a6d1884 --- /dev/null +++ b/finder/fs/readdir_test.go @@ -0,0 +1,312 @@ +// Copyright 2017 Google Inc. 
All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fs + +import ( + "os" + "reflect" + "runtime" + "testing" +) + +func TestParseDirent(t *testing.T) { + testCases := []struct { + name string + in []byte + out []*dirEntryInfo + }{ + { + // Test that type DT_DIR is translated to os.ModeDir + name: "dir", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", os.ModeDir, true}, + }, + }, + { + // Test that type DT_REG is translated to a regular file + name: "file", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x08, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", 0, true}, + }, + }, + { + // Test that type DT_LNK is translated to a regular os.ModeSymlink + name: "symlink", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 
0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x0a, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", os.ModeSymlink, true}, + }, + }, + { + // Test that type DT_UNKNOWN sets modeExists: false + name: "unknown", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x00, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", 0, false}, + }, + }, + { + // Test a name with no padding after the null terminator + name: "no padding", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x20, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + }, + out: []*dirEntryInfo{ + {".module_path", os.ModeDir, true}, + }, + }, + { + // Test two sequential entries + name: "two entries", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // 
__off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x74, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_paths", os.ModeDir, true}, + {".module_patht", os.ModeDir, true}, + }, + }, + { + // Test two sequential entries with no padding between them + name: "two entries no padding", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x20, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_path", os.ModeDir, true}, + {".module_paths", os.ModeDir, true}, + }, + }, + { + // Test an empty buffer. This shouldn't happen in practice because + // readdir doesn't call parseDirent if no bytes were returned. 
+ name: "empty", + in: []byte{}, + out: nil, + }, + { + name: "missing null terminator", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x20, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + }, + out: []*dirEntryInfo{ + {".module_paths", os.ModeDir, true}, + }, + }, + { + // Test two sequential entries where the first has an incorrect d_reclen. + // Should return with no entries. + name: "two entries first malformed", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x10, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: nil, + }, + { + // Test two sequential entries where the second has an incorrect d_reclen. + // Should return the first entry. 
+ name: "two entries second malformed", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x28, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x10, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }, + out: []*dirEntryInfo{ + {".module_path", os.ModeDir, true}, + }, + }, + { + // Test a reclen that goes past the end of the buffer. + name: "overrun", + in: []byte{ + // __ino64_t d_ino; + 0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, + // __off64_t d_off; + 0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03, + // unsigned short int d_reclen; + 0x30, 0x00, + // unsigned char d_type; + 0x04, + // char d_name[]; + 0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00, + }, + out: nil, + }, + } + + if runtime.GOOS != "linux" { + t.Skip("depends on Linux definitions of syscall.Dirent") + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + entries := parseDirent(testCase.in, nil) + if !reflect.DeepEqual(testCase.out, entries) { + t.Fatalf("expected:\n %v\ngot:\n %v\n", testCase.out, entries) + } + }) + } +}