Reimplement ioutil.ReadDir with a version that avoids calling lstat

ioutil.ReadDir returns []os.FileInfo, which contains information on
each entry in the directory that is only available by calling
os.Lstat on the entry.  Finder only needs the name and type (regular,
directory or symlink) of the files, which on Linux kernels >= 2.6.4
is available in the return values of syscall.Getdents.

Replace ioutil.ReadDir with a call that uses syscall.Getdents
directly and collects the type information from the result.

Testing with:
rm -f /tmp/db && strace -fc finder -names Android.mk,Android.bp,Blueprints,CleanSpec.mk,TEST_MAPPING -exclude-dirs .git,.repo -prune-files .out-dir,.find-ignore -db /tmp/db .

Before:
  7.01   52.688304          63    833398         1 lstat
  1.90   14.246644          68    210523           getdents64
  1.25    9.370471          90    104286         1 openat

After:
  3.48   12.201385         117    104286         1 openat
  3.06   10.729138          51    210523           getdents64
  1.70    5.951892          57    104283         1 lstat

Pros:
Avoids 729115 calls to lstat.

Cons:
Requires copying ~200 lines of finicky buffer parsing code.
Puts all getdents calls (and possibly fallback lstat calls) onto
a blocking file descriptor, which will cause it to block a
thread and not just a goroutine.
Only works on Linux and Darwin.

Bug: 70897635
Test: m checkbuild
Change-Id: Iab9f82c38c8675d0b73b4e90540bb9e4d2ee52c1
This commit is contained in:
Colin Cross 2017-12-21 16:44:26 -08:00
parent a88c883e3e
commit ae7fd6baf3
4 changed files with 547 additions and 1 deletions

View file

@ -21,6 +21,10 @@ bootstrap_go_package {
pkgPath: "android/soong/finder/fs",
srcs: [
"fs.go",
"readdir.go",
],
testSrcs: [
"readdir_test.go",
],
darwin: {
srcs: [

View file

@ -75,8 +75,19 @@ type DirEntryInfo interface {
IsDir() bool
}
// dirEntryInfo is the directory entry record built by parseDirent and returned
// (as a DirEntryInfo) by readdir. It carries only a name and file-type bits.
type dirEntryInfo struct {
// name is the entry's file name, without any directory prefix.
name string
// mode holds the entry's file-type bits; only meaningful when modeExists is true.
mode os.FileMode
// modeExists is false when the directory entry did not report a usable type,
// in which case readdir fills mode in with an lstat before returning the entry.
modeExists bool
}
// Compile-time assertion that every os.FileInfo also satisfies DirEntryInfo.
var _ DirEntryInfo = os.FileInfo(nil)
// Name returns the entry's file name.
func (e *dirEntryInfo) Name() string {
	return e.name
}

// Mode returns the file-type bits recorded for the entry.
func (e *dirEntryInfo) Mode() os.FileMode {
	return e.mode
}

// IsDir reports whether the recorded mode describes a directory.
func (e *dirEntryInfo) IsDir() bool {
	return e.mode&os.ModeDir != 0
}

// String renders the entry as "<name>: <mode>".
func (e *dirEntryInfo) String() string {
	modeText := e.mode.String()
	return e.name + ": " + modeText
}
// osFs implements FileSystem using the local disk.
// It is an empty struct and holds no state of its own.
type osFs struct{}
@ -89,7 +100,7 @@ func (osFs) Lstat(path string) (stats os.FileInfo, err error) {
}
func (osFs) ReadDir(path string) (contents []DirEntryInfo, err error) {
entries, err := ioutil.ReadDir(path)
entries, err := readdir(path)
if err != nil {
return nil, err
}

219
finder/fs/readdir.go Normal file
View file

@ -0,0 +1,219 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fs
// This is based on the readdir implementation from Go 1.9:
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
import (
"os"
"syscall"
"unsafe"
)
const (
// blockSize is the size, in bytes, of the buffer readdir hands to
// syscall.ReadDirent on each call.
blockSize = 4096
)
// readdir lists the entries of the directory at path, excluding "." and "..".
//
// Entry types are taken from the raw directory records returned by
// syscall.ReadDirent. Only entries whose record does not carry a usable type
// are resolved with an lstat, and entries that vanish before that lstat are
// silently dropped.
//
// If reading the directory fails part-way, the entries collected so far are
// returned together with the error. If a fallback lstat fails for any reason
// other than the file not existing, the entries resolved so far are returned
// with that error.
func readdir(path string) ([]DirEntryInfo, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// Register Close only once Open is known to have succeeded, so it is never
	// invoked on a nil *os.File.
	defer f.Close()

	// Fd() hands back the raw descriptor in blocking mode, which is less efficient
	// than what file.ReadDir does since a slow read will keep a whole thread
	// blocked and not just a goroutine.
	fd := int(f.Fd())

	buf := make([]byte, blockSize)
	entries := make([]*dirEntryInfo, 0, 100)

	for {
		n, errno := syscall.ReadDirent(fd, buf)
		if errno != nil {
			// Remember the failure but still return what was read so far.
			err = os.NewSyscallError("readdirent", errno)
			break
		}
		if n <= 0 {
			break // EOF
		}

		entries = parseDirent(buf[:n], entries)
	}

	ret := make([]DirEntryInfo, 0, len(entries))
	for _, entry := range entries {
		if !entry.modeExists {
			// The directory record did not say what kind of file this is.
			mode, lerr := lstatFileMode(path + "/" + entry.name)
			if os.IsNotExist(lerr) {
				// File disappeared between readdir + stat.
				// Just treat it as if it didn't exist.
				continue
			}
			if lerr != nil {
				return ret, lerr
			}
			entry.mode = mode
			entry.modeExists = true
		}
		ret = append(ret, entry)
	}

	return ret, err
}
// parseDirent decodes the packed directory records in buf and appends one
// *dirEntryInfo per usable record to entries, returning the extended slice.
//
// Records with a zero inode and the names "." and ".." are skipped. Decoding
// stops at the first record that is truncated or whose length fields do not fit
// inside the buffer; whatever was decoded before that point is returned.
func parseDirent(buf []byte, entries []*dirEntryInfo) []*dirEntryInfo {
	const nameOffset = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))

	for len(buf) > 0 {
		recordLen, ok := direntReclen(buf)
		if !ok || recordLen > uint64(len(buf)) {
			return entries
		}
		// Carve the current record off the front of the buffer.
		record := buf[:recordLen]
		buf = buf[recordLen:]

		inode, ok := direntIno(record)
		if !ok {
			return entries
		}
		if inode == 0 {
			// File absent in directory.
			continue
		}

		rawType, ok := direntType(record)
		if !ok {
			return entries
		}

		nameLen, ok := direntNamlen(record)
		nameEnd := nameOffset + nameLen
		if !ok || nameEnd > uint64(len(record)) {
			return entries
		}

		// The name field may be padded; cut it at the first NUL byte, if any.
		rawName := record[nameOffset:nameEnd]
		end := 0
		for end < len(rawName) && rawName[end] != 0 {
			end++
		}
		rawName = rawName[:end]

		// Filter the self/parent entries before allocating a string for the name.
		switch string(rawName) {
		case ".", "..":
			continue
		}

		mode, known := direntTypeToFileMode(rawType)
		entries = append(entries, &dirEntryInfo{
			name:       string(rawName),
			mode:       mode,
			modeExists: known,
		})
	}

	return entries
}
// direntIno reads the Ino field of the syscall.Dirent record at the start of
// buf. ok is false when buf is too short to contain that field.
func direntIno(buf []byte) (uint64, bool) {
	var ent syscall.Dirent
	offset := unsafe.Offsetof(ent.Ino)
	width := unsafe.Sizeof(ent.Ino)
	return readInt(buf, offset, width)
}
// direntType reads the Type field of the syscall.Dirent record at the start of
// buf. ok is false when buf is too short to contain that field.
func direntType(buf []byte) (uint64, bool) {
	var ent syscall.Dirent
	offset := unsafe.Offsetof(ent.Type)
	width := unsafe.Sizeof(ent.Type)
	return readInt(buf, offset, width)
}
// direntReclen reads the Reclen field of the syscall.Dirent record at the start
// of buf. ok is false when buf is too short to contain that field.
func direntReclen(buf []byte) (uint64, bool) {
	var ent syscall.Dirent
	offset := unsafe.Offsetof(ent.Reclen)
	width := unsafe.Sizeof(ent.Reclen)
	return readInt(buf, offset, width)
}
// direntNamlen returns the number of bytes available for the name in the
// directory record at the start of buf, computed as the record length minus the
// offset of the Name field.
//
// ok is false when the record length cannot be read, or when it is smaller than
// the fixed header that precedes the name. The second check keeps the unsigned
// subtraction from wrapping around to a huge bogus length.
func direntNamlen(buf []byte) (uint64, bool) {
	const namoff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))

	reclen, ok := direntReclen(buf)
	if !ok || reclen < namoff {
		return 0, false
	}
	return reclen - namoff, true
}
// readInt decodes the size-byte unsigned integer stored at offset off in b,
// using readIntLE (little-endian). ok is false, and u is 0, when b does not
// extend far enough to hold the whole field.
func readInt(b []byte, off, size uintptr) (u uint64, ok bool) {
	if end := int(off + size); len(b) >= end {
		return readIntLE(b[off:], size), true
	}
	return 0, false
}
// readIntLE decodes the first size bytes of b as a little-endian unsigned
// integer. size must be 1, 2, 4 or 8; any other value panics. b must hold at
// least size bytes.
func readIntLE(b []byte, size uintptr) uint64 {
	switch size {
	case 1, 2, 4, 8:
		// supported widths
	default:
		panic("syscall: readInt with unsupported size")
	}

	_ = b[size-1] // bounds check hint to compiler; see golang.org/issue/14808

	var v uint64
	for i := uintptr(0); i < size; i++ {
		v |= uint64(b[i]) << (8 * i)
	}
	return v
}
// lstatFileMode is the fallback for directory entries whose record does not
// specify a type: it lstats name and returns only the file-type bits of its
// mode (os.ModeType plus os.ModeCharDevice). On failure it returns a zero mode
// and the error from os.Lstat.
func lstatFileMode(name string) (os.FileMode, error) {
	info, err := os.Lstat(name)
	if err == nil {
		const typeBits = os.ModeType | os.ModeCharDevice
		return info.Mode() & typeBits, nil
	}
	return 0, err
}
// Directory entry type codes, from Linux and Darwin dirent.h.
const (
	DT_UNKNOWN = 0  // type not reported
	DT_FIFO    = 1  // named pipe
	DT_CHR     = 2  // character device
	DT_DIR     = 4  // directory
	DT_BLK     = 6  // block device
	DT_REG     = 8  // regular file
	DT_LNK     = 10 // symbolic link
	DT_SOCK    = 12 // socket
)

// direntTypeToFileMode maps a directory entry type code to the matching
// os.FileMode type bits. The boolean is false for DT_UNKNOWN and for any code
// not listed above, in which case the returned mode is zero.
func direntTypeToFileMode(typ uint64) (os.FileMode, bool) {
	switch typ {
	case DT_FIFO:
		return os.ModeNamedPipe, true
	case DT_CHR:
		return os.ModeDevice | os.ModeCharDevice, true
	case DT_DIR:
		return os.ModeDir, true
	case DT_BLK:
		return os.ModeDevice, true
	case DT_REG:
		// Regular files have no type bits set.
		return 0, true
	case DT_LNK:
		return os.ModeSymlink, true
	case DT_SOCK:
		return os.ModeSocket, true
	}
	// DT_UNKNOWN or an unrecognised code.
	return 0, false
}

312
finder/fs/readdir_test.go Normal file
View file

@ -0,0 +1,312 @@
// Copyright 2017 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fs
import (
"os"
"reflect"
"runtime"
"testing"
)
// TestParseDirent feeds hand-built directory record buffers to parseDirent and
// compares the decoded entries against the expected list. Each buffer is laid
// out field by field as annotated in the case (d_ino, d_off, d_reclen, d_type,
// d_name), with multi-byte fields written least-significant byte first.
func TestParseDirent(t *testing.T) {
testCases := []struct {
name string
in []byte
out []*dirEntryInfo
}{
{
// Test that type DT_DIR is translated to os.ModeDir
name: "dir",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", os.ModeDir, true},
},
},
{
// Test that type DT_REG is translated to a regular file
name: "file",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x08,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", 0, true},
},
},
{
// Test that type DT_LNK is translated to os.ModeSymlink
name: "symlink",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x0a,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", os.ModeSymlink, true},
},
},
{
// Test that type DT_UNKNOWN sets modeExists: false
name: "unknown",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x00,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", 0, false},
},
},
{
// Test a name with no padding after the null terminator
name: "no padding",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x20, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
},
out: []*dirEntryInfo{
{".module_path", os.ModeDir, true},
},
},
{
// Test two sequential entries
name: "two entries",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x74,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_paths", os.ModeDir, true},
{".module_patht", os.ModeDir, true},
},
},
{
// Test two sequential entries with no padding between them
name: "two entries no padding",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x20, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_path", os.ModeDir, true},
{".module_paths", os.ModeDir, true},
},
},
{
// Test an empty buffer. This shouldn't happen in practice because
// readdir doesn't call parseDirent if no bytes were returned.
name: "empty",
in: []byte{},
out: nil,
},
{
// Test a name that runs right up to the end of the record (d_reclen)
// with no null terminator; the whole name field should be used.
name: "missing null terminator",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x20, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
},
out: []*dirEntryInfo{
{".module_paths", os.ModeDir, true},
},
},
{
// Test two sequential entries where the first has an incorrect d_reclen.
// Should return with no entries.
name: "two entries first malformed",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x10, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: nil,
},
{
// Test two sequential entries where the second has an incorrect d_reclen.
// Should return the first entry.
name: "two entries second malformed",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x28, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x10, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x73,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
},
out: []*dirEntryInfo{
{".module_path", os.ModeDir, true},
},
},
{
// Test a reclen that goes past the end of the buffer.
name: "overrun",
in: []byte{
// __ino64_t d_ino;
0xfb, 0x10, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00,
// __off64_t d_off;
0xeb, 0x85, 0x20, 0x91, 0xb9, 0x14, 0x34, 0x03,
// unsigned short int d_reclen;
0x30, 0x00,
// unsigned char d_type;
0x04,
// char d_name[];
0x2e, 0x6d, 0x6f, 0x64, 0x75, 0x6c, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x00,
},
out: nil,
},
}
// The byte buffers above are written against the Linux layout of
// syscall.Dirent, so the test is skipped everywhere else.
if runtime.GOOS != "linux" {
t.Skip("depends on Linux definitions of syscall.Dirent")
}
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
// parseDirent is started with a nil slice, so a case that decodes
// nothing is expected to yield nil rather than an empty slice.
entries := parseDirent(testCase.in, nil)
if !reflect.DeepEqual(testCase.out, entries) {
t.Fatalf("expected:\n %v\ngot:\n %v\n", testCase.out, entries)
}
})
}
}