libziparchive: Use ReadAtOffset exclusively

The use of ReadAtOffset is meant to allow concurrent access
to the zip archive once it has been loaded. There were places
where this was the case, and some places that did a seek + read
combination, which could lead to data races.

NOTE: On Windows, we are not using pread as the implementation of
ReadAtOffset, therefore the guarantees on Windows are weaker.

On Linux, pread allows the file descriptor to be read at a specific
offset without changing the read pointer. This allows inherited fd's
and duped fds to be read concurrently.

On Windows, we use the ReadFile API, which allows for an atomic seek +
read operation, but modifies the read pointer. This means that any mix
use of ReadAtOffset and Read will have races. Just using ReadAtOffset is
safe.

For the Windows case, this is fine as the libziparchive code now only
uses ReadAtOffset.

Bug: 62184114
Bug: 62101783
Test: make ziparchive-tests (existing tests pass)
Change-Id: Ia7f9a30af2216682cdd9d578d26e84bc46773bb9
This commit is contained in:
Adam Lesinski 2017-06-19 10:27:38 -07:00
parent c31963b5c2
commit de117e4a49
6 changed files with 84 additions and 79 deletions

View file

@ -153,6 +153,37 @@ bool ReadFully(int fd, void* data, size_t byte_count) {
return true;
}
#if defined(_WIN32)
// Windows implementation of pread. Note that this DOES move the file descriptors read position,
// but it does so atomically.
static ssize_t pread(int fd, void* data, size_t byte_count, off64_t offset) {
DWORD bytes_read;
OVERLAPPED overlapped;
memset(&overlapped, 0, sizeof(OVERLAPPED));
overlapped.Offset = static_cast<DWORD>(offset);
overlapped.OffsetHigh = static_cast<DWORD>(offset >> 32);
if (!ReadFile(reinterpret_cast<HANDLE>(_get_osfhandle(fd)), data, static_cast<DWORD>(byte_count),
&bytes_read, &overlapped)) {
// In case someone tries to read errno (since this is masquerading as a POSIX call)
errno = EIO;
return -1;
}
return static_cast<ssize_t>(bytes_read);
}
#endif
bool ReadFullyAtOffset(int fd, void* data, size_t byte_count, off64_t offset) {
uint8_t* p = reinterpret_cast<uint8_t*>(data);
while (byte_count > 0) {
ssize_t n = TEMP_FAILURE_RETRY(pread(fd, p, byte_count, offset));
if (n <= 0) return false;
p += n;
byte_count -= n;
offset += n;
}
return true;
}
bool WriteFully(int fd, const void* data, size_t byte_count) {
const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
size_t remaining = byte_count;

View file

@ -42,6 +42,17 @@ bool WriteStringToFile(const std::string& content, const std::string& path,
#endif
bool ReadFully(int fd, void* data, size_t byte_count);
// Reads `byte_count` bytes from the file descriptor at the specified offset.
// Returns false if there was an IO error or EOF was reached before reading `byte_count` bytes.
//
// NOTE: On Linux/Mac, this function wraps pread, which provides atomic read support without
// modifying the read pointer of the file descriptor. On Windows, however, the read pointer does
// get modified. This means that ReadFullyAtOffset can be used concurrently with other calls to the
// same function, but concurrently seeking or reading incrementally can lead to unexpected
// behavior.
bool ReadFullyAtOffset(int fd, void* data, size_t byte_count, off64_t offset);
bool WriteFully(int fd, const void* data, size_t byte_count);
bool RemoveFileIfExists(const std::string& path, std::string* err = nullptr);

View file

@ -40,7 +40,8 @@ class ZipArchiveStreamEntry {
ZipArchiveHandle handle_;
uint32_t crc32_;
off64_t offset_ = 0;
uint32_t crc32_ = 0u;
};
#endif // LIBZIPARCHIVE_ZIPARCHIVESTREAMENTRY_H_

View file

@ -435,13 +435,20 @@ void CloseArchive(ZipArchiveHandle handle) {
static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry) {
uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
if (!mapped_zip.ReadData(ddBuf, sizeof(ddBuf))) {
off64_t offset = entry->offset;
if (entry->method != kCompressStored) {
offset += entry->compressed_length;
} else {
offset += entry->uncompressed_length;
}
if (!mapped_zip.ReadAtOffset(ddBuf, sizeof(ddBuf), offset)) {
return kIoError;
}
const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset);
const uint16_t ddOffset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + ddOffset);
// Validate that the values in the data descriptor match those in the central
// directory.
@ -899,7 +906,9 @@ static int32_t InflateEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry* e
/* read as much as we can */
if (zstream.avail_in == 0) {
const size_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
if (!mapped_zip.ReadData(read_buf.data(), getSize)) {
off64_t offset = entry->offset + (entry->compressed_length - compressed_length);
// Make sure to read at offset to ensure concurrent access to the fd.
if (!mapped_zip.ReadAtOffset(read_buf.data(), getSize, offset)) {
ALOGW("Zip: inflate read failed, getSize = %zu: %s", getSize, strerror(errno));
return kIoError;
}
@ -962,12 +971,15 @@ static int32_t CopyEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry* entr
uint64_t crc = 0;
while (count < length) {
uint32_t remaining = length - count;
off64_t offset = entry->offset + count;
// Safe conversion because kBufSize is narrow enough for a 32 bit signed
// value.
// Safe conversion because kBufSize is narrow enough for a 32 bit signed value.
const size_t block_size = (remaining > kBufSize) ? kBufSize : remaining;
if (!mapped_zip.ReadData(buf.data(), block_size)) {
ALOGW("CopyFileToFile: copy read failed, block_size = %zu: %s", block_size, strerror(errno));
// Make sure to read at offset to ensure concurrent access to the fd.
if (!mapped_zip.ReadAtOffset(buf.data(), block_size, offset)) {
ALOGW("CopyFileToFile: copy read failed, block_size = %zu, offset = %" PRId64 ": %s",
block_size, static_cast<int64_t>(offset), strerror(errno));
return kIoError;
}
@ -986,12 +998,6 @@ static int32_t CopyEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry* entr
int32_t ExtractToWriter(ZipArchiveHandle handle, ZipEntry* entry, Writer* writer) {
ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
const uint16_t method = entry->method;
off64_t data_offset = entry->offset;
if (!archive->mapped_zip.SeekToOffset(data_offset)) {
ALOGW("Zip: lseek to data at %" PRId64 " failed", static_cast<int64_t>(data_offset));
return kIoError;
}
// this should default to kUnknownCompressionMethod.
int32_t return_value = -1;
@ -1111,52 +1117,21 @@ off64_t MappedZipFile::GetFileLength() const {
}
}
bool MappedZipFile::SeekToOffset(off64_t offset) {
if (has_fd_) {
if (lseek64(fd_, offset, SEEK_SET) != offset) {
ALOGE("Zip: lseek to %" PRId64 " failed: %s\n", offset, strerror(errno));
return false;
}
return true;
} else {
if (offset < 0 || offset > static_cast<off64_t>(data_length_)) {
ALOGE("Zip: invalid offset: %" PRId64 ", data length: %" PRId64 "\n", offset, data_length_);
return false;
}
read_pos_ = offset;
return true;
}
}
bool MappedZipFile::ReadData(uint8_t* buffer, size_t read_amount) {
if (has_fd_) {
if (!android::base::ReadFully(fd_, buffer, read_amount)) {
ALOGE("Zip: read from %d failed\n", fd_);
return false;
}
} else {
memcpy(buffer, static_cast<uint8_t*>(base_ptr_) + read_pos_, read_amount);
read_pos_ += read_amount;
}
return true;
}
// Attempts to read |len| bytes into |buf| at offset |off|.
bool MappedZipFile::ReadAtOffset(uint8_t* buf, size_t len, off64_t off) {
#if !defined(_WIN32)
if (has_fd_) {
if (static_cast<size_t>(TEMP_FAILURE_RETRY(pread64(fd_, buf, len, off))) != len) {
if (!android::base::ReadFullyAtOffset(fd_, buf, len, off)) {
ALOGE("Zip: failed to read at offset %" PRId64 "\n", off);
return false;
}
return true;
} else {
if (off < 0 || off > static_cast<off64_t>(data_length_)) {
ALOGE("Zip: invalid offset: %" PRId64 ", data length: %" PRId64 "\n", off, data_length_);
return false;
}
memcpy(buf, static_cast<uint8_t*>(base_ptr_) + off, len);
}
#endif
if (!SeekToOffset(off)) {
return false;
}
return ReadData(buf, len);
return true;
}
void CentralDirectory::Initialize(void* map_base_ptr, off64_t cd_start_offset, size_t cd_size) {

View file

@ -93,14 +93,10 @@ enum ErrorCodes : int32_t {
class MappedZipFile {
public:
explicit MappedZipFile(const int fd)
: has_fd_(true), fd_(fd), base_ptr_(nullptr), data_length_(0), read_pos_(0) {}
: has_fd_(true), fd_(fd), base_ptr_(nullptr), data_length_(0) {}
explicit MappedZipFile(void* address, size_t length)
: has_fd_(false),
fd_(-1),
base_ptr_(address),
data_length_(static_cast<off64_t>(length)),
read_pos_(0) {}
: has_fd_(false), fd_(-1), base_ptr_(address), data_length_(static_cast<off64_t>(length)) {}
bool HasFd() const { return has_fd_; }
@ -110,10 +106,6 @@ class MappedZipFile {
off64_t GetFileLength() const;
bool SeekToOffset(off64_t offset);
bool ReadData(uint8_t* buffer, size_t read_amount);
bool ReadAtOffset(uint8_t* buf, size_t len, off64_t off);
private:
@ -127,8 +119,6 @@ class MappedZipFile {
void* const base_ptr_;
const off64_t data_length_;
// read_pos_ is the offset to the base_ptr_ where we read data from.
size_t read_pos_;
};
class CentralDirectory {

View file

@ -38,13 +38,8 @@
static constexpr size_t kBufSize = 65535;
bool ZipArchiveStreamEntry::Init(const ZipEntry& entry) {
ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle_);
off64_t data_offset = entry.offset;
if (!archive->mapped_zip.SeekToOffset(data_offset)) {
ALOGW("lseek to data at %" PRId64 " failed: %s", data_offset, strerror(errno));
return false;
}
crc32_ = entry.crc32;
offset_ = entry.offset;
return true;
}
@ -61,11 +56,11 @@ class ZipArchiveStreamEntryUncompressed : public ZipArchiveStreamEntry {
protected:
bool Init(const ZipEntry& entry) override;
uint32_t length_;
uint32_t length_ = 0u;
private:
std::vector<uint8_t> data_;
uint32_t computed_crc32_;
uint32_t computed_crc32_ = 0u;
};
bool ZipArchiveStreamEntryUncompressed::Init(const ZipEntry& entry) {
@ -89,7 +84,7 @@ const std::vector<uint8_t>* ZipArchiveStreamEntryUncompressed::Read() {
size_t bytes = (length_ > data_.size()) ? data_.size() : length_;
ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle_);
errno = 0;
if (!archive->mapped_zip.ReadData(data_.data(), bytes)) {
if (!archive->mapped_zip.ReadAtOffset(data_.data(), bytes, offset_)) {
if (errno != 0) {
ALOGE("Error reading from archive fd: %s", strerror(errno));
} else {
@ -104,6 +99,7 @@ const std::vector<uint8_t>* ZipArchiveStreamEntryUncompressed::Read() {
}
computed_crc32_ = crc32(computed_crc32_, data_.data(), data_.size());
length_ -= bytes;
offset_ += bytes;
return &data_;
}
@ -129,9 +125,9 @@ class ZipArchiveStreamEntryCompressed : public ZipArchiveStreamEntry {
z_stream z_stream_;
std::vector<uint8_t> in_;
std::vector<uint8_t> out_;
uint32_t uncompressed_length_;
uint32_t compressed_length_;
uint32_t computed_crc32_;
uint32_t uncompressed_length_ = 0u;
uint32_t compressed_length_ = 0u;
uint32_t computed_crc32_ = 0u;
};
// This method is using libz macros with old-style-casts
@ -210,7 +206,7 @@ const std::vector<uint8_t>* ZipArchiveStreamEntryCompressed::Read() {
size_t bytes = (compressed_length_ > in_.size()) ? in_.size() : compressed_length_;
ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle_);
errno = 0;
if (!archive->mapped_zip.ReadData(in_.data(), bytes)) {
if (!archive->mapped_zip.ReadAtOffset(in_.data(), bytes, offset_)) {
if (errno != 0) {
ALOGE("Error reading from archive fd: %s", strerror(errno));
} else {
@ -220,6 +216,7 @@ const std::vector<uint8_t>* ZipArchiveStreamEntryCompressed::Read() {
}
compressed_length_ -= bytes;
offset_ += bytes;
z_stream_.next_in = in_.data();
z_stream_.avail_in = bytes;
}