From 50ef29a170b19906374d506960f8274f14cb052d Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Tue, 18 Jun 2019 18:23:59 -0700 Subject: [PATCH] Finally remove ZipString. Bug: http://b/129068177 Test: treehugger Change-Id: If8c009f96931c9c2672255d8d0fe01d7992282af --- .../include/ziparchive/zip_archive.h | 26 --- libziparchive/zip_archive.cc | 155 +++++++----------- libziparchive/zip_archive_private.h | 22 +-- 3 files changed, 66 insertions(+), 137 deletions(-) diff --git a/libziparchive/include/ziparchive/zip_archive.h b/libziparchive/include/ziparchive/zip_archive.h index 463851caf..e3ac114f2 100644 --- a/libziparchive/include/ziparchive/zip_archive.h +++ b/libziparchive/include/ziparchive/zip_archive.h @@ -36,30 +36,6 @@ enum { kCompressDeflated = 8, // standard deflate }; -// TODO: remove this when everyone's moved over to std::string. -struct ZipString { - const uint8_t* name; - uint16_t name_length; - - ZipString() {} - - explicit ZipString(std::string_view entry_name); - - bool operator==(const ZipString& rhs) const { - return name && (name_length == rhs.name_length) && (memcmp(name, rhs.name, name_length) == 0); - } - - bool StartsWith(const ZipString& prefix) const { - return name && (name_length >= prefix.name_length) && - (memcmp(name, prefix.name, prefix.name_length) == 0); - } - - bool EndsWith(const ZipString& suffix) const { - return name && (name_length >= suffix.name_length) && - (memcmp(name + name_length - suffix.name_length, suffix.name, suffix.name_length) == 0); - } -}; - /* * Represents information about a zip entry in a zip file. */ @@ -191,8 +167,6 @@ int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr, */ int32_t Next(void* cookie, ZipEntry* data, std::string* name); int32_t Next(void* cookie, ZipEntry* data, std::string_view* name); -// TODO: remove this when everyone's moved over to std::string/std::string_view. -int32_t Next(void* cookie, ZipEntry* data, ZipString* name); /* * End iteration over all entries of a zip file and frees the memory allocated diff --git a/libziparchive/zip_archive.cc b/libziparchive/zip_archive.cc index e9662958f..c95b03542 100644 --- a/libziparchive/zip_archive.cc +++ b/libziparchive/zip_archive.cc @@ -47,6 +47,7 @@ #include // TEMP_FAILURE_RETRY may or may not be in unistd #include #include +#include #include #include #include "zlib.h" @@ -101,25 +102,8 @@ static uint32_t RoundUpPower2(uint32_t val) { return val; } -static uint32_t ComputeHash(const ZipString& name) { - return static_cast(std::hash{}( - std::string_view(reinterpret_cast(name.name), name.name_length))); -} - -static bool isZipStringEqual(const uint8_t* start, const ZipString& zip_string, - const ZipStringOffset& zip_string_offset) { - const ZipString from_offset = zip_string_offset.GetZipString(start); - return from_offset == zip_string; -} - -/** - * Returns offset of ZipString#name from the start of the central directory in the memory map. - * For valid ZipStrings contained in the zip archive mmap, 0 < offset < 0xffffff. - */ -static inline uint32_t GetOffset(const uint8_t* name, const uint8_t* start) { - CHECK_GT(name, start); - CHECK_LT(name, start + 0xffffff); - return static_cast(name - start); +static uint32_t ComputeHash(std::string_view name) { + return static_cast(std::hash{}(name)); } /* @@ -127,19 +111,19 @@ static inline uint32_t GetOffset(const uint8_t* name, const uint8_t* start) { * valid range. */ static int64_t EntryToIndex(const ZipStringOffset* hash_table, const uint32_t hash_table_size, - const ZipString& name, const uint8_t* start) { + std::string_view name, const uint8_t* start) { const uint32_t hash = ComputeHash(name); // NOTE: (hash_table_size - 1) is guaranteed to be non-negative. uint32_t ent = hash & (hash_table_size - 1); while (hash_table[ent].name_offset != 0) { - if (isZipStringEqual(start, name, hash_table[ent])) { + if (hash_table[ent].ToStringView(start) == name) { return ent; } ent = (ent + 1) & (hash_table_size - 1); } - ALOGV("Zip: Unable to find entry %.*s", name.name_length, name.name); + ALOGV("Zip: Unable to find entry %.*s", static_cast(name.size()), name.data()); return kEntryNotFound; } @@ -147,7 +131,7 @@ static int64_t EntryToIndex(const ZipStringOffset* hash_table, const uint32_t ha * Add a new entry to the hash table. */ static int32_t AddToHash(ZipStringOffset* hash_table, const uint32_t hash_table_size, - const ZipString& name, const uint8_t* start) { + std::string_view name, const uint8_t* start) { const uint64_t hash = ComputeHash(name); uint32_t ent = hash & (hash_table_size - 1); @@ -156,15 +140,18 @@ static int32_t AddToHash(ZipStringOffset* hash_table, const uint32_t hash_table_ * Further, we guarantee that the hashtable size is not 0. */ while (hash_table[ent].name_offset != 0) { - if (isZipStringEqual(start, name, hash_table[ent])) { - // We've found a duplicate entry. We don't accept it - ALOGW("Zip: Found duplicate entry %.*s", name.name_length, name.name); + if (hash_table[ent].ToStringView(start) == name) { + // We've found a duplicate entry. We don't accept duplicates. + ALOGW("Zip: Found duplicate entry %.*s", static_cast(name.size()), name.data()); return kDuplicateEntry; } ent = (ent + 1) & (hash_table_size - 1); } - hash_table[ent].name_offset = GetOffset(name.name, start); - hash_table[ent].name_length = name.name_length; + + // `name` has already been validated before entry. + const char* start_char = reinterpret_cast(start); + hash_table[ent].name_offset = static_cast(name.data() - start_char); + hash_table[ent].name_length = static_cast(name.size()); return 0; } @@ -366,7 +353,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) { reinterpret_cast(calloc(archive->hash_table_size, sizeof(ZipStringOffset))); if (archive->hash_table == nullptr) { ALOGW("Zip: unable to allocate the %u-entry hash_table, entry size: %zu", - archive->hash_table_size, sizeof(ZipString)); + archive->hash_table_size, sizeof(ZipStringOffset)); return -1; } @@ -404,21 +391,19 @@ static int32_t ParseZipArchive(ZipArchive* archive) { const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord); if (file_name + file_name_length > cd_end) { - ALOGW( - "Zip: file name boundary exceeds the central directory range, file_name_length: " - "%" PRIx16 ", cd_length: %zu", - file_name_length, cd_length); + ALOGW("Zip: file name for entry %" PRIu16 + " exceeds the central directory range, file_name_length: %" PRIu16 ", cd_length: %zu", + i, file_name_length, cd_length); return -1; } - /* check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters */ + // Check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters. if (!IsValidEntryName(file_name, file_name_length)) { + ALOGW("Zip: invalid file name at entry %" PRIu16, i); return -1; } - /* add the CDE filename to the hash table */ - ZipString entry_name; - entry_name.name = file_name; - entry_name.name_length = file_name_length; + // Add the CDE filename to the hash table. + std::string_view entry_name{reinterpret_cast(file_name), file_name_length}; const int add_result = AddToHash(archive->hash_table, archive->hash_table_size, entry_name, archive->central_directory.GetBasePtr()); if (add_result != 0) { @@ -539,15 +524,13 @@ static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry* // Recover the start of the central directory entry from the filename // pointer. The filename is the first entry past the fixed-size data, // so we can just subtract back from that. - const ZipString from_offset = - archive->hash_table[ent].GetZipString(archive->central_directory.GetBasePtr()); - const uint8_t* ptr = from_offset.name; + const uint8_t* base_ptr = archive->central_directory.GetBasePtr(); + const uint8_t* ptr = base_ptr + archive->hash_table[ent].name_offset; ptr -= sizeof(CentralDirectoryRecord); // This is the base of our mmapped region, we have to sanity check that // the name that's in the hash table is a pointer to a location within // this mapped region. - const uint8_t* base_ptr = archive->central_directory.GetBasePtr(); if (ptr < base_ptr || ptr > base_ptr + archive->central_directory.GetMapLength()) { ALOGW("Zip: Invalid entry pointer"); return kInvalidOffset; @@ -639,26 +622,24 @@ static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry* // Check that the local file header name matches the declared // name in the central directory. - if (lfh->file_name_length == nameLen) { - const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader); - if (name_offset + lfh->file_name_length > cd_offset) { - ALOGW("Zip: Invalid declared length"); - return kInvalidOffset; - } - - std::vector name_buf(nameLen); - if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) { - ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast(name_offset)); - return kIoError; - } - const ZipString from_offset = - archive->hash_table[ent].GetZipString(archive->central_directory.GetBasePtr()); - if (memcmp(from_offset.name, name_buf.data(), nameLen)) { - return kInconsistentInformation; - } - - } else { - ALOGW("Zip: lfh name did not match central directory."); + if (lfh->file_name_length != nameLen) { + ALOGW("Zip: lfh name length did not match central directory"); + return kInconsistentInformation; + } + const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader); + if (name_offset + lfh->file_name_length > cd_offset) { + ALOGW("Zip: lfh name has invalid declared length"); + return kInvalidOffset; + } + std::vector name_buf(nameLen); + if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) { + ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast(name_offset)); + return kIoError; + } + const std::string_view entry_name = + archive->hash_table[ent].ToStringView(archive->central_directory.GetBasePtr()); + if (memcmp(entry_name.data(), name_buf.data(), nameLen) != 0) { + ALOGW("Zip: lfh name did not match central directory"); return kInconsistentInformation; } @@ -691,21 +672,13 @@ static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry* struct IterationHandle { ZipArchive* archive; - std::string prefix_holder; - ZipString prefix; - - std::string suffix_holder; - ZipString suffix; + std::string prefix; + std::string suffix; uint32_t position = 0; - IterationHandle(ZipArchive* archive, const std::string_view in_prefix, - const std::string_view in_suffix) - : archive(archive), - prefix_holder(in_prefix), - prefix(prefix_holder), - suffix_holder(in_suffix), - suffix(suffix_holder) {} + IterationHandle(ZipArchive* archive, std::string_view in_prefix, std::string_view in_suffix) + : archive(archive), prefix(in_prefix), suffix(in_suffix) {} }; int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr, @@ -737,8 +710,8 @@ int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryNa return kInvalidEntryName; } - const int64_t ent = EntryToIndex(archive->hash_table, archive->hash_table_size, - ZipString(entryName), archive->central_directory.GetBasePtr()); + const int64_t ent = EntryToIndex(archive->hash_table, archive->hash_table_size, entryName, + archive->central_directory.GetBasePtr()); if (ent < 0) { ALOGV("Zip: Could not find entry %.*s", static_cast(entryName.size()), entryName.data()); return static_cast(ent); // kEntryNotFound is safe to truncate. @@ -757,15 +730,6 @@ int32_t Next(void* cookie, ZipEntry* data, std::string* name) { } int32_t Next(void* cookie, ZipEntry* data, std::string_view* name) { - ZipString zs; - int32_t result = Next(cookie, data, &zs); - if (result == 0 && name) { - *name = std::string_view(reinterpret_cast(zs.name), zs.name_length); - } - return result; -} - -int32_t Next(void* cookie, ZipEntry* data, ZipString* name) { IterationHandle* handle = reinterpret_cast(cookie); if (handle == NULL) { ALOGW("Zip: Null ZipArchiveHandle"); @@ -782,16 +746,14 @@ int32_t Next(void* cookie, ZipEntry* data, ZipString* name) { const uint32_t hash_table_length = archive->hash_table_size; const ZipStringOffset* hash_table = archive->hash_table; for (uint32_t i = currentOffset; i < hash_table_length; ++i) { - const ZipString from_offset = - hash_table[i].GetZipString(archive->central_directory.GetBasePtr()); - if (hash_table[i].name_offset != 0 && - (handle->prefix.name_length == 0 || from_offset.StartsWith(handle->prefix)) && - (handle->suffix.name_length == 0 || from_offset.EndsWith(handle->suffix))) { + const std::string_view entry_name = + hash_table[i].ToStringView(archive->central_directory.GetBasePtr()); + if (hash_table[i].name_offset != 0 && (android::base::StartsWith(entry_name, handle->prefix) && + android::base::EndsWith(entry_name, handle->suffix))) { handle->position = (i + 1); const int error = FindEntry(archive, i, data); - if (!error) { - name->name = from_offset.name; - name->name_length = hash_table[i].name_length; + if (!error && name) { + *name = entry_name; } return error; } @@ -1159,13 +1121,6 @@ int GetFileDescriptor(const ZipArchiveHandle archive) { return archive->mapped_zip.GetFileDescriptor(); } -ZipString::ZipString(std::string_view entry_name) - : name(reinterpret_cast(entry_name.data())) { - size_t len = entry_name.size(); - CHECK_LE(len, static_cast(UINT16_MAX)); - name_length = static_cast(len); -} - #if !defined(_WIN32) class ProcessWriter : public zip_archive::Writer { public: diff --git a/libziparchive/zip_archive_private.h b/libziparchive/zip_archive_private.h index 330a02a07..30a1d722c 100644 --- a/libziparchive/zip_archive_private.h +++ b/libziparchive/zip_archive_private.h @@ -137,22 +137,22 @@ class CentralDirectory { }; /** - * More space efficient string representation of strings in an mmaped zipped file than - * std::string_view or ZipString. Using ZipString as an entry in the ZipArchive hashtable wastes - * space. ZipString stores a pointer to a string (on 64 bit, 8 bytes) and the length to read from - * that pointer, 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting 6 bytes. - * ZipStringOffset stores a 4 byte offset from a fixed location in the memory mapped file instead - * of the entire address, consuming 8 bytes with alignment. + * More space efficient string representation of strings in an mmaped zipped + * file than std::string_view. Using std::string_view as an entry in the + * ZipArchive hash table wastes space. std::string_view stores a pointer to a + * string (on 64 bit, 8 bytes) and the length to read from that pointer, + * 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting + * 6 bytes. + * + * ZipStringOffset stores a 4 byte offset from a fixed location in the memory + * mapped file instead of the entire address, consuming 8 bytes with alignment. */ struct ZipStringOffset { uint32_t name_offset; uint16_t name_length; - const ZipString GetZipString(const uint8_t* start) const { - ZipString zip_string; - zip_string.name = start + name_offset; - zip_string.name_length = name_length; - return zip_string; + const std::string_view ToStringView(const uint8_t* start) const { + return std::string_view{reinterpret_cast(start + name_offset), name_length}; } };