Merge "Finally remove ZipString."
This commit is contained in:
commit
2226fe6ddf
3 changed files with 66 additions and 137 deletions
|
@ -36,30 +36,6 @@ enum {
|
|||
kCompressDeflated = 8, // standard deflate
|
||||
};
|
||||
|
||||
// TODO: remove this when everyone's moved over to std::string.
|
||||
struct ZipString {
|
||||
const uint8_t* name;
|
||||
uint16_t name_length;
|
||||
|
||||
ZipString() {}
|
||||
|
||||
explicit ZipString(std::string_view entry_name);
|
||||
|
||||
bool operator==(const ZipString& rhs) const {
|
||||
return name && (name_length == rhs.name_length) && (memcmp(name, rhs.name, name_length) == 0);
|
||||
}
|
||||
|
||||
bool StartsWith(const ZipString& prefix) const {
|
||||
return name && (name_length >= prefix.name_length) &&
|
||||
(memcmp(name, prefix.name, prefix.name_length) == 0);
|
||||
}
|
||||
|
||||
bool EndsWith(const ZipString& suffix) const {
|
||||
return name && (name_length >= suffix.name_length) &&
|
||||
(memcmp(name + name_length - suffix.name_length, suffix.name, suffix.name_length) == 0);
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* Represents information about a zip entry in a zip file.
|
||||
*/
|
||||
|
@ -191,8 +167,6 @@ int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
|
|||
*/
|
||||
int32_t Next(void* cookie, ZipEntry* data, std::string* name);
|
||||
int32_t Next(void* cookie, ZipEntry* data, std::string_view* name);
|
||||
// TODO: remove this when everyone's moved over to std::string/std::string_view.
|
||||
int32_t Next(void* cookie, ZipEntry* data, ZipString* name);
|
||||
|
||||
/*
|
||||
* End iteration over all entries of a zip file and frees the memory allocated
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
#include <android-base/macros.h> // TEMP_FAILURE_RETRY may or may not be in unistd
|
||||
#include <android-base/mapped_file.h>
|
||||
#include <android-base/memory.h>
|
||||
#include <android-base/strings.h>
|
||||
#include <android-base/utf8.h>
|
||||
#include <log/log.h>
|
||||
#include "zlib.h"
|
||||
|
@ -101,25 +102,8 @@ static uint32_t RoundUpPower2(uint32_t val) {
|
|||
return val;
|
||||
}
|
||||
|
||||
static uint32_t ComputeHash(const ZipString& name) {
|
||||
return static_cast<uint32_t>(std::hash<std::string_view>{}(
|
||||
std::string_view(reinterpret_cast<const char*>(name.name), name.name_length)));
|
||||
}
|
||||
|
||||
static bool isZipStringEqual(const uint8_t* start, const ZipString& zip_string,
|
||||
const ZipStringOffset& zip_string_offset) {
|
||||
const ZipString from_offset = zip_string_offset.GetZipString(start);
|
||||
return from_offset == zip_string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns offset of ZipString#name from the start of the central directory in the memory map.
|
||||
* For valid ZipStrings contained in the zip archive mmap, 0 < offset < 0xffffff.
|
||||
*/
|
||||
static inline uint32_t GetOffset(const uint8_t* name, const uint8_t* start) {
|
||||
CHECK_GT(name, start);
|
||||
CHECK_LT(name, start + 0xffffff);
|
||||
return static_cast<uint32_t>(name - start);
|
||||
static uint32_t ComputeHash(std::string_view name) {
|
||||
return static_cast<uint32_t>(std::hash<std::string_view>{}(name));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -127,19 +111,19 @@ static inline uint32_t GetOffset(const uint8_t* name, const uint8_t* start) {
|
|||
* valid range.
|
||||
*/
|
||||
static int64_t EntryToIndex(const ZipStringOffset* hash_table, const uint32_t hash_table_size,
|
||||
const ZipString& name, const uint8_t* start) {
|
||||
std::string_view name, const uint8_t* start) {
|
||||
const uint32_t hash = ComputeHash(name);
|
||||
|
||||
// NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
|
||||
uint32_t ent = hash & (hash_table_size - 1);
|
||||
while (hash_table[ent].name_offset != 0) {
|
||||
if (isZipStringEqual(start, name, hash_table[ent])) {
|
||||
if (hash_table[ent].ToStringView(start) == name) {
|
||||
return ent;
|
||||
}
|
||||
ent = (ent + 1) & (hash_table_size - 1);
|
||||
}
|
||||
|
||||
ALOGV("Zip: Unable to find entry %.*s", name.name_length, name.name);
|
||||
ALOGV("Zip: Unable to find entry %.*s", static_cast<int>(name.size()), name.data());
|
||||
return kEntryNotFound;
|
||||
}
|
||||
|
||||
|
@ -147,7 +131,7 @@ static int64_t EntryToIndex(const ZipStringOffset* hash_table, const uint32_t ha
|
|||
* Add a new entry to the hash table.
|
||||
*/
|
||||
static int32_t AddToHash(ZipStringOffset* hash_table, const uint32_t hash_table_size,
|
||||
const ZipString& name, const uint8_t* start) {
|
||||
std::string_view name, const uint8_t* start) {
|
||||
const uint64_t hash = ComputeHash(name);
|
||||
uint32_t ent = hash & (hash_table_size - 1);
|
||||
|
||||
|
@ -156,15 +140,18 @@ static int32_t AddToHash(ZipStringOffset* hash_table, const uint32_t hash_table_
|
|||
* Further, we guarantee that the hashtable size is not 0.
|
||||
*/
|
||||
while (hash_table[ent].name_offset != 0) {
|
||||
if (isZipStringEqual(start, name, hash_table[ent])) {
|
||||
// We've found a duplicate entry. We don't accept it
|
||||
ALOGW("Zip: Found duplicate entry %.*s", name.name_length, name.name);
|
||||
if (hash_table[ent].ToStringView(start) == name) {
|
||||
// We've found a duplicate entry. We don't accept duplicates.
|
||||
ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
|
||||
return kDuplicateEntry;
|
||||
}
|
||||
ent = (ent + 1) & (hash_table_size - 1);
|
||||
}
|
||||
hash_table[ent].name_offset = GetOffset(name.name, start);
|
||||
hash_table[ent].name_length = name.name_length;
|
||||
|
||||
// `name` has already been validated before entry.
|
||||
const char* start_char = reinterpret_cast<const char*>(start);
|
||||
hash_table[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
|
||||
hash_table[ent].name_length = static_cast<uint16_t>(name.size());
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -366,7 +353,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
|
|||
reinterpret_cast<ZipStringOffset*>(calloc(archive->hash_table_size, sizeof(ZipStringOffset)));
|
||||
if (archive->hash_table == nullptr) {
|
||||
ALOGW("Zip: unable to allocate the %u-entry hash_table, entry size: %zu",
|
||||
archive->hash_table_size, sizeof(ZipString));
|
||||
archive->hash_table_size, sizeof(ZipStringOffset));
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -404,21 +391,19 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
|
|||
const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord);
|
||||
|
||||
if (file_name + file_name_length > cd_end) {
|
||||
ALOGW(
|
||||
"Zip: file name boundary exceeds the central directory range, file_name_length: "
|
||||
"%" PRIx16 ", cd_length: %zu",
|
||||
file_name_length, cd_length);
|
||||
ALOGW("Zip: file name for entry %" PRIu16
|
||||
" exceeds the central directory range, file_name_length: %" PRIu16 ", cd_length: %zu",
|
||||
i, file_name_length, cd_length);
|
||||
return -1;
|
||||
}
|
||||
/* check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters */
|
||||
// Check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters.
|
||||
if (!IsValidEntryName(file_name, file_name_length)) {
|
||||
ALOGW("Zip: invalid file name at entry %" PRIu16, i);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* add the CDE filename to the hash table */
|
||||
ZipString entry_name;
|
||||
entry_name.name = file_name;
|
||||
entry_name.name_length = file_name_length;
|
||||
// Add the CDE filename to the hash table.
|
||||
std::string_view entry_name{reinterpret_cast<const char*>(file_name), file_name_length};
|
||||
const int add_result = AddToHash(archive->hash_table, archive->hash_table_size, entry_name,
|
||||
archive->central_directory.GetBasePtr());
|
||||
if (add_result != 0) {
|
||||
|
@ -539,15 +524,13 @@ static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry*
|
|||
// Recover the start of the central directory entry from the filename
|
||||
// pointer. The filename is the first entry past the fixed-size data,
|
||||
// so we can just subtract back from that.
|
||||
const ZipString from_offset =
|
||||
archive->hash_table[ent].GetZipString(archive->central_directory.GetBasePtr());
|
||||
const uint8_t* ptr = from_offset.name;
|
||||
const uint8_t* base_ptr = archive->central_directory.GetBasePtr();
|
||||
const uint8_t* ptr = base_ptr + archive->hash_table[ent].name_offset;
|
||||
ptr -= sizeof(CentralDirectoryRecord);
|
||||
|
||||
// This is the base of our mmapped region, we have to sanity check that
|
||||
// the name that's in the hash table is a pointer to a location within
|
||||
// this mapped region.
|
||||
const uint8_t* base_ptr = archive->central_directory.GetBasePtr();
|
||||
if (ptr < base_ptr || ptr > base_ptr + archive->central_directory.GetMapLength()) {
|
||||
ALOGW("Zip: Invalid entry pointer");
|
||||
return kInvalidOffset;
|
||||
|
@ -639,26 +622,24 @@ static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry*
|
|||
|
||||
// Check that the local file header name matches the declared
|
||||
// name in the central directory.
|
||||
if (lfh->file_name_length == nameLen) {
|
||||
const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
|
||||
if (name_offset + lfh->file_name_length > cd_offset) {
|
||||
ALOGW("Zip: Invalid declared length");
|
||||
return kInvalidOffset;
|
||||
}
|
||||
|
||||
std::vector<uint8_t> name_buf(nameLen);
|
||||
if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
|
||||
ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
|
||||
return kIoError;
|
||||
}
|
||||
const ZipString from_offset =
|
||||
archive->hash_table[ent].GetZipString(archive->central_directory.GetBasePtr());
|
||||
if (memcmp(from_offset.name, name_buf.data(), nameLen)) {
|
||||
return kInconsistentInformation;
|
||||
}
|
||||
|
||||
} else {
|
||||
ALOGW("Zip: lfh name did not match central directory.");
|
||||
if (lfh->file_name_length != nameLen) {
|
||||
ALOGW("Zip: lfh name length did not match central directory");
|
||||
return kInconsistentInformation;
|
||||
}
|
||||
const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
|
||||
if (name_offset + lfh->file_name_length > cd_offset) {
|
||||
ALOGW("Zip: lfh name has invalid declared length");
|
||||
return kInvalidOffset;
|
||||
}
|
||||
std::vector<uint8_t> name_buf(nameLen);
|
||||
if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
|
||||
ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
|
||||
return kIoError;
|
||||
}
|
||||
const std::string_view entry_name =
|
||||
archive->hash_table[ent].ToStringView(archive->central_directory.GetBasePtr());
|
||||
if (memcmp(entry_name.data(), name_buf.data(), nameLen) != 0) {
|
||||
ALOGW("Zip: lfh name did not match central directory");
|
||||
return kInconsistentInformation;
|
||||
}
|
||||
|
||||
|
@ -691,21 +672,13 @@ static int32_t FindEntry(const ZipArchive* archive, const int32_t ent, ZipEntry*
|
|||
struct IterationHandle {
|
||||
ZipArchive* archive;
|
||||
|
||||
std::string prefix_holder;
|
||||
ZipString prefix;
|
||||
|
||||
std::string suffix_holder;
|
||||
ZipString suffix;
|
||||
std::string prefix;
|
||||
std::string suffix;
|
||||
|
||||
uint32_t position = 0;
|
||||
|
||||
IterationHandle(ZipArchive* archive, const std::string_view in_prefix,
|
||||
const std::string_view in_suffix)
|
||||
: archive(archive),
|
||||
prefix_holder(in_prefix),
|
||||
prefix(prefix_holder),
|
||||
suffix_holder(in_suffix),
|
||||
suffix(suffix_holder) {}
|
||||
IterationHandle(ZipArchive* archive, std::string_view in_prefix, std::string_view in_suffix)
|
||||
: archive(archive), prefix(in_prefix), suffix(in_suffix) {}
|
||||
};
|
||||
|
||||
int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
|
||||
|
@ -737,8 +710,8 @@ int32_t FindEntry(const ZipArchiveHandle archive, const std::string_view entryNa
|
|||
return kInvalidEntryName;
|
||||
}
|
||||
|
||||
const int64_t ent = EntryToIndex(archive->hash_table, archive->hash_table_size,
|
||||
ZipString(entryName), archive->central_directory.GetBasePtr());
|
||||
const int64_t ent = EntryToIndex(archive->hash_table, archive->hash_table_size, entryName,
|
||||
archive->central_directory.GetBasePtr());
|
||||
if (ent < 0) {
|
||||
ALOGV("Zip: Could not find entry %.*s", static_cast<int>(entryName.size()), entryName.data());
|
||||
return static_cast<int32_t>(ent); // kEntryNotFound is safe to truncate.
|
||||
|
@ -757,15 +730,6 @@ int32_t Next(void* cookie, ZipEntry* data, std::string* name) {
|
|||
}
|
||||
|
||||
int32_t Next(void* cookie, ZipEntry* data, std::string_view* name) {
|
||||
ZipString zs;
|
||||
int32_t result = Next(cookie, data, &zs);
|
||||
if (result == 0 && name) {
|
||||
*name = std::string_view(reinterpret_cast<const char*>(zs.name), zs.name_length);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int32_t Next(void* cookie, ZipEntry* data, ZipString* name) {
|
||||
IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
|
||||
if (handle == NULL) {
|
||||
ALOGW("Zip: Null ZipArchiveHandle");
|
||||
|
@ -782,16 +746,14 @@ int32_t Next(void* cookie, ZipEntry* data, ZipString* name) {
|
|||
const uint32_t hash_table_length = archive->hash_table_size;
|
||||
const ZipStringOffset* hash_table = archive->hash_table;
|
||||
for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
|
||||
const ZipString from_offset =
|
||||
hash_table[i].GetZipString(archive->central_directory.GetBasePtr());
|
||||
if (hash_table[i].name_offset != 0 &&
|
||||
(handle->prefix.name_length == 0 || from_offset.StartsWith(handle->prefix)) &&
|
||||
(handle->suffix.name_length == 0 || from_offset.EndsWith(handle->suffix))) {
|
||||
const std::string_view entry_name =
|
||||
hash_table[i].ToStringView(archive->central_directory.GetBasePtr());
|
||||
if (hash_table[i].name_offset != 0 && (android::base::StartsWith(entry_name, handle->prefix) &&
|
||||
android::base::EndsWith(entry_name, handle->suffix))) {
|
||||
handle->position = (i + 1);
|
||||
const int error = FindEntry(archive, i, data);
|
||||
if (!error) {
|
||||
name->name = from_offset.name;
|
||||
name->name_length = hash_table[i].name_length;
|
||||
if (!error && name) {
|
||||
*name = entry_name;
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
@ -1159,13 +1121,6 @@ int GetFileDescriptor(const ZipArchiveHandle archive) {
|
|||
return archive->mapped_zip.GetFileDescriptor();
|
||||
}
|
||||
|
||||
ZipString::ZipString(std::string_view entry_name)
|
||||
: name(reinterpret_cast<const uint8_t*>(entry_name.data())) {
|
||||
size_t len = entry_name.size();
|
||||
CHECK_LE(len, static_cast<size_t>(UINT16_MAX));
|
||||
name_length = static_cast<uint16_t>(len);
|
||||
}
|
||||
|
||||
#if !defined(_WIN32)
|
||||
class ProcessWriter : public zip_archive::Writer {
|
||||
public:
|
||||
|
|
|
@ -137,22 +137,22 @@ class CentralDirectory {
|
|||
};
|
||||
|
||||
/**
|
||||
* More space efficient string representation of strings in an mmaped zipped file than
|
||||
* std::string_view or ZipString. Using ZipString as an entry in the ZipArchive hashtable wastes
|
||||
* space. ZipString stores a pointer to a string (on 64 bit, 8 bytes) and the length to read from
|
||||
* that pointer, 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting 6 bytes.
|
||||
* ZipStringOffset stores a 4 byte offset from a fixed location in the memory mapped file instead
|
||||
* of the entire address, consuming 8 bytes with alignment.
|
||||
* More space efficient string representation of strings in an mmaped zipped
|
||||
* file than std::string_view. Using std::string_view as an entry in the
|
||||
* ZipArchive hash table wastes space. std::string_view stores a pointer to a
|
||||
* string (on 64 bit, 8 bytes) and the length to read from that pointer,
|
||||
* 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting
|
||||
* 6 bytes.
|
||||
*
|
||||
* ZipStringOffset stores a 4 byte offset from a fixed location in the memory
|
||||
* mapped file instead of the entire address, consuming 8 bytes with alignment.
|
||||
*/
|
||||
struct ZipStringOffset {
|
||||
uint32_t name_offset;
|
||||
uint16_t name_length;
|
||||
|
||||
const ZipString GetZipString(const uint8_t* start) const {
|
||||
ZipString zip_string;
|
||||
zip_string.name = start + name_offset;
|
||||
zip_string.name_length = name_length;
|
||||
return zip_string;
|
||||
const std::string_view ToStringView(const uint8_t* start) const {
|
||||
return std::string_view{reinterpret_cast<const char*>(start + name_offset), name_length};
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue