Add std::map implementation for cd entry map

Add the std::map implementation to be used later in zip64 format.
Also move the entry map classes to a separate file to make the hierarchy
clear.

Test: unittests pass
Change-Id: I74d95f53207cc8ca871b955e2a15c184d5497833
This commit is contained in:
Tianjie Xu 2020-03-15 21:23:24 -07:00
parent 73ae00bbde
commit 0ef9783c57
3 changed files with 155 additions and 18 deletions

View file

@ -107,15 +107,15 @@ static uint32_t ComputeHash(std::string_view name) {
}
// Convert a ZipEntry to a hash table index, verifying that it's in a valid range.
std::pair<int32_t, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
const uint8_t* start) const {
std::pair<ZipError, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
const uint8_t* start) const {
const uint32_t hash = ComputeHash(name);
// NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
uint32_t ent = hash & (hash_table_size_ - 1);
while (hash_table_[ent].name_offset != 0) {
if (hash_table_[ent].ToStringView(start) == name) {
return {0, hash_table_[ent].name_offset};
return {kSuccess, hash_table_[ent].name_offset};
}
ent = (ent + 1) & (hash_table_size_ - 1);
}
@ -124,7 +124,7 @@ std::pair<int32_t, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view
return {kEntryNotFound, 0};
}
int32_t CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
const uint64_t hash = ComputeHash(name);
uint32_t ent = hash & (hash_table_size_ - 1);
@ -145,7 +145,7 @@ int32_t CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
const char* start_char = reinterpret_cast<const char*>(start);
hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
return 0;
return kSuccess;
}
void CdEntryMapZip32::ResetIteration() {
@ -166,6 +166,11 @@ std::pair<std::string_view, uint64_t> CdEntryMapZip32::Next(const uint8_t* cd_st
}
CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
/*
* Create hash table. We have a minimum 75% load factor, possibly as
* low as 50% after we round off to a power of 2. There must be at
* least one unused entry to avoid an infinite loop during creation.
*/
hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
hash_table_ = {
reinterpret_cast<ZipStringOffset*>(calloc(hash_table_size_, sizeof(ZipStringOffset))), free};
@ -179,6 +184,43 @@ std::unique_ptr<CdEntryMapInterface> CdEntryMapZip32::Create(uint16_t num_entrie
return std::unique_ptr<CdEntryMapInterface>(entry_map);
}
std::unique_ptr<CdEntryMapInterface> CdEntryMapZip64::Create() {
return std::unique_ptr<CdEntryMapInterface>(new CdEntryMapZip64());
}
ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) {
const auto [it, added] =
entry_table_.insert({name, name.data() - reinterpret_cast<const char*>(start)});
if (!added) {
ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
return kDuplicateEntry;
}
return kSuccess;
}
std::pair<ZipError, uint64_t> CdEntryMapZip64::GetCdEntryOffset(std::string_view name,
const uint8_t* /*cd_start*/) const {
const auto it = entry_table_.find(name);
if (it == entry_table_.end()) {
ALOGV("Zip: Could not find entry %.*s", static_cast<int>(name.size()), name.data());
return {kEntryNotFound, 0};
}
return {kSuccess, it->second};
}
void CdEntryMapZip64::ResetIteration() {
iterator_ = entry_table_.begin();
}
std::pair<std::string_view, uint64_t> CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) {
if (iterator_ == entry_table_.end()) {
return {};
}
return *iterator_++;
}
#if defined(__BIONIC__)
uint64_t GetOwnerTag(const ZipArchive* archive) {
return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE,
@ -357,12 +399,12 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
const size_t cd_length = archive->central_directory.GetMapLength();
const uint16_t num_entries = archive->num_entries;
/*
* Create hash table. We have a minimum 75% load factor, possibly as
* low as 50% after we round off to a power of 2. There must be at
* least one unused entry to avoid an infinite loop during creation.
*/
archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
// TODO(xunchang) parse the zip64 Eocd
if (num_entries > UINT16_MAX) {
archive->cd_entry_map = CdEntryMapZip64::Create();
} else {
archive->cd_entry_map = CdEntryMapZip32::Create(num_entries);
}
if (archive->cd_entry_map == nullptr) {
return kAllocationFailed;
}

View file

@ -22,6 +22,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <map>
#include <memory>
#include <utility>
#include <vector>
@ -46,7 +47,9 @@ static const char* kErrorMessages[] = {
"Allocation failed",
};
enum ErrorCodes : int32_t {
enum ZipError : int32_t {
kSuccess = 0,
kIterationEnd = -1,
// We encountered a Zlib error when inflating a stream from this file.
@ -149,12 +152,12 @@ class CdEntryMapInterface {
// Adds an entry to the map. The |name| should internally points to the
// filename field of a cd entry. And |start| points to the beginning of the
// central directory. Returns 0 on success.
virtual int32_t AddToMap(std::string_view name, const uint8_t* start) = 0;
virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
// For the zip entry |entryName|, finds the offset of its filename field in
// the central directory. Returns a pair of [status, offset]. The value of
// the status is 0 on success.
virtual std::pair<int32_t, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const = 0;
virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const = 0;
// Resets the iterator to the beginning of the map.
virtual void ResetIteration() = 0;
// Returns the [name, cd offset] of the current element. Also increments the
@ -190,9 +193,9 @@ class CdEntryMapZip32 : public CdEntryMapInterface {
public:
static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);
int32_t AddToMap(std::string_view name, const uint8_t* start) override;
std::pair<int32_t, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const override;
ZipError AddToMap(std::string_view name, const uint8_t* start) override;
std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const override;
void ResetIteration() override;
std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
@ -210,6 +213,25 @@ class CdEntryMapZip32 : public CdEntryMapInterface {
uint32_t current_position_{0};
};
// This implementation of CdEntryMap uses a std::map
class CdEntryMapZip64 : public CdEntryMapInterface {
public:
static std::unique_ptr<CdEntryMapInterface> Create();
ZipError AddToMap(std::string_view name, const uint8_t* start) override;
std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const override;
void ResetIteration() override;
std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
private:
CdEntryMapZip64() = default;
std::map<std::string_view, uint64_t> entry_table_;
std::map<std::string_view, uint64_t>::iterator iterator_;
};
struct ZipArchive {
// open Zip archive
mutable MappedZipFile mapped_zip;

View file

@ -24,11 +24,14 @@
#include <unistd.h>
#include <memory>
#include <set>
#include <string_view>
#include <vector>
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/mapped_file.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <gtest/gtest.h>
#include <ziparchive/zip_archive.h>
@ -53,6 +56,76 @@ static int32_t OpenArchiveWrapper(const std::string& name, ZipArchiveHandle* han
return OpenArchive(abs_path.c_str(), handle);
}
class CdEntryMapTest : public ::testing::Test {
protected:
void SetUp() override {
names_ = {
"a.txt", "b.txt", "b/", "b/c.txt", "b/d.txt",
};
separator_ = "separator";
header_ = "metadata";
joined_names_ = header_ + android::base::Join(names_, separator_);
base_ptr_ = reinterpret_cast<uint8_t*>(&joined_names_[0]);
entry_maps_.emplace_back(CdEntryMapZip32::Create(static_cast<uint16_t>(names_.size())));
entry_maps_.emplace_back(CdEntryMapZip64::Create());
for (auto& cd_map : entry_maps_) {
ASSERT_NE(nullptr, cd_map);
size_t offset = header_.size();
for (const auto& name : names_) {
auto status = cd_map->AddToMap(
std::string_view{joined_names_.c_str() + offset, name.size()}, base_ptr_);
ASSERT_EQ(0, status);
offset += name.size() + separator_.size();
}
}
}
std::vector<std::string> names_;
// A continuous region of memory serves as a mock of the central directory.
std::string joined_names_;
// We expect some metadata at the beginning of the central directory and between filenames.
std::string header_;
std::string separator_;
std::vector<std::unique_ptr<CdEntryMapInterface>> entry_maps_;
uint8_t* base_ptr_{nullptr}; // Points to the start of the central directory.
};
TEST_F(CdEntryMapTest, AddDuplicatedEntry) {
for (auto& cd_map : entry_maps_) {
std::string_view name = "b.txt";
ASSERT_NE(0, cd_map->AddToMap(name, base_ptr_));
}
}
TEST_F(CdEntryMapTest, FindEntry) {
for (auto& cd_map : entry_maps_) {
uint64_t expected_offset = header_.size();
for (const auto& name : names_) {
auto [status, offset] = cd_map->GetCdEntryOffset(name, base_ptr_);
ASSERT_EQ(status, kSuccess);
ASSERT_EQ(offset, expected_offset);
expected_offset += name.size() + separator_.size();
}
}
}
TEST_F(CdEntryMapTest, Iteration) {
std::set<std::string_view> expected(names_.begin(), names_.end());
for (auto& cd_map : entry_maps_) {
cd_map->ResetIteration();
std::set<std::string_view> entry_set;
auto ret = cd_map->Next(base_ptr_);
while (ret != std::pair<std::string_view, uint64_t>{}) {
auto [it, insert_status] = entry_set.insert(ret.first);
ASSERT_TRUE(insert_status);
ret = cd_map->Next(base_ptr_);
}
ASSERT_EQ(expected, entry_set);
}
}
TEST(ziparchive, Open) {
ZipArchiveHandle handle;
ASSERT_EQ(0, OpenArchiveWrapper(kValidZip, &handle));