Move the implementation of cd entry map to a separate file

Move the entry map classes to a separate file to make the hierarchy
clear.

Test: unittests pass
Change-Id: Ie01d7835359daa4f59af75a0eda204c696d5658e
This commit is contained in:
Tianjie Xu 2020-03-13 16:16:24 -07:00
parent 0ef9783c57
commit 323c09c3d0
6 changed files with 360 additions and 293 deletions

View file

@ -60,6 +60,7 @@ cc_defaults {
srcs: [
"zip_archive.cc",
"zip_archive_stream_entry.cc",
"zip_cd_entry_map.cc",
"zip_writer.cc",
],

View file

@ -85,142 +85,6 @@ static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
* of the string length into the hash table entry.
*/
/*
 * Returns the smallest power of 2 that is >= |val| for 1 <= val <= 2^31.
 * An input of 0, or anything above 2^31, wraps around to 0.
 *
 * The highest set bit of (val - 1) is smeared into every lower bit position
 * and one is added to the result. Technique taken from
 * http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;
  // Shifting by 1, 2, 4, 8 and 16 propagates the top bit across all 32 bits.
  for (uint32_t shift = 1; shift < 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}
// Hashes |name| with std::hash and narrows the result to 32 bits.
static uint32_t ComputeHash(std::string_view name) {
  const auto full_hash = std::hash<std::string_view>{}(name);
  return static_cast<uint32_t>(full_hash);
}
// Looks up |name| in the hash table using linear probing. |start| is the base
// address that the stored name offsets are relative to. Returns
// {kSuccess, name offset} on a match and {kEntryNotFound, 0} otherwise.
std::pair<ZipError, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
                                                                const uint8_t* start) const {
  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
  const uint32_t mask = hash_table_size_ - 1;

  // A slot whose name_offset is 0 is unused and ends the probe sequence.
  for (uint32_t slot = ComputeHash(name) & mask; hash_table_[slot].name_offset != 0;
       slot = (slot + 1) & mask) {
    const auto& candidate = hash_table_[slot];
    if (candidate.ToStringView(start) == name) {
      return {kSuccess, candidate.name_offset};
    }
  }

  ALOGV("Zip: Unable to find entry %.*s", static_cast<int>(name.size()), name.data());
  return {kEntryNotFound, 0};
}
// Inserts |name| into the hash table. Only the offset of |name| from |start|
// and its length are stored, so |name| must point into the buffer that begins
// at |start|. Returns kDuplicateEntry if an equal name is already present and
// kSuccess otherwise.
ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
  // ComputeHash() yields 32 bits. Keep the same width as GetCdEntryOffset() so
  // the masked index is computed without an implicit 64 -> 32 bit narrowing.
  const uint32_t hash = ComputeHash(name);
  const uint32_t mask = hash_table_size_ - 1;
  uint32_t ent = hash & mask;

  /*
   * We over-allocated the table, so we're guaranteed to find an empty slot.
   * Further, we guarantee that the hashtable size is not 0.
   */
  while (hash_table_[ent].name_offset != 0) {
    if (hash_table_[ent].ToStringView(start) == name) {
      // We've found a duplicate entry. We don't accept duplicates.
      ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
      return kDuplicateEntry;
    }
    ent = (ent + 1) & mask;
  }

  // `name` has already been validated before entry.
  // NOTE(review): an offset of 0 would be indistinguishable from an unused
  // slot; presumably a name never starts at |start| itself - confirm against
  // the caller.
  const char* start_char = reinterpret_cast<const char*>(start);
  hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
  hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
  return kSuccess;
}
// Rewinds the iteration cursor so that the next call to Next() starts
// scanning from the first slot of the hash table.
void CdEntryMapZip32::ResetIteration() {
  current_position_ = 0u;
}
// Moves the cursor to just past the next occupied slot and returns that slot's
// [name, name offset]. Returns a value-initialized pair once no occupied slots
// remain.
std::pair<std::string_view, uint64_t> CdEntryMapZip32::Next(const uint8_t* cd_start) {
  for (; current_position_ < hash_table_size_; ++current_position_) {
    const auto& slot = hash_table_[current_position_];
    if (slot.name_offset == 0) {
      continue;  // Unused slot, keep scanning.
    }
    // Step past the slot being returned so the next call resumes after it.
    ++current_position_;
    return {slot.ToStringView(cd_start), slot.name_offset};
  }

  // We have reached the end of the hash table.
  return {};
}
CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
  /*
   * Size the hash table so that its load factor is at most 75%; rounding the
   * slot count up to a power of 2 can lower it further. The extra "1 +"
   * guarantees at least one unused slot, which is what terminates the probe
   * loops and avoids an infinite loop during creation.
   */
  const uint32_t min_slots = 1 + (static_cast<uint32_t>(num_entries) * 4) / 3;
  hash_table_size_ = RoundUpPower2(min_slots);

  // calloc zero-fills the slots, and a zero name_offset marks a slot as unused.
  // The allocation may fail; Create() checks for a null table.
  void* storage = calloc(hash_table_size_, sizeof(ZipStringOffset));
  hash_table_.reset(static_cast<ZipStringOffset*>(storage));
}
std::unique_ptr<CdEntryMapInterface> CdEntryMapZip32::Create(uint16_t num_entries) {
auto entry_map = new CdEntryMapZip32(num_entries);
CHECK(entry_map->hash_table_ != nullptr)
<< "Zip: unable to allocate the " << entry_map->hash_table_size_
<< " entry hash_table, entry size: " << sizeof(ZipStringOffset);
return std::unique_ptr<CdEntryMapInterface>(entry_map);
}
// Returns a new, empty std::map-backed entry map. The constructor is private,
// so the object is created with a plain `new` and wrapped immediately.
std::unique_ptr<CdEntryMapInterface> CdEntryMapZip64::Create() {
  std::unique_ptr<CdEntryMapInterface> entry_map(new CdEntryMapZip64());
  return entry_map;
}
// Records |name| together with its byte offset from |start|. Returns
// kDuplicateEntry, leaving the map unchanged, if |name| is already present and
// kSuccess otherwise.
ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) {
  const char* base = reinterpret_cast<const char*>(start);
  const auto name_offset = static_cast<uint64_t>(name.data() - base);

  const bool inserted = entry_table_.emplace(name, name_offset).second;
  if (inserted) {
    return kSuccess;
  }

  ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
  return kDuplicateEntry;
}
// Finds |name| in the map. The offset was computed when the entry was added,
// so the central directory base pointer is unused. Returns {kSuccess, offset}
// on a match and {kEntryNotFound, 0} otherwise.
std::pair<ZipError, uint64_t> CdEntryMapZip64::GetCdEntryOffset(std::string_view name,
                                                                const uint8_t* /*cd_start*/) const {
  if (const auto found = entry_table_.find(name); found != entry_table_.end()) {
    return {kSuccess, found->second};
  }

  ALOGV("Zip: Could not find entry %.*s", static_cast<int>(name.size()), name.data());
  return {kEntryNotFound, 0};
}
// Points the iteration cursor at the first element of the map (or at end()
// when the map is empty).
void CdEntryMapZip64::ResetIteration() {
  iterator_ = std::begin(entry_table_);
}
std::pair<std::string_view, uint64_t> CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) {
if (iterator_ == entry_table_.end()) {
return {};
}
return *iterator_++;
}
#if defined(__BIONIC__)
uint64_t GetOwnerTag(const ZipArchive* archive) {
return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE,

View file

@ -22,81 +22,14 @@
#include <stdlib.h>
#include <unistd.h>
#include <map>
#include <memory>
#include <utility>
#include <vector>
#include "android-base/macros.h"
#include "android-base/mapped_file.h"
// Human-readable descriptions of the ZipError codes. The order mirrors the
// enum: entry i describes error code -i.
//
// Declared constexpr so the table itself is immutable; every translation unit
// that includes this header gets a read-only copy instead of a writable array
// of pointers.
static constexpr const char* kErrorMessages[] = {
    "Success",
    "Iteration ended",
    "Zlib error",
    "Invalid file",
    "Invalid handle",
    "Duplicate entries in archive",
    "Empty archive",
    "Entry not found",
    "Invalid offset",
    "Inconsistent information",
    "Invalid entry name",
    "I/O error",
    "File mapping failed",
    "Allocation failed",
};
// Status codes used by the zip archive code. kSuccess is 0 and every other
// code is a distinct negative value. The order mirrors kErrorMessages, so
// -code is the index of the matching message.
enum ZipError : int32_t {
kSuccess = 0,
// Iteration ended.
kIterationEnd = -1,
// We encountered a Zlib error when inflating a stream from this file.
// Usually indicates file corruption.
kZlibError = -2,
// The input file cannot be processed as a zip archive. Usually because
// it's too small, too large or does not have a valid signature.
kInvalidFile = -3,
// An invalid iteration / ziparchive handle was passed in as an input
// argument.
kInvalidHandle = -4,
// The zip archive contained two (or possibly more) entries with the same
// name.
kDuplicateEntry = -5,
// The zip archive contains no entries.
kEmptyArchive = -6,
// The specified entry was not found in the archive.
kEntryNotFound = -7,
// The zip archive contained an invalid local file header pointer.
kInvalidOffset = -8,
// The zip archive contained inconsistent entry information. This could
// be because the central directory & local file header did not agree, or
// if the actual uncompressed length or crc32 do not match their declared
// values.
kInconsistentInformation = -9,
// An invalid entry name was encountered.
kInvalidEntryName = -10,
// An I/O related system call (read, lseek, ftruncate, map) failed.
kIoError = -11,
// We were not able to mmap the central directory or entry contents.
kMmapFailed = -12,
// An allocation failed.
kAllocationFailed = -13,
// Alias of the last (most negative) code defined above.
kLastErrorCode = kAllocationFailed,
};
#include "zip_cd_entry_map.h"
#include "zip_error.h"
class MappedZipFile {
public:
@ -144,94 +77,6 @@ class CentralDirectory {
size_t length_;
};
// Interface of the central directory (cd) entry map. The map locates a
// particular cd entry based on its filename.
class CdEntryMapInterface {
public:
virtual ~CdEntryMapInterface() = default;
// Adds an entry to the map. |name| should point at the filename field of a
// cd entry, and |start| at the beginning of the central directory. Returns
// kSuccess (0) on success.
virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
// For the zip entry |name|, finds the offset of its filename field in the
// central directory. Returns a pair of [status, offset]; the status is
// kSuccess (0) on success.
virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const = 0;
// Resets the iterator to the beginning of the map.
virtual void ResetIteration() = 0;
// Returns the [name, cd offset] of the current element and advances the
// iterator to the next element. Returns an empty pair once the iteration has
// moved past the last element.
virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0;
};
/**
 * Compact reference to a string stored inside a memory-mapped zip file.
 *
 * Using std::string_view as an entry in the ZipArchive hash table wastes
 * space: on 64-bit targets it holds an 8 byte pointer plus a full-width
 * length (16 bytes in total), although 2 bytes are enough for the length
 * stored here.
 *
 * ZipStringOffset instead stores a 4 byte offset from a fixed location in the
 * memory mapped file plus a 2 byte length, consuming 8 bytes with alignment.
 */
struct ZipStringOffset {
  // Offset of the first character, relative to the base pointer that is passed
  // to ToStringView().
  uint32_t name_offset;
  // Number of characters in the string.
  uint16_t name_length;

  // Rebuilds the view of the string, where |start| is the base address that
  // |name_offset| is relative to. The view is only valid while the memory
  // behind |start| stays alive.
  std::string_view ToStringView(const uint8_t* start) const {
    return std::string_view{reinterpret_cast<const char*>(start + name_offset), name_length};
  }
};

// Lock in the size that motivates this type.
static_assert(sizeof(ZipStringOffset) == 8, "ZipStringOffset is expected to occupy 8 bytes");
// Implementation of CdEntryMapInterface backed by an array hash table. It
// uses less memory than std::map, and it's the default implementation for zip
// archives without the zip64 extension.
class CdEntryMapZip32 : public CdEntryMapInterface {
public:
// Creates a map whose hash table is sized for |num_entries| entries.
static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);
ZipError AddToMap(std::string_view name, const uint8_t* start) override;
std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const override;
void ResetIteration() override;
std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
private:
explicit CdEntryMapZip32(uint16_t num_entries);
// We know how many entries are in the Zip archive, so we can have a
// fixed-size hash table. The slot count is 1 + (4 * num_entries) / 3 rounded
// up to a power of two, i.e. a load factor of at most 0.75. With num_entries
// limited to UINT16_MAX the slot count is at most 2^17, which safely fits
// into a uint32_t.
uint32_t hash_table_size_{0};
// The slot array, allocated with calloc and released with free. A slot whose
// name_offset is 0 is unused.
std::unique_ptr<ZipStringOffset[], decltype(&free)> hash_table_{nullptr, free};
// Index of the slot that the next call to Next() examines first.
uint32_t current_position_{0};
};
// Implementation of CdEntryMapInterface backed by a std::map keyed by entry
// name. NOTE(review): presumably selected for archives that use the zip64
// extension - confirm against the caller.
class CdEntryMapZip64 : public CdEntryMapInterface {
 public:
  // Creates an empty map.
  static std::unique_ptr<CdEntryMapInterface> Create();

  ZipError AddToMap(std::string_view name, const uint8_t* start) override;
  std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
                                                 const uint8_t* cd_start) const override;
  void ResetIteration() override;
  std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;

 private:
  CdEntryMapZip64() = default;

  // Maps each entry name to the offset of that name from the base pointer
  // passed to AddToMap().
  std::map<std::string_view, uint64_t> entry_table_;
  // Iteration cursor. It starts at begin(), which equals end() for the empty
  // map, so calling Next() before ResetIteration() yields an empty pair
  // instead of comparing a default-constructed iterator. Must stay declared
  // after |entry_table_| because it is initialized from it.
  std::map<std::string_view, uint64_t>::iterator iterator_{entry_table_.begin()};
};
struct ZipArchive {
// open Zip archive
mutable MappedZipFile mapped_zip;

View file

@ -0,0 +1,156 @@
/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "zip_cd_entry_map.h"
#include <android-base/logging.h>
#include <log/log.h>
/*
 * Returns the smallest power of 2 that is >= |val| for 1 <= val <= 2^31.
 * An input of 0, or anything above 2^31, wraps around to 0.
 *
 * The highest set bit of (val - 1) is smeared into every lower bit position
 * and one is added to the result. Technique taken from
 * http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;
  // Shifting by 1, 2, 4, 8 and 16 propagates the top bit across all 32 bits.
  for (uint32_t shift = 1; shift < 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}
// Hashes |name| with std::hash and narrows the result to 32 bits.
static uint32_t ComputeHash(std::string_view name) {
  const auto full_hash = std::hash<std::string_view>{}(name);
  return static_cast<uint32_t>(full_hash);
}
// Looks up |name| in the hash table using linear probing. |start| is the base
// address that the stored name offsets are relative to. Returns
// {kSuccess, name offset} on a match and {kEntryNotFound, 0} otherwise.
std::pair<ZipError, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
                                                                const uint8_t* start) const {
  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
  const uint32_t mask = hash_table_size_ - 1;

  // A slot whose name_offset is 0 is unused and ends the probe sequence.
  for (uint32_t slot = ComputeHash(name) & mask; hash_table_[slot].name_offset != 0;
       slot = (slot + 1) & mask) {
    const auto& candidate = hash_table_[slot];
    if (candidate.ToStringView(start) == name) {
      return {kSuccess, candidate.name_offset};
    }
  }

  ALOGV("Zip: Unable to find entry %.*s", static_cast<int>(name.size()), name.data());
  return {kEntryNotFound, 0};
}
// Inserts |name| into the hash table. Only the offset of |name| from |start|
// and its length are stored, so |name| must point into the buffer that begins
// at |start|. Returns kDuplicateEntry if an equal name is already present and
// kSuccess otherwise.
ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
  // ComputeHash() yields 32 bits. Keep the same width as GetCdEntryOffset() so
  // the masked index is computed without an implicit 64 -> 32 bit narrowing.
  const uint32_t hash = ComputeHash(name);
  const uint32_t mask = hash_table_size_ - 1;
  uint32_t ent = hash & mask;

  /*
   * We over-allocated the table, so we're guaranteed to find an empty slot.
   * Further, we guarantee that the hashtable size is not 0.
   */
  while (hash_table_[ent].name_offset != 0) {
    if (hash_table_[ent].ToStringView(start) == name) {
      // We've found a duplicate entry. We don't accept duplicates.
      ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
      return kDuplicateEntry;
    }
    ent = (ent + 1) & mask;
  }

  // `name` has already been validated before entry.
  // NOTE(review): an offset of 0 would be indistinguishable from an unused
  // slot; presumably a name never starts at |start| itself - confirm against
  // the caller.
  const char* start_char = reinterpret_cast<const char*>(start);
  hash_table_[ent].name_offset = static_cast<uint32_t>(name.data() - start_char);
  hash_table_[ent].name_length = static_cast<uint16_t>(name.size());
  return kSuccess;
}
// Rewinds the iteration cursor so that the next call to Next() starts
// scanning from the first slot of the hash table.
void CdEntryMapZip32::ResetIteration() {
  current_position_ = 0u;
}
// Moves the cursor to just past the next occupied slot and returns that slot's
// [name, name offset]. Returns a value-initialized pair once no occupied slots
// remain.
std::pair<std::string_view, uint64_t> CdEntryMapZip32::Next(const uint8_t* cd_start) {
  for (; current_position_ < hash_table_size_; ++current_position_) {
    const auto& slot = hash_table_[current_position_];
    if (slot.name_offset == 0) {
      continue;  // Unused slot, keep scanning.
    }
    // Step past the slot being returned so the next call resumes after it.
    ++current_position_;
    return {slot.ToStringView(cd_start), slot.name_offset};
  }

  // We have reached the end of the hash table.
  return {};
}
CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
  /*
   * Size the hash table so that its load factor is at most 75%; rounding the
   * slot count up to a power of 2 can lower it further. The extra "1 +"
   * guarantees at least one unused slot, which is what terminates the probe
   * loops and avoids an infinite loop during creation.
   */
  const uint32_t min_slots = 1 + (static_cast<uint32_t>(num_entries) * 4) / 3;
  hash_table_size_ = RoundUpPower2(min_slots);

  // calloc zero-fills the slots, and a zero name_offset marks a slot as unused.
  // The allocation may fail; Create() checks for a null table.
  void* storage = calloc(hash_table_size_, sizeof(ZipStringOffset));
  hash_table_.reset(static_cast<ZipStringOffset*>(storage));
}
std::unique_ptr<CdEntryMapInterface> CdEntryMapZip32::Create(uint16_t num_entries) {
auto entry_map = new CdEntryMapZip32(num_entries);
CHECK(entry_map->hash_table_ != nullptr)
<< "Zip: unable to allocate the " << entry_map->hash_table_size_
<< " entry hash_table, entry size: " << sizeof(ZipStringOffset);
return std::unique_ptr<CdEntryMapInterface>(entry_map);
}
// Returns a new, empty std::map-backed entry map. The constructor is private,
// so the object is created with a plain `new` and wrapped immediately.
std::unique_ptr<CdEntryMapInterface> CdEntryMapZip64::Create() {
  std::unique_ptr<CdEntryMapInterface> entry_map(new CdEntryMapZip64());
  return entry_map;
}
// Records |name| together with its byte offset from |start|. Returns
// kDuplicateEntry, leaving the map unchanged, if |name| is already present and
// kSuccess otherwise.
ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) {
  const char* base = reinterpret_cast<const char*>(start);
  const auto name_offset = static_cast<uint64_t>(name.data() - base);

  const bool inserted = entry_table_.emplace(name, name_offset).second;
  if (inserted) {
    return kSuccess;
  }

  ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
  return kDuplicateEntry;
}
// Finds |name| in the map. The offset was computed when the entry was added,
// so the central directory base pointer is unused. Returns {kSuccess, offset}
// on a match and {kEntryNotFound, 0} otherwise.
std::pair<ZipError, uint64_t> CdEntryMapZip64::GetCdEntryOffset(std::string_view name,
                                                                const uint8_t* /*cd_start*/) const {
  if (const auto found = entry_table_.find(name); found != entry_table_.end()) {
    return {kSuccess, found->second};
  }

  ALOGV("Zip: Could not find entry %.*s", static_cast<int>(name.size()), name.data());
  return {kEntryNotFound, 0};
}
// Points the iteration cursor at the first element of the map (or at end()
// when the map is empty).
void CdEntryMapZip64::ResetIteration() {
  iterator_ = std::begin(entry_table_);
}
std::pair<std::string_view, uint64_t> CdEntryMapZip64::Next(const uint8_t* /*cd_start*/) {
if (iterator_ == entry_table_.end()) {
return {};
}
return *iterator_++;
}

View file

@ -0,0 +1,114 @@
/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <stdint.h>
#include <map>
#include <memory>
#include <string_view>
#include <utility>
#include "zip_error.h"
// Interface of the central directory (cd) entry map. The map locates a
// particular cd entry based on its filename.
class CdEntryMapInterface {
public:
virtual ~CdEntryMapInterface() = default;
// Adds an entry to the map. |name| should point at the filename field of a
// cd entry, and |start| at the beginning of the central directory. Returns
// kSuccess (0) on success.
virtual ZipError AddToMap(std::string_view name, const uint8_t* start) = 0;
// For the zip entry |name|, finds the offset of its filename field in the
// central directory. Returns a pair of [status, offset]; the status is
// kSuccess (0) on success.
virtual std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const = 0;
// Resets the iterator to the beginning of the map.
virtual void ResetIteration() = 0;
// Returns the [name, cd offset] of the current element and advances the
// iterator to the next element. Returns an empty pair once the iteration has
// moved past the last element.
virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0;
};
/**
 * Compact reference to a string stored inside a memory-mapped zip file.
 *
 * Using std::string_view as an entry in the ZipArchive hash table wastes
 * space: on 64-bit targets it holds an 8 byte pointer plus a full-width
 * length (16 bytes in total), although 2 bytes are enough for the length
 * stored here.
 *
 * ZipStringOffset instead stores a 4 byte offset from a fixed location in the
 * memory mapped file plus a 2 byte length, consuming 8 bytes with alignment.
 */
struct ZipStringOffset {
  // Offset of the first character, relative to the base pointer that is passed
  // to ToStringView().
  uint32_t name_offset;
  // Number of characters in the string.
  uint16_t name_length;

  // Rebuilds the view of the string, where |start| is the base address that
  // |name_offset| is relative to. The view is only valid while the memory
  // behind |start| stays alive.
  std::string_view ToStringView(const uint8_t* start) const {
    return std::string_view{reinterpret_cast<const char*>(start + name_offset), name_length};
  }
};

// Lock in the size that motivates this type.
static_assert(sizeof(ZipStringOffset) == 8, "ZipStringOffset is expected to occupy 8 bytes");
// Implementation of CdEntryMapInterface backed by an array hash table. It
// uses less memory than std::map, and it's the default implementation for zip
// archives without the zip64 extension.
class CdEntryMapZip32 : public CdEntryMapInterface {
public:
// Creates a map whose hash table is sized for |num_entries| entries.
static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);
ZipError AddToMap(std::string_view name, const uint8_t* start) override;
std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const override;
void ResetIteration() override;
std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
private:
explicit CdEntryMapZip32(uint16_t num_entries);
// We know how many entries are in the Zip archive, so we can have a
// fixed-size hash table. The slot count is 1 + (4 * num_entries) / 3 rounded
// up to a power of two, i.e. a load factor of at most 0.75. With num_entries
// limited to UINT16_MAX the slot count is at most 2^17, which safely fits
// into a uint32_t.
uint32_t hash_table_size_{0};
// The slot array, allocated with calloc and released with free. A slot whose
// name_offset is 0 is unused.
std::unique_ptr<ZipStringOffset[], decltype(&free)> hash_table_{nullptr, free};
// Index of the slot that the next call to Next() examines first.
uint32_t current_position_{0};
};
// Implementation of CdEntryMapInterface backed by a std::map keyed by entry
// name. NOTE(review): presumably selected for archives that use the zip64
// extension - confirm against the caller.
class CdEntryMapZip64 : public CdEntryMapInterface {
 public:
  // Creates an empty map.
  static std::unique_ptr<CdEntryMapInterface> Create();

  ZipError AddToMap(std::string_view name, const uint8_t* start) override;
  std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
                                                 const uint8_t* cd_start) const override;
  void ResetIteration() override;
  std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;

 private:
  CdEntryMapZip64() = default;

  // Maps each entry name to the offset of that name from the base pointer
  // passed to AddToMap().
  std::map<std::string_view, uint64_t> entry_table_;
  // Iteration cursor. It starts at begin(), which equals end() for the empty
  // map, so calling Next() before ResetIteration() yields an empty pair
  // instead of comparing a default-constructed iterator. Must stay declared
  // after |entry_table_| because it is initialized from it.
  std::map<std::string_view, uint64_t>::iterator iterator_{entry_table_.begin()};
};

87
libziparchive/zip_error.h Normal file
View file

@ -0,0 +1,87 @@
/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <stdint.h>
// Human-readable descriptions of the ZipError codes. The order mirrors the
// enum: entry i describes error code -i.
//
// Declared constexpr so the table itself is immutable; every translation unit
// that includes this header gets a read-only copy instead of a writable array
// of pointers.
static constexpr const char* kErrorMessages[] = {
    "Success",
    "Iteration ended",
    "Zlib error",
    "Invalid file",
    "Invalid handle",
    "Duplicate entries in archive",
    "Empty archive",
    "Entry not found",
    "Invalid offset",
    "Inconsistent information",
    "Invalid entry name",
    "I/O error",
    "File mapping failed",
    "Allocation failed",
};
// Status codes used by the zip archive code. kSuccess is 0 and every other
// code is a distinct negative value. The order mirrors kErrorMessages, so
// -code is the index of the matching message.
enum ZipError : int32_t {
kSuccess = 0,
// Iteration ended.
kIterationEnd = -1,
// We encountered a Zlib error when inflating a stream from this file.
// Usually indicates file corruption.
kZlibError = -2,
// The input file cannot be processed as a zip archive. Usually because
// it's too small, too large or does not have a valid signature.
kInvalidFile = -3,
// An invalid iteration / ziparchive handle was passed in as an input
// argument.
kInvalidHandle = -4,
// The zip archive contained two (or possibly more) entries with the same
// name.
kDuplicateEntry = -5,
// The zip archive contains no entries.
kEmptyArchive = -6,
// The specified entry was not found in the archive.
kEntryNotFound = -7,
// The zip archive contained an invalid local file header pointer.
kInvalidOffset = -8,
// The zip archive contained inconsistent entry information. This could
// be because the central directory & local file header did not agree, or
// if the actual uncompressed length or crc32 do not match their declared
// values.
kInconsistentInformation = -9,
// An invalid entry name was encountered.
kInvalidEntryName = -10,
// An I/O related system call (read, lseek, ftruncate, map) failed.
kIoError = -11,
// We were not able to mmap the central directory or entry contents.
kMmapFailed = -12,
// An allocation failed.
kAllocationFailed = -13,
// Alias of the last (most negative) code defined above.
kLastErrorCode = kAllocationFailed,
};