Merge "Implement the functions to parse zip64 structs"

This commit is contained in:
Tianjie Xu 2020-03-25 00:21:46 +00:00 committed by Gerrit Code Review
commit 19c5cc224f
5 changed files with 655 additions and 46 deletions

View file

@ -67,5 +67,11 @@
{
"name": "ziparchive-tests"
}
],
"postsubmit": [
{
"name": "ziparchive_tests_large"
}
]
}

View file

@ -212,3 +212,20 @@ sh_test {
data: ["cli-tests/**/*"],
target_required: ["cli-test", "ziptool"],
}
// Host-side Python test module built from test_ziparchive_large.py.
python_test_host {
name: "ziparchive_tests_large",
srcs: ["test_ziparchive_large.py"],
main: "test_ziparchive_large.py",
version: {
// The test is built for Python 2 only; the Python 3 variant is disabled.
py2: {
enabled: true,
embedded_launcher: false,
},
py3: {
enabled: false,
embedded_launcher: false,
},
},
test_suites: ["general-tests"],
}

View file

@ -0,0 +1,99 @@
#!/usr/bin/env python
#
# Copyright (C) 2020 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Unittests for parsing files in zip64 format"""
import os
import shutil
import subprocess
import sys
import tempfile
import time
import unittest
import zipfile
class Zip64Test(unittest.TestCase):
@staticmethod
def _AddEntriesToZip(output_zip, entries_dict=None):
for name, size in entries_dict.items():
contents = name[0] * 1024
file_path = tempfile.NamedTemporaryFile()
with open(file_path.name, 'w') as f:
for it in range(0, size):
f.write(contents)
output_zip.write(file_path.name, arcname = name)
def _getEntryNames(self, zip_name):
cmd = ['ziptool', 'zipinfo', '-1', zip_name]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
output, _ = proc.communicate()
self.assertEquals(0, proc.returncode)
self.assertNotEqual(None, output)
return output.split()
def _ExtractEntries(self, zip_name):
temp_dir = tempfile.mkdtemp()
cmd = ['ziptool', 'unzip', '-d', temp_dir, zip_name]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
proc.communicate()
self.assertEquals(0, proc.returncode)
def test_entriesSmallerThan2G(self):
zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
# Add a few entries with each of them smaller than 2GiB. But the entire zip file is larger
# than 4GiB in size.
with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
entry_dict = {'a.txt': 1025 * 1024, 'b.txt': 1025 * 1024, 'c.txt': 1025 * 1024,
'd.txt': 1025 * 1024, 'e.txt': 1024}
self._AddEntriesToZip(output_zip, entry_dict)
read_names = self._getEntryNames(zip_path.name)
self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
self._ExtractEntries(zip_path.name)
def test_largeNumberOfEntries(self):
zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
entry_dict = {}
# Add 100k entries (more than 65535|UINT16_MAX).
for num in range(0, 100 * 1024):
entry_dict[str(num)] = 50
with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
self._AddEntriesToZip(output_zip, entry_dict)
read_names = self._getEntryNames(zip_path.name)
self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
self._ExtractEntries(zip_path.name)
def test_largeCompressedEntries(self):
zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED,
allowZip64=True) as output_zip:
# Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
# sizes in the extra field. Test if our ziptool should be able to parse it.
entry_dict = {'e.txt': 4095 * 1024, 'f.txt': 4095 * 1024}
self._AddEntriesToZip(output_zip, entry_dict)
read_names = self._getEntryNames(zip_path.name)
self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
self._ExtractEntries(zip_path.name)
if __name__ == '__main__':
  # Discover the test modules that live next to this script and run them.
  testsuite = unittest.TestLoader().discover(
      os.path.dirname(os.path.realpath(__file__)))
  result = unittest.TextTestRunner(verbosity=2).run(testsuite)
  # Report failures through the exit status; otherwise the script would exit
  # with 0 even when tests failed.
  sys.exit(0 if result.wasSuccessful() else 1)

View file

@ -139,9 +139,60 @@ struct CentralDirectoryInfo {
uint64_t cd_start_offset;
};
static ZipError FindCentralDirectoryInfoForZip64(CentralDirectoryInfo* /* cdInfo */) {
ALOGW("Zip: Parsing zip64 EOCD isn't supported yet.");
return kInvalidFile;
/*
 * Reads the zip64 end-of-central-directory (eocd) locator that is expected
 * immediately before the regular eocd record at |eocdOffset|, follows it to
 * the zip64 eocd record, validates both, and stores the record's entry count,
 * central directory size and central directory start offset in |cdInfo|.
 *
 * Returns kSuccess on success; kInvalidFile, kInvalidOffset or kIoError when
 * the structures are missing, inconsistent or unreadable.
 */
static ZipError FindCentralDirectoryInfoForZip64(const char* debugFileName, ZipArchive* archive,
                                                 off64_t eocdOffset, CentralDirectoryInfo* cdInfo) {
  // Compare as signed values so that a negative offset cannot pass as a huge unsigned one.
  if (eocdOffset <= static_cast<off64_t>(sizeof(Zip64EocdLocator))) {
    ALOGW("Zip: %s: Not enough space for zip64 eocd locator", debugFileName);
    return kInvalidFile;
  }

  // We expect to find the zip64 eocd locator immediately before the zip eocd.
  const int64_t locatorOffset = eocdOffset - static_cast<int64_t>(sizeof(Zip64EocdLocator));
  Zip64EocdLocator zip64EocdLocator{};
  if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocator)),
                                        sizeof(Zip64EocdLocator), locatorOffset)) {
    ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName,
          sizeof(Zip64EocdLocator), locatorOffset, debugFileName);
    return kIoError;
  }

  if (zip64EocdLocator.locator_signature != Zip64EocdLocator::kSignature) {
    ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName,
          locatorOffset);
    return kInvalidFile;
  }

  // Keep the offset unsigned, exactly as stored in the file. The zip64 eocd record has to fit
  // entirely before the locator; checking |locatorOffset| first prevents the subtraction below
  // from wrapping around and accepting a bogus offset.
  const uint64_t zip64EocdOffset = zip64EocdLocator.zip64_eocd_offset;
  if (locatorOffset < static_cast<int64_t>(sizeof(Zip64EocdRecord)) ||
      zip64EocdOffset > static_cast<uint64_t>(locatorOffset) - sizeof(Zip64EocdRecord)) {
    ALOGW("Zip: %s: Bad zip64 eocd offset %" PRIu64, debugFileName, zip64EocdOffset);
    return kInvalidOffset;
  }

  // |zip64EocdOffset| is now known to be smaller than |locatorOffset|, so the signed casts below
  // cannot overflow.
  Zip64EocdRecord zip64EocdRecord{};
  if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecord),
                                        sizeof(Zip64EocdRecord),
                                        static_cast<off64_t>(zip64EocdOffset))) {
    ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName,
          sizeof(Zip64EocdRecord), static_cast<int64_t>(zip64EocdOffset), debugFileName);
    return kIoError;
  }

  if (zip64EocdRecord.record_signature != Zip64EocdRecord::kSignature) {
    ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName,
          static_cast<int64_t>(zip64EocdOffset));
    return kInvalidFile;
  }

  // The central directory must end at or before the zip64 eocd record. Reject an oversized
  // |cd_size| up front so that the unsigned subtraction cannot wrap around.
  if (zip64EocdRecord.cd_size > zip64EocdOffset ||
      zip64EocdRecord.cd_start_offset > zip64EocdOffset - zip64EocdRecord.cd_size) {
    ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64
          ", zip64 eocd offset %" PRIu64,
          debugFileName, zip64EocdRecord.cd_start_offset, zip64EocdRecord.cd_size, zip64EocdOffset);
    return kInvalidOffset;
  }

  *cdInfo = {.num_records = zip64EocdRecord.num_records,
             .cd_size = zip64EocdRecord.cd_size,
             .cd_start_offset = zip64EocdRecord.cd_start_offset};

  return kSuccess;
}
static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive* archive,
@ -195,7 +246,7 @@ static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive
if (eocd->cd_size == UINT32_MAX || eocd->cd_start_offset == UINT32_MAX) {
ALOGV("Looking for the zip64 EOCD, cd_size: %" PRIu32 "cd_start_offset: %" PRId32,
eocd->cd_size, eocd->cd_start_offset);
return FindCentralDirectoryInfoForZip64(cdInfo);
return FindCentralDirectoryInfoForZip64(debug_file_name, archive, eocd_offset, cdInfo);
}
/*
@ -291,13 +342,104 @@ static ZipError MapCentralDirectory(const char* debug_file_name, ZipArchive* arc
return kSuccess;
}
/*
 * Walks the extensible data records of an extra field looking for the zip64
 * extended information record, and decodes it into |zip64Info|.
 *
 * The zip32 size/offset arguments tell which 64-bit values are expected in the
 * record: both sizes when either zip32 size is UINT32_MAX, and the local header
 * offset when |zip32LocalFileHeaderOffset| holds UINT32_MAX.
 *
 * Returns kSuccess once the record has been decoded; kInvalidFile or
 * kInvalidOffset if the field is malformed or the record is absent. |zip64Info|
 * is only written on success.
 */
static ZipError ParseZip64ExtendedInfoInExtraField(
    const uint8_t* extraFieldStart, uint16_t extraFieldLength, uint32_t zip32UncompressedSize,
    uint32_t zip32CompressedSize, std::optional<uint32_t> zip32LocalFileHeaderOffset,
    Zip64ExtendedInfo* zip64Info) {
  if (extraFieldLength <= 4) {
    ALOGW("Zip: Extra field isn't large enough to hold zip64 info, size %" PRIu16,
          extraFieldLength);
    return kInvalidFile;
  }

  // Which values the zip32 header delegates to the zip64 record. The sizes always travel as a
  // pair: if either one is UINT32_MAX, both are expected in the record.
  const bool sizesInZip64 =
      zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX;
  const bool offsetInZip64 = zip32LocalFileHeaderOffset == UINT32_MAX;

  // Every record starts with a 4-byte header: a 2-byte header ID followed by a 2-byte data size.
  uint16_t cursor = 0;
  while (cursor < extraFieldLength - 4) {
    const auto recordId = get_unaligned<uint16_t>(extraFieldStart + cursor);
    const auto recordSize = get_unaligned<uint16_t>(extraFieldStart + cursor + 2);
    cursor += 4;

    if (recordSize > extraFieldLength - cursor) {
      ALOGW("Zip: Data size exceeds the boundary of extra field, data size %" PRIu16, recordSize);
      return kInvalidOffset;
    }

    // Records of any other type are stepped over. Details in
    // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.5
    if (recordId != Zip64ExtendedInfo::kHeaderId) {
      cursor += recordSize;
      continue;
    }

    // 16 bytes for the two sizes, plus 8 bytes for the local header offset.
    const uint16_t expectedDataSize =
        static_cast<uint16_t>((sizesInZip64 ? 16 : 0) + (offsetInZip64 ? 8 : 0));

    if (expectedDataSize == 0) {
      ALOGW("Zip: Data size should not be 0 in zip64 extended field");
      return kInvalidFile;
    }

    if (recordSize != expectedDataSize) {
      std::string localOffsetString = "missing";
      if (zip32LocalFileHeaderOffset.has_value()) {
        localOffsetString = std::to_string(zip32LocalFileHeaderOffset.value());
      }
      ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
            ", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
            expectedDataSize, recordSize, zip32UncompressedSize, zip32CompressedSize,
            localOffsetString.c_str());
      return kInvalidFile;
    }

    std::optional<uint64_t> uncompressedFileSize;
    std::optional<uint64_t> compressedFileSize;
    std::optional<uint64_t> localHeaderOffset;

    const uint8_t* payload = extraFieldStart + cursor;
    if (sizesInZip64) {
      const auto uncompressed = get_unaligned<uint64_t>(payload);
      const auto compressed = get_unaligned<uint64_t>(payload + 8);
      payload += 16;

      // TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
      // for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
      // bytes counter to replace stream.total_out.
      if (uncompressed >= UINT32_MAX || compressed >= UINT32_MAX) {
        ALOGW(
            "Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
            ", compressed size %" PRIu64,
            uncompressed, compressed);
        return kInvalidFile;
      }
      uncompressedFileSize = uncompressed;
      compressedFileSize = compressed;
    }

    if (offsetInZip64) {
      localHeaderOffset = get_unaligned<uint64_t>(payload);
    }

    zip64Info->uncompressed_file_size = uncompressedFileSize;
    zip64Info->compressed_file_size = compressedFileSize;
    zip64Info->local_header_offset = localHeaderOffset;
    return kSuccess;
  }

  ALOGW("Zip: zip64 extended info isn't found in the extra field.");
  return kInvalidFile;
}
/*
* Parses the Zip archive's Central Directory. Allocates and populates the
* hash table.
*
* Returns 0 on success.
*/
static int32_t ParseZipArchive(ZipArchive* archive) {
static ZipError ParseZipArchive(ZipArchive* archive) {
const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
const size_t cd_length = archive->central_directory.GetMapLength();
const uint64_t num_entries = archive->num_entries;
@ -327,7 +469,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
return kInvalidFile;
}
const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
ALOGW("Zip: missed a central dir sig (at %" PRIu64 ")", i);
return kInvalidFile;
@ -355,9 +497,15 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
off64_t local_header_offset = cdr->local_file_header_offset;
if (local_header_offset == UINT32_MAX) {
// TODO(xunchang) parse the zip64 eocd
ALOGW("Zip: Parsing zip64 cd entry isn't supported yet");
return kInvalidFile;
Zip64ExtendedInfo zip64_info{};
if (auto status = ParseZip64ExtendedInfoInExtraField(
extra_field, extra_length, cdr->uncompressed_size, cdr->compressed_size,
cdr->local_file_header_offset, &zip64_info);
status != kSuccess) {
return status;
}
CHECK(zip64_info.local_header_offset.has_value());
local_header_offset = zip64_info.local_header_offset.value();
}
if (local_header_offset >= archive->directory_offset) {
@ -405,7 +553,7 @@ static int32_t ParseZipArchive(ZipArchive* archive) {
ALOGV("+++ zip good scan %" PRIu64 " entries", num_entries);
return 0;
return kSuccess;
}
static int32_t OpenArchiveInternal(ZipArchive* archive, const char* debug_file_name) {
@ -522,7 +670,7 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
return kInvalidOffset;
}
const CentralDirectoryRecord* cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
// The offset of the start of the central directory in the zipfile.
// We keep this lying around so that we can sanity check all our lengths
@ -546,8 +694,27 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
// the extra field.
if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX ||
cdr->local_file_header_offset == UINT32_MAX) {
ALOGW("Zip: Parsing zip64 local file header isn't supported yet");
return kInvalidFile;
const uint8_t* extra_field = ptr + sizeof(CentralDirectoryRecord) + cdr->file_name_length;
Zip64ExtendedInfo zip64_info{};
if (auto status = ParseZip64ExtendedInfoInExtraField(
extra_field, cdr->extra_field_length, cdr->uncompressed_size, cdr->compressed_size,
cdr->local_file_header_offset, &zip64_info);
status != kSuccess) {
return status;
}
if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
CHECK(zip64_info.uncompressed_file_size.has_value());
CHECK(zip64_info.compressed_file_size.has_value());
// TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
}
if (local_header_offset == UINT32_MAX) {
CHECK(zip64_info.local_header_offset.has_value());
local_header_offset = zip64_info.local_header_offset.value();
}
}
if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@ -562,14 +729,68 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
return kIoError;
}
const LocalFileHeader* lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
auto lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
if (lfh->lfh_signature != LocalFileHeader::kSignature) {
ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
static_cast<int64_t>(local_header_offset));
return kInvalidOffset;
}
// Check that the local file header name matches the declared name in the central directory.
CHECK_LE(entryName.size(), UINT16_MAX);
auto nameLen = static_cast<uint16_t>(entryName.size());
if (lfh->file_name_length != nameLen) {
ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
std::string(entryName).c_str(), lfh->file_name_length, nameLen);
return kInconsistentInformation;
}
const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
if (name_offset > cd_offset - lfh->file_name_length) {
ALOGW("Zip: lfh name has invalid declared length");
return kInvalidOffset;
}
std::vector<uint8_t> name_buf(nameLen);
if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
return kIoError;
}
if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
ALOGW("Zip: lfh name did not match central directory");
return kInconsistentInformation;
}
uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
uint64_t lfh_compressed_size = lfh->compressed_size;
if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
const uint16_t lfh_extra_field_size = lfh->extra_field_length;
if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {
ALOGW("Zip: extra field has a bad size for entry %s", std::string(entryName).c_str());
return kInvalidOffset;
}
std::vector<uint8_t> local_extra_field(lfh_extra_field_size);
if (!archive->mapped_zip.ReadAtOffset(local_extra_field.data(), lfh_extra_field_size,
lfh_extra_field_offset)) {
ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset);
return kIoError;
}
Zip64ExtendedInfo zip64_info{};
if (auto status = ParseZip64ExtendedInfoInExtraField(
local_extra_field.data(), lfh_extra_field_size, lfh->uncompressed_size,
lfh->compressed_size, std::nullopt, &zip64_info);
status != kSuccess) {
return status;
}
CHECK(zip64_info.uncompressed_file_size.has_value());
CHECK(zip64_info.compressed_file_size.has_value());
lfh_uncompressed_size = zip64_info.uncompressed_file_size.value();
lfh_compressed_size = zip64_info.compressed_file_size.value();
}
// Paranoia: Match the values specified in the local file header
// to those specified in the central directory.
@ -595,12 +816,12 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
// header agree on the crc, compressed, and uncompressed sizes of the entry.
if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
data->has_data_descriptor = 0;
if (data->compressed_length != lfh->compressed_size ||
data->uncompressed_length != lfh->uncompressed_size || data->crc32 != lfh->crc32) {
if (data->compressed_length != lfh_compressed_size ||
data->uncompressed_length != lfh_uncompressed_size || data->crc32 != lfh->crc32) {
ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
"}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
data->compressed_length, data->uncompressed_length, data->crc32, lfh->compressed_size,
lfh->uncompressed_size, lfh->crc32);
"}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
data->compressed_length, data->uncompressed_length, data->crc32, lfh_compressed_size,
lfh_uncompressed_size, lfh->crc32);
return kInconsistentInformation;
}
} else {
@ -623,30 +844,6 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
// Currently only needed to implement zipinfo.
data->is_text = (cdr->internal_file_attributes & 1);
// Check that the local file header name matches the declared
// name in the central directory.
CHECK_LE(entryName.size(), UINT16_MAX);
auto nameLen = static_cast<uint16_t>(entryName.size());
if (lfh->file_name_length != nameLen) {
ALOGW("Zip: lfh name length did not match central directory for %s: %" PRIu16 " %" PRIu16,
std::string(entryName).c_str(), lfh->file_name_length, nameLen);
return kInconsistentInformation;
}
const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
if (name_offset + lfh->file_name_length > cd_offset) {
ALOGW("Zip: lfh name has invalid declared length");
return kInvalidOffset;
}
std::vector<uint8_t> name_buf(nameLen);
if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), nameLen, name_offset)) {
ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
return kIoError;
}
if (memcmp(entryName.data(), name_buf.data(), nameLen) != 0) {
ALOGW("Zip: lfh name did not match central directory");
return kInconsistentInformation;
}
const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader) +
lfh->file_name_length + lfh->extra_field_length;
if (data_offset > cd_offset) {

View file

@ -14,8 +14,6 @@
* limitations under the License.
*/
#include "zip_archive_private.h"
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
@ -23,6 +21,7 @@
#include <string.h>
#include <unistd.h>
#include <map>
#include <memory>
#include <set>
#include <string_view>
@ -31,12 +30,16 @@
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/mapped_file.h>
#include <android-base/memory.h>
#include <android-base/strings.h>
#include <android-base/unique_fd.h>
#include <gtest/gtest.h>
#include <ziparchive/zip_archive.h>
#include <ziparchive/zip_archive_stream_entry.h>
#include "zip_archive_common.h"
#include "zip_archive_private.h"
static std::string test_data_dir = android::base::GetExecutableDirectory() + "/testdata";
static const std::string kValidZip = "valid.zip";
@ -966,3 +969,290 @@ TEST(ziparchive, Inflate) {
ASSERT_EQ(0u, writer.GetOutput().size());
}
}
// Test fixture that assembles a zip file in zip64 format in memory, piece by piece, so that the
// zip64 parsing logic can be tested.
//
// Typical use: call AddEntry() once per entry, then ConstructEocd() and ConstructZipFile(); the
// finished archive bytes are then available in |zip_content_|.
class Zip64ParseTest : public ::testing::Test {
 protected:
  // The pieces of one local file entry, in the order they are written to the archive.
  struct LocalFileEntry {
    std::vector<uint8_t> local_file_header;
    std::string file_name;
    std::vector<uint8_t> extended_field;
    // Fake data to mimic the compressed bytes in the zipfile.
    std::vector<uint8_t> compressed_bytes;

    // Total number of bytes this entry occupies in the archive.
    size_t GetSize() const {
      return local_file_header.size() + file_name.size() + extended_field.size() +
             compressed_bytes.size();
    }

    // Appends header, name, extra field and data to |output|.
    void CopyToOutput(std::vector<uint8_t>* output) const {
      std::copy(local_file_header.begin(), local_file_header.end(), std::back_inserter(*output));
      std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output));
      std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output));
      std::copy(compressed_bytes.begin(), compressed_bytes.end(), std::back_inserter(*output));
    }
  };

  // The pieces of one central directory entry, in the order they are written to the archive.
  struct CdRecordEntry {
    std::vector<uint8_t> central_directory_record;
    std::string file_name;
    std::vector<uint8_t> extended_field;

    // Total number of bytes this entry occupies in the central directory.
    size_t GetSize() const {
      return central_directory_record.size() + file_name.size() + extended_field.size();
    }

    // Appends record, name and extra field to |output|.
    void CopyToOutput(std::vector<uint8_t>* output) const {
      std::copy(central_directory_record.begin(), central_directory_record.end(),
                std::back_inserter(*output));
      std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output));
      std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output));
    }
  };

  // Serializes a local file header for |name| into |output|.
  static void ConstructLocalFileHeader(const std::string& name, std::vector<uint8_t>* output,
                                       uint32_t uncompressed_size, uint32_t compressed_size) {
    LocalFileHeader lfh = {};
    lfh.lfh_signature = LocalFileHeader::kSignature;
    lfh.compressed_size = compressed_size;
    lfh.uncompressed_size = uncompressed_size;
    lfh.file_name_length = static_cast<uint16_t>(name.size());
    // Fixed at 20 bytes: a 4-byte extra field header plus two 8-byte sizes.
    lfh.extra_field_length = 20;
    *output = std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&lfh),
                                   reinterpret_cast<uint8_t*>(&lfh) + sizeof(LocalFileHeader));
  }

  // Put one zip64 extended info in the extended field.
  static void ConstructExtendedField(const std::vector<uint64_t>& zip64_fields,
                                     std::vector<uint8_t>* output) {
    ASSERT_FALSE(zip64_fields.empty());
    uint16_t data_size = 8 * static_cast<uint16_t>(zip64_fields.size());
    std::vector<uint8_t> extended_field(data_size + 4);
    // 4-byte header (header ID, data size) followed by the 8-byte values.
    android::base::put_unaligned(extended_field.data(), Zip64ExtendedInfo::kHeaderId);
    android::base::put_unaligned(extended_field.data() + 2, data_size);
    size_t offset = 4;
    for (const auto& field : zip64_fields) {
      android::base::put_unaligned(extended_field.data() + offset, field);
      offset += 8;
    }

    *output = std::move(extended_field);
  }

  // Serializes a central directory record for |name| into |output|.
  static void ConstructCentralDirectoryRecord(const std::string& name, uint32_t uncompressed_size,
                                              uint32_t compressed_size, uint32_t local_offset,
                                              std::vector<uint8_t>* output) {
    CentralDirectoryRecord cdr = {};
    cdr.record_signature = CentralDirectoryRecord::kSignature;
    cdr.compressed_size = compressed_size;
    cdr.uncompressed_size = uncompressed_size;
    cdr.file_name_length = static_cast<uint16_t>(name.size());
    // 20 bytes for header + two sizes; 8 more when the local header offset is included as well.
    cdr.extra_field_length = local_offset == UINT32_MAX ? 28 : 20;
    cdr.local_file_header_offset = local_offset;
    *output =
        std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&cdr),
                             reinterpret_cast<uint8_t*>(&cdr) + sizeof(CentralDirectoryRecord));
  }

  // Add an entry to the zipfile, construct the corresponding local header and cd entry.
  // Each *_in_extended flag moves that value into the zip64 extended field and replaces the
  // 32-bit header field with UINT32_MAX.
  void AddEntry(const std::string& name, const std::vector<uint8_t>& content,
                bool uncompressed_size_in_extended, bool compressed_size_in_extended,
                bool local_offset_in_extended) {
    auto uncompressed_size = static_cast<uint32_t>(content.size());
    auto compressed_size = static_cast<uint32_t>(content.size());
    // The new local header starts right after all previously added local entries.
    uint32_t local_file_header_offset = 0;
    std::for_each(file_entries_.begin(), file_entries_.end(),
                  [&local_file_header_offset](const LocalFileEntry& file_entry) {
                    local_file_header_offset += file_entry.GetSize();
                  });

    std::vector<uint64_t> zip64_fields;
    if (uncompressed_size_in_extended) {
      zip64_fields.push_back(uncompressed_size);
      uncompressed_size = UINT32_MAX;
    }
    if (compressed_size_in_extended) {
      zip64_fields.push_back(compressed_size);
      compressed_size = UINT32_MAX;
    }
    LocalFileEntry local_entry = {
        .local_file_header = {},
        .file_name = name,
        .extended_field = {},
        .compressed_bytes = content,
    };
    ConstructLocalFileHeader(name, &local_entry.local_file_header, uncompressed_size,
                             compressed_size);
    ConstructExtendedField(zip64_fields, &local_entry.extended_field);
    file_entries_.push_back(std::move(local_entry));

    // Only the central directory's extended field carries the local header offset.
    if (local_offset_in_extended) {
      zip64_fields.push_back(local_file_header_offset);
      local_file_header_offset = UINT32_MAX;
    }
    CdRecordEntry cd_entry = {
        .central_directory_record = {},
        .file_name = name,
        .extended_field = {},
    };
    ConstructCentralDirectoryRecord(name, uncompressed_size, compressed_size,
                                    local_file_header_offset, &cd_entry.central_directory_record);
    ConstructExtendedField(zip64_fields, &cd_entry.extended_field);
    cd_entries_.push_back(std::move(cd_entry));
  }

  // Builds the zip64 eocd record, the zip64 eocd locator and the regular eocd record for the
  // entries added so far.
  void ConstructEocd() {
    ASSERT_EQ(file_entries_.size(), cd_entries_.size());
    Zip64EocdRecord zip64_eocd = {};
    zip64_eocd.record_signature = Zip64EocdRecord::kSignature;
    zip64_eocd.num_records = file_entries_.size();
    zip64_eocd.cd_size = 0;
    std::for_each(
        cd_entries_.begin(), cd_entries_.end(),
        [&zip64_eocd](const CdRecordEntry& cd_entry) { zip64_eocd.cd_size += cd_entry.GetSize(); });
    // The central directory starts right after the last local file entry.
    zip64_eocd.cd_start_offset = 0;
    std::for_each(file_entries_.begin(), file_entries_.end(),
                  [&zip64_eocd](const LocalFileEntry& file_entry) {
                    zip64_eocd.cd_start_offset += file_entry.GetSize();
                  });
    zip64_eocd_record_ =
        std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&zip64_eocd),
                             reinterpret_cast<uint8_t*>(&zip64_eocd) + sizeof(Zip64EocdRecord));

    // The zip64 eocd record is placed directly after the central directory.
    Zip64EocdLocator zip64_locator = {};
    zip64_locator.locator_signature = Zip64EocdLocator::kSignature;
    zip64_locator.zip64_eocd_offset = zip64_eocd.cd_start_offset + zip64_eocd.cd_size;
    zip64_eocd_locator_ =
        std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&zip64_locator),
                             reinterpret_cast<uint8_t*>(&zip64_locator) + sizeof(Zip64EocdLocator));

    // The regular eocd carries UINT32_MAX in cd_size / cd_start_offset, which makes the parser
    // look for the zip64 eocd.
    EocdRecord eocd = {};
    eocd.eocd_signature = EocdRecord::kSignature;
    eocd.num_records = file_entries_.size() > UINT16_MAX
                           ? UINT16_MAX
                           : static_cast<uint16_t>(file_entries_.size());
    eocd.cd_size = UINT32_MAX;
    eocd.cd_start_offset = UINT32_MAX;
    eocd_record_ = std::vector<uint8_t>(reinterpret_cast<uint8_t*>(&eocd),
                                        reinterpret_cast<uint8_t*>(&eocd) + sizeof(EocdRecord));
  }

  // Concatenate all the local file entries, cd entries, and eocd metadata.
  void ConstructZipFile() {
    for (const auto& file_entry : file_entries_) {
      file_entry.CopyToOutput(&zip_content_);
    }
    for (const auto& cd_entry : cd_entries_) {
      cd_entry.CopyToOutput(&zip_content_);
    }
    std::copy(zip64_eocd_record_.begin(), zip64_eocd_record_.end(),
              std::back_inserter(zip_content_));
    std::copy(zip64_eocd_locator_.begin(), zip64_eocd_locator_.end(),
              std::back_inserter(zip_content_));
    std::copy(eocd_record_.begin(), eocd_record_.end(), std::back_inserter(zip_content_));
  }

  // The finished archive bytes, filled in by ConstructZipFile().
  std::vector<uint8_t> zip_content_;

  std::vector<LocalFileEntry> file_entries_;
  std::vector<CdRecordEntry> cd_entries_;
  std::vector<uint8_t> zip64_eocd_record_;
  std::vector<uint8_t> zip64_eocd_locator_;
  std::vector<uint8_t> eocd_record_;
};
// An archive whose single entry keeps both sizes in the zip64 extended field opens successfully.
TEST_F(Zip64ParseTest, openFile) {
  const std::vector<uint8_t> payload(100, 'a');
  AddEntry("a.txt", payload, true, true, false);
  ConstructEocd();
  ConstructZipFile();

  ZipArchiveHandle archive;
  const auto open_status =
      OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &archive);
  ASSERT_EQ(0, open_status);
  CloseArchive(archive);
}
// Entries that also keep the local header offset in the zip64 extended field open successfully.
TEST_F(Zip64ParseTest, openFilelocalOffsetInExtendedField) {
  const std::vector<uint8_t> first_payload(100, 'a');
  const std::vector<uint8_t> second_payload(200, 'b');
  AddEntry("a.txt", first_payload, true, true, true);
  AddEntry("b.txt", second_payload, true, true, true);
  ConstructEocd();
  ConstructZipFile();

  ZipArchiveHandle archive;
  const auto open_status =
      OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &archive);
  ASSERT_EQ(0, open_status);
  CloseArchive(archive);
}
// Opening must fail when only the uncompressed size is stored in the zip64 extended field.
TEST_F(Zip64ParseTest, openFileCompressedNotInExtendedField) {
  const std::vector<uint8_t> payload(100, 'a');
  AddEntry("a.txt", payload, true, false, false);
  ConstructEocd();
  ConstructZipFile();

  ZipArchiveHandle archive;
  // Zip64 extended fields must include both uncompressed and compressed size.
  const auto open_status =
      OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &archive);
  ASSERT_NE(0, open_status);
  CloseArchive(archive);
}
// FindEntry reports the sizes stored in the zip64 extended field, with and without the local
// header offset being stored there too.
TEST_F(Zip64ParseTest, findEntry) {
  const std::vector<uint8_t> first_payload(200, 'a');
  const std::vector<uint8_t> second_payload(300, 'b');
  AddEntry("a.txt", first_payload, true, true, true);
  AddEntry("b.txt", second_payload, true, true, false);
  ConstructEocd();
  ConstructZipFile();

  ZipArchiveHandle archive;
  const auto open_status =
      OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &archive);
  ASSERT_EQ(0, open_status);

  ZipEntry found;
  ASSERT_EQ(0, FindEntry(archive, "a.txt", &found));
  ASSERT_EQ(200, found.uncompressed_length);
  ASSERT_EQ(200, found.compressed_length);

  ASSERT_EQ(0, FindEntry(archive, "b.txt", &found));
  ASSERT_EQ(300, found.uncompressed_length);
  ASSERT_EQ(300, found.compressed_length);

  CloseArchive(archive);
}
// A corrupt data size in the local header's extended field does not prevent opening the archive,
// but makes FindEntry fail for that entry.
TEST_F(Zip64ParseTest, openFileIncorrectDataSizeInLocalExtendedField) {
  const std::vector<uint8_t> payload(100, 'a');
  AddEntry("a.txt", payload, true, true, false);
  ASSERT_EQ(1, file_entries_.size());

  // data size exceeds the extended field size in local header.
  std::vector<uint8_t>& local_extra = file_entries_[0].extended_field;
  android::base::put_unaligned<uint16_t>(local_extra.data() + 2, 30);
  ConstructEocd();
  ConstructZipFile();

  ZipArchiveHandle archive;
  const auto open_status =
      OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &archive);
  ASSERT_EQ(0, open_status);

  ZipEntry found;
  ASSERT_NE(0, FindEntry(archive, "a.txt", &found));

  CloseArchive(archive);
}
// Iterating over a zip64 archive yields exactly the names of the entries that were added.
TEST_F(Zip64ParseTest, iterates) {
  std::set<std::string_view> names{"a.txt", "b.txt", "c.txt", "d.txt", "e.txt"};
  for (const auto& name : names) {
    AddEntry(std::string(name), std::vector<uint8_t>(100, name[0]), true, true, true);
  }
  ConstructEocd();
  ConstructZipFile();

  ZipArchiveHandle handle;
  ASSERT_EQ(
      0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));

  void* iteration_cookie;
  ASSERT_EQ(0, StartIteration(handle, &iteration_cookie));
  std::set<std::string_view> result;
  std::string_view name;
  ZipEntry entry;
  while (Next(iteration_cookie, &entry, &name) == 0) result.emplace(name);
  // Release the iteration state allocated by StartIteration() so the test does not leak it.
  EndIteration(iteration_cookie);
  // Compare before closing the archive: the collected views are not owned by |result|.
  ASSERT_EQ(names, result);

  CloseArchive(handle);
}