Merge "Support parsing of data descriptor" am: fcf2d8b89b

Change-Id: I4e93b4535f8d7d268aed2b3396f2008687fedb5b
This commit is contained in:
Tianjie Xu 2020-03-31 22:04:34 +00:00 committed by Automerger Merge Worker
commit 6476fabea6
8 changed files with 179 additions and 78 deletions

View file

@ -77,6 +77,10 @@ struct ZipEntry {
// footer.
uint32_t uncompressed_length;
// If the value of uncompressed length and compressed length are stored in
// the zip64 extended info of the extra field.
bool zip64_format_size{false};
// The offset to the start of data for this ZipEntry.
off64_t offset;

View file

@ -25,14 +25,18 @@ import zipfile
import time
class Zip64Test(unittest.TestCase):
@staticmethod
def _WriteFile(path, size_in_kib):
contents = os.path.basename(path)[0] * 1024
with open(path, 'w') as f:
for it in range(0, size_in_kib):
f.write(contents)
@staticmethod
def _AddEntriesToZip(output_zip, entries_dict=None):
for name, size in entries_dict.items():
contents = name[0] * 1024
file_path = tempfile.NamedTemporaryFile()
with open(file_path.name, 'w') as f:
for it in range(0, size):
f.write(contents)
Zip64Test._WriteFile(file_path.name, size)
output_zip.write(file_path.name, arcname = name)
def _getEntryNames(self, zip_name):
@ -93,6 +97,22 @@ class Zip64Test(unittest.TestCase):
self._ExtractEntries(zip_path.name)
def test_forceDataDescriptor(self):
file_path = tempfile.NamedTemporaryFile(suffix='.txt')
# TODO create the entry > 4GiB.
self._WriteFile(file_path.name, 1024)
zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
pass
# The fd option force writes a data descriptor
cmd = ['zip', '-fd', zip_path.name, file_path.name]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
proc.communicate()
read_names = self._getEntryNames(zip_path.name)
self.assertEquals([file_path.name[1:]], read_names)
self._ExtractEntries(zip_path.name)
if __name__ == '__main__':
testsuite = unittest.TestLoader().discover(
os.path.dirname(os.path.realpath(__file__)))

BIN
libziparchive/testdata/zip64.zip vendored Normal file

Binary file not shown.

View file

@ -57,8 +57,6 @@
#include "zip_archive_common.h"
#include "zip_archive_private.h"
using android::base::get_unaligned;
// Used to turn on crc checks - verify that the content CRC matches the values
// specified in the local file header and the central directory.
static const bool kCrcChecksEnabled = false;
@ -221,7 +219,7 @@ static ZipError FindCentralDirectoryInfo(const char* debug_file_name, ZipArchive
for (; i >= 0; i--) {
if (scan_buffer[i] == 0x50) {
uint32_t* sig_addr = reinterpret_cast<uint32_t*>(&scan_buffer[i]);
if (get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
if (android::base::get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
ALOGV("+++ Found EOCD at buf+%d", i);
break;
}
@ -360,8 +358,9 @@ static ZipError ParseZip64ExtendedInfoInExtraField(
// Data Size - 2 bytes
uint16_t offset = 0;
while (offset < extraFieldLength - 4) {
auto headerId = get_unaligned<uint16_t>(extraFieldStart + offset);
auto dataSize = get_unaligned<uint16_t>(extraFieldStart + offset + 2);
auto readPtr = const_cast<uint8_t*>(extraFieldStart + offset);
auto headerId = ConsumeUnaligned<uint16_t>(&readPtr);
auto dataSize = ConsumeUnaligned<uint16_t>(&readPtr);
offset += 4;
if (dataSize > extraFieldLength - offset) {
@ -376,54 +375,44 @@ static ZipError ParseZip64ExtendedInfoInExtraField(
continue;
}
uint16_t expectedDataSize = 0;
// We expect the extended field to include both uncompressed and compressed size.
if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
expectedDataSize += 16;
std::optional<uint64_t> uncompressedFileSize;
std::optional<uint64_t> compressedFileSize;
std::optional<uint64_t> localHeaderOffset;
if (zip32UncompressedSize == UINT32_MAX) {
uncompressedFileSize = ConsumeUnaligned<uint64_t>(&readPtr);
}
if (zip32CompressedSize == UINT32_MAX) {
compressedFileSize = ConsumeUnaligned<uint64_t>(&readPtr);
}
if (zip32LocalFileHeaderOffset == UINT32_MAX) {
expectedDataSize += 8;
localHeaderOffset = ConsumeUnaligned<uint64_t>(&readPtr);
}
if (expectedDataSize == 0) {
// calculate how many bytes we read after the data size field.
size_t bytesRead = readPtr - (extraFieldStart + offset);
if (bytesRead == 0) {
ALOGW("Zip: Data size should not be 0 in zip64 extended field");
return kInvalidFile;
}
if (dataSize != expectedDataSize) {
if (dataSize != bytesRead) {
auto localOffsetString = zip32LocalFileHeaderOffset.has_value()
? std::to_string(zip32LocalFileHeaderOffset.value())
: "missing";
ALOGW("Zip: Invalid data size in zip64 extended field, expect %" PRIu16 ", get %" PRIu16
ALOGW("Zip: Invalid data size in zip64 extended field, expect %zu , get %" PRIu16
", uncompressed size %" PRIu32 ", compressed size %" PRIu32 ", local header offset %s",
expectedDataSize, dataSize, zip32UncompressedSize, zip32CompressedSize,
bytesRead, dataSize, zip32UncompressedSize, zip32CompressedSize,
localOffsetString.c_str());
return kInvalidFile;
}
std::optional<uint64_t> uncompressedFileSize;
std::optional<uint64_t> compressedFileSize;
std::optional<uint64_t> localHeaderOffset;
if (zip32UncompressedSize == UINT32_MAX || zip32CompressedSize == UINT32_MAX) {
uncompressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset);
compressedFileSize = get_unaligned<uint64_t>(extraFieldStart + offset + 8);
offset += 16;
// TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
// for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
// bytes counter to replace stream.total_out.
if (uncompressedFileSize.value() >= UINT32_MAX || compressedFileSize.value() >= UINT32_MAX) {
ALOGW(
"Zip: File size larger than UINT32_MAX isn't supported yet. uncompressed size %" PRIu64
", compressed size %" PRIu64,
uncompressedFileSize.value(), compressedFileSize.value());
return kInvalidFile;
}
}
if (zip32LocalFileHeaderOffset == UINT32_MAX) {
localHeaderOffset = get_unaligned<uint64_t>(extraFieldStart + offset);
offset += 8;
// TODO(xunchang) Support handling file large than UINT32_MAX. It's theoretically possible
// for libz to (de)compressing file larger than UINT32_MAX. But we should use our own
// bytes counter to replace stream.total_out.
if ((uncompressedFileSize.has_value() && uncompressedFileSize.value() > UINT32_MAX) ||
(compressedFileSize.has_value() && compressedFileSize.value() > UINT32_MAX)) {
ALOGW("Zip: File size larger than UINT32_MAX isn't supported yet");
return kInvalidFile;
}
zip64Info->uncompressed_file_size = uncompressedFileSize;
@ -625,7 +614,8 @@ void CloseArchive(ZipArchiveHandle archive) {
}
static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry) {
uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
// Maximum possible size for data descriptor: 2 * 4 + 2 * 8 = 24 bytes
uint8_t ddBuf[24];
off64_t offset = entry->offset;
if (entry->method != kCompressStored) {
offset += entry->compressed_length;
@ -638,18 +628,26 @@ static int32_t ValidateDataDescriptor(MappedZipFile& mapped_zip, ZipEntry* entry
}
const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
const uint16_t ddOffset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + ddOffset);
uint8_t* ddReadPtr = (ddSignature == DataDescriptor::kOptSignature) ? ddBuf + 4 : ddBuf;
DataDescriptor descriptor{};
descriptor.crc32 = ConsumeUnaligned<uint32_t>(&ddReadPtr);
if (entry->zip64_format_size) {
descriptor.compressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
descriptor.uncompressed_size = ConsumeUnaligned<uint64_t>(&ddReadPtr);
} else {
descriptor.compressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
descriptor.uncompressed_size = ConsumeUnaligned<uint32_t>(&ddReadPtr);
}
// Validate that the values in the data descriptor match those in the central
// directory.
if (entry->compressed_length != descriptor->compressed_size ||
entry->uncompressed_length != descriptor->uncompressed_size ||
entry->crc32 != descriptor->crc32) {
if (entry->compressed_length != descriptor.compressed_size ||
entry->uncompressed_length != descriptor.uncompressed_size ||
entry->crc32 != descriptor.crc32) {
ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 ", %" PRIx32
"}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
"}, was {%" PRIu64 ", %" PRIu64 ", %" PRIx32 "}",
entry->compressed_length, entry->uncompressed_length, entry->crc32,
descriptor->compressed_size, descriptor->uncompressed_size, descriptor->crc32);
descriptor.compressed_size, descriptor.uncompressed_size, descriptor.crc32);
return kInconsistentInformation;
}
@ -706,18 +704,14 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
return status;
}
if (cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX) {
CHECK(zip64_info.uncompressed_file_size.has_value());
CHECK(zip64_info.compressed_file_size.has_value());
// TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
data->uncompressed_length = static_cast<uint32_t>(zip64_info.uncompressed_file_size.value());
data->compressed_length = static_cast<uint32_t>(zip64_info.compressed_file_size.value());
}
if (local_header_offset == UINT32_MAX) {
CHECK(zip64_info.local_header_offset.has_value());
local_header_offset = zip64_info.local_header_offset.value();
}
// TODO(xunchang) remove the size limit and support entry length > UINT32_MAX.
data->uncompressed_length =
static_cast<uint32_t>(zip64_info.uncompressed_file_size.value_or(cdr->uncompressed_size));
data->compressed_length =
static_cast<uint32_t>(zip64_info.compressed_file_size.value_or(cdr->compressed_size));
local_header_offset = zip64_info.local_header_offset.value_or(local_header_offset);
data->zip64_format_size =
cdr->uncompressed_size == UINT32_MAX || cdr->compressed_size == UINT32_MAX;
}
if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
@ -766,6 +760,13 @@ static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
uint64_t lfh_uncompressed_size = lfh->uncompressed_size;
uint64_t lfh_compressed_size = lfh->compressed_size;
if (lfh_uncompressed_size == UINT32_MAX || lfh_compressed_size == UINT32_MAX) {
if (lfh_uncompressed_size != UINT32_MAX || lfh_compressed_size != UINT32_MAX) {
ALOGW(
"Zip: The zip64 extended field in the local header MUST include BOTH original and "
"compressed file size fields.");
return kInvalidFile;
}
const off64_t lfh_extra_field_offset = name_offset + lfh->file_name_length;
const uint16_t lfh_extra_field_size = lfh->extra_field_length;
if (lfh_extra_field_offset > cd_offset - lfh_extra_field_size) {

View file

@ -165,15 +165,24 @@ struct DataDescriptor {
// CRC-32 checksum of the entry.
uint32_t crc32;
// Compressed size of the entry.
uint32_t compressed_size;
// Uncompressed size of the entry.
uint32_t uncompressed_size;
// For ZIP64 format archives, the compressed and uncompressed sizes are 8
// bytes each. Also, the ZIP64 format MAY be used regardless of the size
// of a file. When extracting, if the zip64 extended information extra field
// is present for the file the compressed and uncompressed sizes will be 8
// byte values.
// Compressed size of the entry, the field can be either 4 bytes or 8 bytes
// in the zip file.
uint64_t compressed_size;
// Uncompressed size of the entry, the field can be either 4 bytes or 8 bytes
// in the zip file.
uint64_t uncompressed_size;
private:
DataDescriptor() = default;
DISALLOW_COPY_AND_ASSIGN(DataDescriptor);
} __attribute__((packed));
};
// The zip64 end of central directory locator helps to find the zip64 EOCD.
struct Zip64EocdLocator {

View file

@ -28,6 +28,7 @@
#include "android-base/macros.h"
#include "android-base/mapped_file.h"
#include "android-base/memory.h"
#include "zip_cd_entry_map.h"
#include "zip_error.h"
@ -104,3 +105,20 @@ struct ZipArchive {
bool InitializeCentralDirectory(off64_t cd_start_offset, size_t cd_size);
};
int32_t ExtractToWriter(ZipArchiveHandle handle, ZipEntry* entry, zip_archive::Writer* writer);
// Reads the unaligned data of type |T| and auto increment the offset.
template <typename T>
static T ConsumeUnaligned(uint8_t** address) {
auto ret = android::base::get_unaligned<T>(*address);
*address += sizeof(T);
return ret;
}
// Writes the unaligned data of type |T| and auto increment the offset.
template <typename T>
void EmitUnaligned(uint8_t** address, T data) {
android::base::put_unaligned<T>(*address, data);
*address += sizeof(T);
}

View file

@ -979,10 +979,11 @@ class Zip64ParseTest : public ::testing::Test {
std::vector<uint8_t> extended_field;
// Fake data to mimic the compressed bytes in the zipfile.
std::vector<uint8_t> compressed_bytes;
std::vector<uint8_t> data_descriptor;
size_t GetSize() const {
return local_file_header.size() + file_name.size() + extended_field.size() +
compressed_bytes.size();
compressed_bytes.size() + data_descriptor.size();
}
void CopyToOutput(std::vector<uint8_t>* output) const {
@ -990,6 +991,7 @@ class Zip64ParseTest : public ::testing::Test {
std::copy(file_name.begin(), file_name.end(), std::back_inserter(*output));
std::copy(extended_field.begin(), extended_field.end(), std::back_inserter(*output));
std::copy(compressed_bytes.begin(), compressed_bytes.end(), std::back_inserter(*output));
std::copy(data_descriptor.begin(), data_descriptor.end(), std::back_inserter(*output));
}
};
@ -1057,7 +1059,7 @@ class Zip64ParseTest : public ::testing::Test {
// Add an entry to the zipfile, construct the corresponding local header and cd entry.
void AddEntry(const std::string& name, const std::vector<uint8_t>& content,
bool uncompressed_size_in_extended, bool compressed_size_in_extended,
bool local_offset_in_extended) {
bool local_offset_in_extended, bool include_data_descriptor = false) {
auto uncompressed_size = static_cast<uint32_t>(content.size());
auto compressed_size = static_cast<uint32_t>(content.size());
uint32_t local_file_header_offset = 0;
@ -1084,6 +1086,21 @@ class Zip64ParseTest : public ::testing::Test {
ConstructLocalFileHeader(name, &local_entry.local_file_header, uncompressed_size,
compressed_size);
ConstructExtendedField(zip64_fields, &local_entry.extended_field);
if (include_data_descriptor) {
size_t descriptor_size = compressed_size_in_extended ? 24 : 16;
local_entry.data_descriptor.resize(descriptor_size);
uint8_t* write_ptr = local_entry.data_descriptor.data();
EmitUnaligned<uint32_t>(&write_ptr, DataDescriptor::kOptSignature);
EmitUnaligned<uint32_t>(&write_ptr, 0 /* crc */);
if (compressed_size_in_extended) {
EmitUnaligned<uint64_t>(&write_ptr, compressed_size_in_extended);
EmitUnaligned<uint64_t>(&write_ptr, uncompressed_size_in_extended);
} else {
EmitUnaligned<uint32_t>(&write_ptr, compressed_size_in_extended);
EmitUnaligned<uint32_t>(&write_ptr, uncompressed_size_in_extended);
}
}
file_entries_.push_back(std::move(local_entry));
if (local_offset_in_extended) {
@ -1270,3 +1287,38 @@ TEST_F(Zip64ParseTest, zip64EocdWrongLocatorOffset) {
0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
CloseArchive(handle);
}
TEST_F(Zip64ParseTest, extract) {
std::vector<uint8_t> content(200, 'a');
AddEntry("a.txt", content, true, true, true);
ConstructEocd();
ConstructZipFile();
ZipArchiveHandle handle;
ASSERT_EQ(
0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
ZipEntry entry;
ASSERT_EQ(0, FindEntry(handle, "a.txt", &entry));
VectorWriter writer;
ASSERT_EQ(0, ExtractToWriter(handle, &entry, &writer));
ASSERT_EQ(content, writer.GetOutput());
}
TEST_F(Zip64ParseTest, extractWithDataDescriptor) {
std::vector<uint8_t> content(300, 'b');
AddEntry("a.txt", std::vector<uint8_t>(200, 'a'), true, true, true);
AddEntry("b.txt", content, true, true, true, true /* data descriptor */);
ConstructEocd();
ConstructZipFile();
ZipArchiveHandle handle;
ASSERT_EQ(
0, OpenArchiveFromMemory(zip_content_.data(), zip_content_.size(), "debug_zip64", &handle));
ZipEntry entry;
ASSERT_EQ(0, FindEntry(handle, "b.txt", &entry));
VectorWriter writer;
ASSERT_EQ(0, ExtractToWriter(handle, &entry, &writer));
ASSERT_EQ(content, writer.GetOutput());
}

View file

@ -475,19 +475,16 @@ int32_t ZipWriter::FinishEntry() {
if (ShouldUseDataDescriptor()) {
// Some versions of ZIP don't allow STORED data to have a trailing DataDescriptor.
// If this file is not seekable, or if the data is compressed, write a DataDescriptor.
const uint32_t sig = DataDescriptor::kOptSignature;
if (fwrite(&sig, sizeof(sig), 1, file_) != 1) {
// We haven't supported zip64 format yet. Write both uncompressed size and compressed
// size as uint32_t.
std::vector<uint32_t> dataDescriptor = {
DataDescriptor::kOptSignature, current_file_entry_.crc32,
current_file_entry_.compressed_size, current_file_entry_.uncompressed_size};
if (fwrite(dataDescriptor.data(), dataDescriptor.size() * sizeof(uint32_t), 1, file_) != 1) {
return HandleError(kIoError);
}
DataDescriptor dd = {};
dd.crc32 = current_file_entry_.crc32;
dd.compressed_size = current_file_entry_.compressed_size;
dd.uncompressed_size = current_file_entry_.uncompressed_size;
if (fwrite(&dd, sizeof(dd), 1, file_) != 1) {
return HandleError(kIoError);
}
current_offset_ += sizeof(DataDescriptor::kOptSignature) + sizeof(dd);
current_offset_ += sizeof(uint32_t) * dataDescriptor.size();
} else {
// Seek back to the header and rewrite to include the size.
if (fseeko(file_, current_file_entry_.local_file_header_offset, SEEK_SET) != 0) {