libziparchive: Add ability to backup in ZipWriter

Based on the compressed size of a file entry,
the decision needs to be made to instead store the file
uncompressed. This adds support to ZipWriter to backup
its last file entry.

The file is now always truncated when the EOCD is written out,
to account for the case where a file entry is backed-up and the
resulting file written is much smaller, leaving garbage data at
the end of the file.

This change also includes a rename of FileInfo -> FileEntry.
This struct was private (now public), so it shouldn't affect any
clients.

Bug: 35461578
Test: make ziparchive-tests
Change-Id: I23dc584406274ab7b8ce62b3fbc3562ca4c2603e
This commit is contained in:
Adam Lesinski 2017-03-16 13:23:51 -07:00
parent a4bc98941a
commit 537713bace
3 changed files with 245 additions and 101 deletions

View file

@ -17,15 +17,16 @@
#ifndef LIBZIPARCHIVE_ZIPWRITER_H_
#define LIBZIPARCHIVE_ZIPWRITER_H_
#include "android-base/macros.h"
#include <utils/Compat.h>
#include <cstdio>
#include <ctime>
#include <zlib.h>
#include <memory>
#include <string>
#include <vector>
#include <zlib.h>
#include "android-base/macros.h"
#include "utils/Compat.h"
/**
* Writes a Zip file via a stateful interface.
@ -63,6 +64,20 @@ public:
kAlign32 = 0x02,
};
/**
* A struct representing a zip file entry.
*/
struct FileEntry {
std::string path;
uint16_t compression_method;
uint32_t crc32;
uint32_t compressed_size;
uint32_t uncompressed_size;
uint16_t last_mod_time;
uint16_t last_mod_date;
uint32_t local_file_header_offset;
};
static const char* ErrorCodeString(int32_t error_code);
/**
@ -121,6 +136,19 @@ public:
*/
int32_t FinishEntry();
/**
* Discards the last-written entry. Can only be called after an entry has been written using
* FinishEntry().
* Returns 0 on success, and an error value < 0 on failure.
*/
int32_t DiscardLastEntry();
/**
* Sets `out_entry` to the last entry written after a call to FinishEntry().
* Returns 0 on success, and an error value < 0 if no entries have been written.
*/
int32_t GetLastEntry(FileEntry* out_entry);
/**
* Writes the Central Directory Headers and flushes the zip file stream.
* Returns 0 on success, and an error value < 0 on failure.
@ -130,22 +158,11 @@ public:
private:
DISALLOW_COPY_AND_ASSIGN(ZipWriter);
struct FileInfo {
std::string path;
uint16_t compression_method;
uint32_t crc32;
uint32_t compressed_size;
uint32_t uncompressed_size;
uint16_t last_mod_time;
uint16_t last_mod_date;
uint32_t local_file_header_offset;
};
int32_t HandleError(int32_t error_code);
int32_t PrepareDeflate();
int32_t StoreBytes(FileInfo* file, const void* data, size_t len);
int32_t CompressBytes(FileInfo* file, const void* data, size_t len);
int32_t FlushCompressedBytes(FileInfo* file);
int32_t StoreBytes(FileEntry* file, const void* data, size_t len);
int32_t CompressBytes(FileEntry* file, const void* data, size_t len);
int32_t FlushCompressedBytes(FileEntry* file);
enum class State {
kWritingZip,
@ -157,7 +174,8 @@ private:
FILE* file_;
off64_t current_offset_;
State state_;
std::vector<FileInfo> files_;
std::vector<FileEntry> files_;
FileEntry current_file_entry_;
std::unique_ptr<z_stream, void(*)(z_stream*)> z_stream_;
std::vector<uint8_t> buffer_;

View file

@ -14,21 +14,22 @@
* limitations under the License.
*/
#include "entry_name_utils-inl.h"
#include "zip_archive_common.h"
#include "ziparchive/zip_writer.h"
#include <utils/Log.h>
#include <sys/param.h>
#include <cassert>
#include <cstdio>
#include <memory>
#include <vector>
#include <sys/param.h>
#include <zlib.h>
#define DEF_MEM_LEVEL 8 // normally in zutil.h?
#include <memory>
#include <vector>
#include "android-base/logging.h"
#include "utils/Log.h"
#include "entry_name_utils-inl.h"
#include "zip_archive_common.h"
#if !defined(powerof2)
#define powerof2(x) ((((x)-1)&(x))==0)
#endif
@ -171,12 +172,12 @@ int32_t ZipWriter::StartAlignedEntryWithTime(const char* path, size_t flags,
return kInvalidAlignment;
}
FileInfo fileInfo = {};
fileInfo.path = std::string(path);
fileInfo.local_file_header_offset = current_offset_;
current_file_entry_ = {};
current_file_entry_.path = path;
current_file_entry_.local_file_header_offset = current_offset_;
if (!IsValidEntryName(reinterpret_cast<const uint8_t*>(fileInfo.path.data()),
fileInfo.path.size())) {
if (!IsValidEntryName(reinterpret_cast<const uint8_t*>(current_file_entry_.path.data()),
current_file_entry_.path.size())) {
return kInvalidEntryName;
}
@ -188,24 +189,24 @@ int32_t ZipWriter::StartAlignedEntryWithTime(const char* path, size_t flags,
header.gpb_flags |= kGPBDDFlagMask;
if (flags & ZipWriter::kCompress) {
fileInfo.compression_method = kCompressDeflated;
current_file_entry_.compression_method = kCompressDeflated;
int32_t result = PrepareDeflate();
if (result != kNoError) {
return result;
}
} else {
fileInfo.compression_method = kCompressStored;
current_file_entry_.compression_method = kCompressStored;
}
header.compression_method = fileInfo.compression_method;
header.compression_method = current_file_entry_.compression_method;
ExtractTimeAndDate(time, &fileInfo.last_mod_time, &fileInfo.last_mod_date);
header.last_mod_time = fileInfo.last_mod_time;
header.last_mod_date = fileInfo.last_mod_date;
ExtractTimeAndDate(time, &current_file_entry_.last_mod_time, &current_file_entry_.last_mod_date);
header.last_mod_time = current_file_entry_.last_mod_time;
header.last_mod_date = current_file_entry_.last_mod_date;
header.file_name_length = fileInfo.path.size();
header.file_name_length = current_file_entry_.path.size();
off64_t offset = current_offset_ + sizeof(header) + fileInfo.path.size();
off64_t offset = current_offset_ + sizeof(header) + current_file_entry_.path.size();
std::vector<char> zero_padding;
if (alignment != 0 && (offset & (alignment - 1))) {
// Pad the extra field so the data will be aligned.
@ -220,7 +221,8 @@ int32_t ZipWriter::StartAlignedEntryWithTime(const char* path, size_t flags,
return HandleError(kIoError);
}
if (fwrite(path, sizeof(*path), fileInfo.path.size(), file_) != fileInfo.path.size()) {
if (fwrite(path, sizeof(*path), current_file_entry_.path.size(), file_)
!= current_file_entry_.path.size()) {
return HandleError(kIoError);
}
@ -230,15 +232,37 @@ int32_t ZipWriter::StartAlignedEntryWithTime(const char* path, size_t flags,
return HandleError(kIoError);
}
files_.emplace_back(std::move(fileInfo));
current_offset_ = offset;
state_ = State::kWritingEntry;
return kNoError;
}
int32_t ZipWriter::DiscardLastEntry() {
if (state_ != State::kWritingZip || files_.empty()) {
return kInvalidState;
}
FileEntry& last_entry = files_.back();
current_offset_ = last_entry.local_file_header_offset;
if (fseeko(file_, current_offset_, SEEK_SET) != 0) {
return HandleError(kIoError);
}
files_.pop_back();
return kNoError;
}
int32_t ZipWriter::GetLastEntry(FileEntry* out_entry) {
CHECK(out_entry != nullptr);
if (files_.empty()) {
return kInvalidState;
}
*out_entry = files_.back();
return kNoError;
}
int32_t ZipWriter::PrepareDeflate() {
assert(state_ == State::kWritingZip);
CHECK(state_ == State::kWritingZip);
// Initialize the z_stream for compression.
z_stream_ = std::unique_ptr<z_stream, void(*)(z_stream*)>(new z_stream(), DeleteZStream);
@ -269,25 +293,25 @@ int32_t ZipWriter::WriteBytes(const void* data, size_t len) {
return HandleError(kInvalidState);
}
FileInfo& currentFile = files_.back();
int32_t result = kNoError;
if (currentFile.compression_method & kCompressDeflated) {
result = CompressBytes(&currentFile, data, len);
if (current_file_entry_.compression_method & kCompressDeflated) {
result = CompressBytes(&current_file_entry_, data, len);
} else {
result = StoreBytes(&currentFile, data, len);
result = StoreBytes(&current_file_entry_, data, len);
}
if (result != kNoError) {
return result;
}
currentFile.crc32 = crc32(currentFile.crc32, reinterpret_cast<const Bytef*>(data), len);
currentFile.uncompressed_size += len;
current_file_entry_.crc32 = crc32(current_file_entry_.crc32,
reinterpret_cast<const Bytef*>(data), len);
current_file_entry_.uncompressed_size += len;
return kNoError;
}
int32_t ZipWriter::StoreBytes(FileInfo* file, const void* data, size_t len) {
assert(state_ == State::kWritingEntry);
int32_t ZipWriter::StoreBytes(FileEntry* file, const void* data, size_t len) {
CHECK(state_ == State::kWritingEntry);
if (fwrite(data, 1, len, file_) != len) {
return HandleError(kIoError);
@ -297,11 +321,11 @@ int32_t ZipWriter::StoreBytes(FileInfo* file, const void* data, size_t len) {
return kNoError;
}
int32_t ZipWriter::CompressBytes(FileInfo* file, const void* data, size_t len) {
assert(state_ == State::kWritingEntry);
assert(z_stream_);
assert(z_stream_->next_out != nullptr);
assert(z_stream_->avail_out != 0);
int32_t ZipWriter::CompressBytes(FileEntry* file, const void* data, size_t len) {
CHECK(state_ == State::kWritingEntry);
CHECK(z_stream_);
CHECK(z_stream_->next_out != nullptr);
CHECK(z_stream_->avail_out != 0);
// Prepare the input.
z_stream_->next_in = reinterpret_cast<const uint8_t*>(data);
@ -331,17 +355,17 @@ int32_t ZipWriter::CompressBytes(FileInfo* file, const void* data, size_t len) {
return kNoError;
}
int32_t ZipWriter::FlushCompressedBytes(FileInfo* file) {
assert(state_ == State::kWritingEntry);
assert(z_stream_);
assert(z_stream_->next_out != nullptr);
assert(z_stream_->avail_out != 0);
int32_t ZipWriter::FlushCompressedBytes(FileEntry* file) {
CHECK(state_ == State::kWritingEntry);
CHECK(z_stream_);
CHECK(z_stream_->next_out != nullptr);
CHECK(z_stream_->avail_out != 0);
// Keep deflating while there isn't enough space in the buffer to
// to complete the compress.
int zerr;
while ((zerr = deflate(z_stream_.get(), Z_FINISH)) == Z_OK) {
assert(z_stream_->avail_out == 0);
CHECK(z_stream_->avail_out == 0);
size_t write_bytes = z_stream_->next_out - buffer_.data();
if (fwrite(buffer_.data(), 1, write_bytes, file_) != write_bytes) {
return HandleError(kIoError);
@ -373,9 +397,8 @@ int32_t ZipWriter::FinishEntry() {
return kInvalidState;
}
FileInfo& currentFile = files_.back();
if (currentFile.compression_method & kCompressDeflated) {
int32_t result = FlushCompressedBytes(&currentFile);
if (current_file_entry_.compression_method & kCompressDeflated) {
int32_t result = FlushCompressedBytes(&current_file_entry_);
if (result != kNoError) {
return result;
}
@ -388,13 +411,15 @@ int32_t ZipWriter::FinishEntry() {
}
DataDescriptor dd = {};
dd.crc32 = currentFile.crc32;
dd.compressed_size = currentFile.compressed_size;
dd.uncompressed_size = currentFile.uncompressed_size;
dd.crc32 = current_file_entry_.crc32;
dd.compressed_size = current_file_entry_.compressed_size;
dd.uncompressed_size = current_file_entry_.uncompressed_size;
if (fwrite(&dd, sizeof(dd), 1, file_) != 1) {
return HandleError(kIoError);
}
files_.emplace_back(std::move(current_file_entry_));
current_offset_ += sizeof(DataDescriptor::kOptSignature) + sizeof(dd);
state_ = State::kWritingZip;
return kNoError;
@ -406,7 +431,7 @@ int32_t ZipWriter::Finish() {
}
off64_t startOfCdr = current_offset_;
for (FileInfo& file : files_) {
for (FileEntry& file : files_) {
CentralDirectoryRecord cdr = {};
cdr.record_signature = CentralDirectoryRecord::kSignature;
cdr.gpb_flags |= kGPBDDFlagMask;
@ -442,11 +467,19 @@ int32_t ZipWriter::Finish() {
return HandleError(kIoError);
}
current_offset_ += sizeof(er);
// Since we can BackUp() and potentially finish writing at an offset less than one we had
// already written at, we must truncate the file.
if (ftruncate64(fileno(file_), current_offset_) != 0) {
return HandleError(kIoError);
}
if (fflush(file_) != 0) {
return HandleError(kIoError);
}
current_offset_ += sizeof(er);
state_ = State::kDone;
return kNoError;
}

View file

@ -23,6 +23,10 @@
#include <memory>
#include <vector>
static ::testing::AssertionResult AssertFileEntryContentsEq(const std::string& expected,
ZipArchiveHandle handle,
ZipEntry* zip_entry);
struct zipwriter : public ::testing::Test {
TemporaryFile* temp_file_;
int fd_;
@ -59,16 +63,10 @@ TEST_F(zipwriter, WriteUncompressedZipWithOneFile) {
ZipEntry data;
ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data));
EXPECT_EQ(strlen(expected), data.compressed_length);
EXPECT_EQ(strlen(expected), data.uncompressed_length);
EXPECT_EQ(kCompressStored, data.method);
char buffer[6];
EXPECT_EQ(0,
ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(&buffer), sizeof(buffer)));
buffer[5] = 0;
EXPECT_STREQ(expected, buffer);
EXPECT_EQ(strlen(expected), data.compressed_length);
ASSERT_EQ(strlen(expected), data.uncompressed_length);
ASSERT_TRUE(AssertFileEntryContentsEq(expected, handle, &data));
CloseArchive(handle);
}
@ -94,26 +92,19 @@ TEST_F(zipwriter, WriteUncompressedZipWithMultipleFiles) {
ZipArchiveHandle handle;
ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
char buffer[4];
ZipEntry data;
ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data));
EXPECT_EQ(kCompressStored, data.method);
EXPECT_EQ(2u, data.compressed_length);
EXPECT_EQ(2u, data.uncompressed_length);
ASSERT_EQ(0,
ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)));
buffer[2] = 0;
EXPECT_STREQ("he", buffer);
ASSERT_EQ(2u, data.uncompressed_length);
ASSERT_TRUE(AssertFileEntryContentsEq("he", handle, &data));
ASSERT_EQ(0, FindEntry(handle, ZipString("file/file.txt"), &data));
EXPECT_EQ(kCompressStored, data.method);
EXPECT_EQ(3u, data.compressed_length);
EXPECT_EQ(3u, data.uncompressed_length);
ASSERT_EQ(0,
ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)));
buffer[3] = 0;
EXPECT_STREQ("llo", buffer);
ASSERT_EQ(3u, data.uncompressed_length);
ASSERT_TRUE(AssertFileEntryContentsEq("llo", handle, &data));
ASSERT_EQ(0, FindEntry(handle, ZipString("file/file2.txt"), &data));
EXPECT_EQ(kCompressStored, data.method);
@ -143,7 +134,7 @@ TEST_F(zipwriter, WriteUncompressedZipFileWithAlignedFlag) {
CloseArchive(handle);
}
void ConvertZipTimeToTm(uint32_t& zip_time, struct tm* tm) {
static void ConvertZipTimeToTm(uint32_t& zip_time, struct tm* tm) {
memset(tm, 0, sizeof(struct tm));
tm->tm_hour = (zip_time >> 11) & 0x1f;
tm->tm_min = (zip_time >> 5) & 0x3f;
@ -264,14 +255,8 @@ TEST_F(zipwriter, WriteCompressedZipWithOneFile) {
ZipEntry data;
ASSERT_EQ(0, FindEntry(handle, ZipString("file.txt"), &data));
EXPECT_EQ(kCompressDeflated, data.method);
EXPECT_EQ(4u, data.uncompressed_length);
char buffer[5];
ASSERT_EQ(0,
ExtractToMemory(handle, &data, reinterpret_cast<uint8_t*>(buffer), arraysize(buffer)));
buffer[4] = 0;
EXPECT_STREQ("helo", buffer);
ASSERT_EQ(4u, data.uncompressed_length);
ASSERT_TRUE(AssertFileEntryContentsEq("helo", handle, &data));
CloseArchive(handle);
}
@ -319,3 +304,111 @@ TEST_F(zipwriter, CheckStartEntryErrors) {
ASSERT_EQ(-5, writer.StartAlignedEntry("align.txt", ZipWriter::kAlign32, 4096));
ASSERT_EQ(-6, writer.StartAlignedEntry("align.txt", 0, 3));
}
TEST_F(zipwriter, BackupRemovesTheLastFile) {
ZipWriter writer(file_);
const char* kKeepThis = "keep this";
const char* kDropThis = "drop this";
const char* kReplaceWithThis = "replace with this";
ZipWriter::FileEntry entry;
EXPECT_LT(writer.GetLastEntry(&entry), 0);
ASSERT_EQ(0, writer.StartEntry("keep.txt", 0));
ASSERT_EQ(0, writer.WriteBytes(kKeepThis, strlen(kKeepThis)));
ASSERT_EQ(0, writer.FinishEntry());
ASSERT_EQ(0, writer.GetLastEntry(&entry));
EXPECT_EQ("keep.txt", entry.path);
ASSERT_EQ(0, writer.StartEntry("drop.txt", 0));
ASSERT_EQ(0, writer.WriteBytes(kDropThis, strlen(kDropThis)));
ASSERT_EQ(0, writer.FinishEntry());
ASSERT_EQ(0, writer.GetLastEntry(&entry));
EXPECT_EQ("drop.txt", entry.path);
ASSERT_EQ(0, writer.DiscardLastEntry());
ASSERT_EQ(0, writer.GetLastEntry(&entry));
EXPECT_EQ("keep.txt", entry.path);
ASSERT_EQ(0, writer.StartEntry("replace.txt", 0));
ASSERT_EQ(0, writer.WriteBytes(kReplaceWithThis, strlen(kReplaceWithThis)));
ASSERT_EQ(0, writer.FinishEntry());
ASSERT_EQ(0, writer.GetLastEntry(&entry));
EXPECT_EQ("replace.txt", entry.path);
ASSERT_EQ(0, writer.Finish());
// Verify that "drop.txt" does not exist.
ASSERT_GE(0, lseek(fd_, 0, SEEK_SET));
ZipArchiveHandle handle;
ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
ZipEntry data;
ASSERT_EQ(0, FindEntry(handle, ZipString("keep.txt"), &data));
ASSERT_TRUE(AssertFileEntryContentsEq(kKeepThis, handle, &data));
ASSERT_NE(0, FindEntry(handle, ZipString("drop.txt"), &data));
ASSERT_EQ(0, FindEntry(handle, ZipString("replace.txt"), &data));
ASSERT_TRUE(AssertFileEntryContentsEq(kReplaceWithThis, handle, &data));
CloseArchive(handle);
}
TEST_F(zipwriter, TruncateFileAfterBackup) {
ZipWriter writer(file_);
const char* kSmall = "small";
ASSERT_EQ(0, writer.StartEntry("small.txt", 0));
ASSERT_EQ(0, writer.WriteBytes(kSmall, strlen(kSmall)));
ASSERT_EQ(0, writer.FinishEntry());
ASSERT_EQ(0, writer.StartEntry("large.txt", 0));
std::vector<uint8_t> data;
data.resize(1024*1024, 0xef);
ASSERT_EQ(0, writer.WriteBytes(data.data(), data.size()));
ASSERT_EQ(0, writer.FinishEntry());
off64_t before_len = ftello64(file_);
ZipWriter::FileEntry entry;
ASSERT_EQ(0, writer.GetLastEntry(&entry));
ASSERT_EQ(0, writer.DiscardLastEntry());
ASSERT_EQ(0, writer.Finish());
off64_t after_len = ftello64(file_);
ASSERT_GT(before_len, after_len);
}
static ::testing::AssertionResult AssertFileEntryContentsEq(const std::string& expected,
ZipArchiveHandle handle,
ZipEntry* zip_entry) {
if (expected.size() != zip_entry->uncompressed_length) {
return ::testing::AssertionFailure() << "uncompressed entry size "
<< zip_entry->uncompressed_length << " does not match expected size " << expected.size();
}
std::string actual;
actual.resize(expected.size());
uint8_t* buffer = reinterpret_cast<uint8_t*>(&*actual.begin());
if (ExtractToMemory(handle, zip_entry, buffer, actual.size()) != 0) {
return ::testing::AssertionFailure() << "failed to extract entry";
}
if (expected != actual) {
return ::testing::AssertionFailure() << "actual zip_entry data '" << actual
<< "' does not match expected '" << expected << "'";
}
return ::testing::AssertionSuccess();
}