2017-03-26 22:36:49 +02:00
|
|
|
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <limits.h>
#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

#include <android-base/logging.h>
#include <android-base/parseint.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h>
|
|
|
|
|
2017-03-31 01:57:29 +02:00
|
|
|
// A Range is a pair of block numbers (first, second) describing the half-open interval
// [first, second): "first" is included, "second" is not.
using Range = std::pair<size_t, size_t>;
|
|
|
|
|
|
|
|
class RangeSet {
|
|
|
|
public:
|
|
|
|
RangeSet() : blocks_(0) {}
|
|
|
|
|
|
|
|
explicit RangeSet(std::vector<Range>&& pairs) {
|
|
|
|
CHECK_NE(pairs.size(), static_cast<size_t>(0)) << "Invalid number of tokens";
|
|
|
|
|
|
|
|
// Sanity check the input.
|
|
|
|
size_t result = 0;
|
|
|
|
for (const auto& range : pairs) {
|
|
|
|
CHECK_LT(range.first, range.second)
|
|
|
|
<< "Empty or negative range: " << range.first << ", " << range.second;
|
|
|
|
size_t sz = range.second - range.first;
|
|
|
|
CHECK_LE(result, SIZE_MAX - sz) << "RangeSet size overflow";
|
|
|
|
result += sz;
|
|
|
|
}
|
|
|
|
|
|
|
|
ranges_ = pairs;
|
|
|
|
blocks_ = result;
|
|
|
|
}
|
2017-03-26 22:36:49 +02:00
|
|
|
|
|
|
|
static RangeSet Parse(const std::string& range_text) {
|
|
|
|
std::vector<std::string> pieces = android::base::Split(range_text, ",");
|
|
|
|
CHECK_GE(pieces.size(), static_cast<size_t>(3)) << "Invalid range text: " << range_text;
|
|
|
|
|
|
|
|
size_t num;
|
|
|
|
CHECK(android::base::ParseUint(pieces[0], &num, static_cast<size_t>(INT_MAX)))
|
|
|
|
<< "Failed to parse the number of tokens: " << range_text;
|
|
|
|
|
|
|
|
CHECK_NE(num, static_cast<size_t>(0)) << "Invalid number of tokens: " << range_text;
|
|
|
|
CHECK_EQ(num % 2, static_cast<size_t>(0)) << "Number of tokens must be even: " << range_text;
|
|
|
|
CHECK_EQ(num, pieces.size() - 1) << "Mismatching number of tokens: " << range_text;
|
|
|
|
|
2017-03-31 01:57:29 +02:00
|
|
|
std::vector<Range> pairs;
|
2017-03-26 22:36:49 +02:00
|
|
|
for (size_t i = 0; i < num; i += 2) {
|
2017-03-31 01:57:29 +02:00
|
|
|
size_t first;
|
|
|
|
CHECK(android::base::ParseUint(pieces[i + 1], &first, static_cast<size_t>(INT_MAX)));
|
|
|
|
size_t second;
|
|
|
|
CHECK(android::base::ParseUint(pieces[i + 2], &second, static_cast<size_t>(INT_MAX)));
|
|
|
|
|
|
|
|
pairs.emplace_back(first, second);
|
2017-03-26 22:36:49 +02:00
|
|
|
}
|
|
|
|
|
2017-03-31 01:57:29 +02:00
|
|
|
return RangeSet(std::move(pairs));
|
2017-03-26 22:36:49 +02:00
|
|
|
}
|
|
|
|
|
2017-07-27 01:41:24 +02:00
|
|
|
std::string ToString() const {
|
|
|
|
if (ranges_.empty()) {
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
std::string result = std::to_string(ranges_.size() * 2);
|
|
|
|
for (const auto& r : ranges_) {
|
|
|
|
result += android::base::StringPrintf(",%zu,%zu", r.first, r.second);
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2017-03-26 22:36:49 +02:00
|
|
|
// Get the block number for the i-th (starting from 0) block in the RangeSet.
|
|
|
|
size_t GetBlockNumber(size_t idx) const {
|
2017-03-31 01:57:29 +02:00
|
|
|
CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")";
|
|
|
|
|
|
|
|
for (const auto& range : ranges_) {
|
|
|
|
if (idx < range.second - range.first) {
|
|
|
|
return range.first + idx;
|
2017-03-26 22:36:49 +02:00
|
|
|
}
|
2017-03-31 01:57:29 +02:00
|
|
|
idx -= (range.second - range.first);
|
2017-03-26 22:36:49 +02:00
|
|
|
}
|
2017-03-31 01:57:29 +02:00
|
|
|
|
|
|
|
CHECK(false) << "Failed to find block number for index " << idx;
|
2017-03-26 22:36:49 +02:00
|
|
|
return 0; // Unreachable, but to make compiler happy.
|
|
|
|
}
|
|
|
|
|
|
|
|
// RangeSet has half-closed half-open bounds. For example, "3,5" contains blocks 3 and 4. So "3,5"
|
|
|
|
// and "5,7" are not overlapped.
|
|
|
|
bool Overlaps(const RangeSet& other) const {
|
2017-03-31 01:57:29 +02:00
|
|
|
for (const auto& range : ranges_) {
|
|
|
|
size_t start = range.first;
|
|
|
|
size_t end = range.second;
|
|
|
|
for (const auto& other_range : other.ranges_) {
|
|
|
|
size_t other_start = other_range.first;
|
|
|
|
size_t other_end = other_range.second;
|
2017-03-26 22:36:49 +02:00
|
|
|
// [start, end) vs [other_start, other_end)
|
|
|
|
if (!(other_start >= end || start >= other_end)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-03-31 01:57:29 +02:00
|
|
|
// size() gives the number of Range's in this RangeSet.
|
|
|
|
size_t size() const {
|
|
|
|
return ranges_.size();
|
|
|
|
}
|
|
|
|
|
|
|
|
// blocks() gives the number of all blocks in this RangeSet.
|
|
|
|
size_t blocks() const {
|
|
|
|
return blocks_;
|
|
|
|
}
|
|
|
|
|
|
|
|
// We provide const iterators only.
|
|
|
|
std::vector<Range>::const_iterator cbegin() const {
|
|
|
|
return ranges_.cbegin();
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<Range>::const_iterator cend() const {
|
|
|
|
return ranges_.cend();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Need to provide begin()/end() since range-based loop expects begin()/end().
|
|
|
|
std::vector<Range>::const_iterator begin() const {
|
|
|
|
return ranges_.cbegin();
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<Range>::const_iterator end() const {
|
|
|
|
return ranges_.cend();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Reverse const iterators for MoveRange().
|
|
|
|
std::vector<Range>::const_reverse_iterator crbegin() const {
|
|
|
|
return ranges_.crbegin();
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<Range>::const_reverse_iterator crend() const {
|
|
|
|
return ranges_.crend();
|
|
|
|
}
|
|
|
|
|
|
|
|
const Range& operator[](size_t i) const {
|
|
|
|
return ranges_[i];
|
|
|
|
}
|
|
|
|
|
2017-03-26 22:36:49 +02:00
|
|
|
bool operator==(const RangeSet& other) const {
|
2017-03-31 01:57:29 +02:00
|
|
|
// The orders of Range's matter. "4,1,5,8,10" != "4,8,10,1,5".
|
|
|
|
return (ranges_ == other.ranges_);
|
2017-03-26 22:36:49 +02:00
|
|
|
}
|
2017-03-31 01:57:29 +02:00
|
|
|
|
|
|
|
bool operator!=(const RangeSet& other) const {
|
|
|
|
return ranges_ != other.ranges_;
|
|
|
|
}
|
|
|
|
|
2017-07-27 01:41:24 +02:00
|
|
|
protected:
|
2017-03-31 01:57:29 +02:00
|
|
|
// Actual limit for each value and the total number are both INT_MAX.
|
|
|
|
std::vector<Range> ranges_;
|
|
|
|
size_t blocks_;
|
2017-03-26 22:36:49 +02:00
|
|
|
};
|
2017-07-27 01:41:24 +02:00
|
|
|
|
|
|
|
// Size of one block in bytes; used below to convert byte offsets and lengths into block numbers.
static constexpr size_t kBlockSize = 4096;
|
|
|
|
|
|
|
|
// The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input
// files when we're handling large zip files. Specifically, we can treat the input file as a
// continuous RangeSet (i.e. RangeSet("0-99") for a 100-block file); and break it down into
// several smaller chunks based on the zip entries.

// For example, [source: 0-99] can be split into
// [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89]
// Here "10-29" simply means the 10th through the 29th block with respect to the original input
// file. Also, note that the split sources should be mutually exclusive, but they don't need to
// cover every block in the original source.
|
|
|
|
class SortedRangeSet : public RangeSet {
|
|
|
|
public:
|
|
|
|
SortedRangeSet() {}
|
|
|
|
|
|
|
|
// Ranges in the the set should be mutually exclusive; and they're sorted by the start block.
|
|
|
|
explicit SortedRangeSet(std::vector<Range>&& pairs) : RangeSet(std::move(pairs)) {
|
|
|
|
std::sort(ranges_.begin(), ranges_.end());
|
|
|
|
}
|
|
|
|
|
|
|
|
void Insert(const Range& to_insert) {
|
|
|
|
SortedRangeSet rs({ to_insert });
|
|
|
|
Insert(rs);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges.
|
|
|
|
void Insert(const SortedRangeSet& rs) {
|
|
|
|
if (rs.size() == 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// Merge and sort the two RangeSets.
|
|
|
|
std::vector<Range> temp = std::move(ranges_);
|
|
|
|
std::copy(rs.begin(), rs.end(), std::back_inserter(temp));
|
|
|
|
std::sort(temp.begin(), temp.end());
|
|
|
|
|
|
|
|
Clear();
|
|
|
|
// Trim overlaps and insert the result back to ranges_.
|
|
|
|
Range to_insert = temp.front();
|
|
|
|
for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) {
|
|
|
|
if (it->first <= to_insert.second) {
|
|
|
|
to_insert.second = std::max(to_insert.second, it->second);
|
|
|
|
} else {
|
|
|
|
ranges_.push_back(to_insert);
|
|
|
|
blocks_ += (to_insert.second - to_insert.first);
|
|
|
|
to_insert = *it;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
ranges_.push_back(to_insert);
|
|
|
|
blocks_ += (to_insert.second - to_insert.first);
|
|
|
|
}
|
|
|
|
|
|
|
|
void Clear() {
|
|
|
|
blocks_ = 0;
|
|
|
|
ranges_.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
using RangeSet::Overlaps;
|
|
|
|
bool Overlaps(size_t start, size_t len) const {
|
|
|
|
RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } });
|
|
|
|
return Overlaps(rs);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute the block range the file occupies, and insert that range.
|
|
|
|
void Insert(size_t start, size_t len) {
|
|
|
|
Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 };
|
|
|
|
Insert(to_insert);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Given an offset of the file, checks if the corresponding block (by considering the file as
|
|
|
|
// 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset
|
|
|
|
// within this SortedRangeSet.
|
|
|
|
//
|
|
|
|
// For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte.
|
|
|
|
// The mapped offset within a SortedRangeSet("1-9 15-19") is 10.
|
|
|
|
//
|
|
|
|
// An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th
|
|
|
|
// item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10
|
|
|
|
// + 10) in a range represented by this SortedRangeSet.
|
|
|
|
size_t GetOffsetInRangeSet(size_t old_offset) const {
|
|
|
|
size_t old_block_start = old_offset / kBlockSize;
|
|
|
|
size_t new_block_start = 0;
|
|
|
|
for (const auto& range : ranges_) {
|
|
|
|
// Find the index of old_block_start.
|
|
|
|
if (old_block_start >= range.second) {
|
|
|
|
new_block_start += (range.second - range.first);
|
|
|
|
} else if (old_block_start >= range.first) {
|
|
|
|
new_block_start += (old_block_start - range.first);
|
|
|
|
return (new_block_start * kBlockSize + old_offset % kBlockSize);
|
|
|
|
} else {
|
2017-08-18 02:50:56 +02:00
|
|
|
CHECK(false) << "block_start " << old_block_start
|
|
|
|
<< " is missing between two ranges: " << this->ToString();
|
2017-07-27 01:41:24 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
2017-08-18 02:50:56 +02:00
|
|
|
CHECK(false) << "block_start " << old_block_start
|
|
|
|
<< " exceeds the limit of current RangeSet: " << this->ToString();
|
2017-07-27 01:41:24 +02:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
};
|