Merge "Add implemention of SortedRangeSet" am: 64cba55fbc
am: fbd4b10310
Change-Id: I2c6bc65375be433e79db8861e3c5e936847c897e
This commit is contained in:
commit
a09376ed90
2 changed files with 162 additions and 1 deletions
|
@ -110,3 +110,50 @@ TEST(RangeSetTest, iterators) {
|
||||||
}
|
}
|
||||||
ASSERT_EQ((std::vector<Range>{ Range{ 8, 10 }, Range{ 1, 5 } }), ranges);
|
ASSERT_EQ((std::vector<Range>{ Range{ 8, 10 }, Range{ 1, 5 } }), ranges);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(RangeSetTest, tostring) {
|
||||||
|
ASSERT_EQ("2,1,6", RangeSet::Parse("2,1,6").ToString());
|
||||||
|
ASSERT_EQ("4,1,5,8,10", RangeSet::Parse("4,1,5,8,10").ToString());
|
||||||
|
ASSERT_EQ("6,1,3,4,6,15,22", RangeSet::Parse("6,1,3,4,6,15,22").ToString());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(SortedRangeSetTest, insertion) {
|
||||||
|
SortedRangeSet rs({ { 2, 3 }, { 4, 6 }, { 8, 14 } });
|
||||||
|
rs.Insert({ 1, 2 });
|
||||||
|
ASSERT_EQ(SortedRangeSet({ { 1, 3 }, { 4, 6 }, { 8, 14 } }), rs);
|
||||||
|
ASSERT_EQ(static_cast<size_t>(10), rs.blocks());
|
||||||
|
rs.Insert({ 3, 5 });
|
||||||
|
ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 } }), rs);
|
||||||
|
ASSERT_EQ(static_cast<size_t>(11), rs.blocks());
|
||||||
|
|
||||||
|
SortedRangeSet r1({ { 20, 22 }, { 15, 18 } });
|
||||||
|
rs.Insert(r1);
|
||||||
|
ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 }, { 15, 18 }, { 20, 22 } }), rs);
|
||||||
|
ASSERT_EQ(static_cast<size_t>(16), rs.blocks());
|
||||||
|
|
||||||
|
SortedRangeSet r2({ { 2, 7 }, { 15, 21 }, { 20, 25 } });
|
||||||
|
rs.Insert(r2);
|
||||||
|
ASSERT_EQ(SortedRangeSet({ { 1, 7 }, { 8, 14 }, { 15, 25 } }), rs);
|
||||||
|
ASSERT_EQ(static_cast<size_t>(22), rs.blocks());
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(SortedRangeSetTest, file_range) {
|
||||||
|
SortedRangeSet rs;
|
||||||
|
rs.Insert(4096, 4096);
|
||||||
|
ASSERT_EQ(SortedRangeSet({ { 1, 2 } }), rs);
|
||||||
|
// insert block 2-9
|
||||||
|
rs.Insert(4096 * 3 - 1, 4096 * 7);
|
||||||
|
ASSERT_EQ(SortedRangeSet({ { 1, 10 } }), rs);
|
||||||
|
// insert block 15-19
|
||||||
|
rs.Insert(4096 * 15 + 1, 4096 * 4);
|
||||||
|
ASSERT_EQ(SortedRangeSet({ { 1, 10 }, { 15, 20 } }), rs);
|
||||||
|
|
||||||
|
// rs overlaps block 2-2
|
||||||
|
ASSERT_TRUE(rs.Overlaps(4096 * 2 - 1, 10));
|
||||||
|
ASSERT_FALSE(rs.Overlaps(4096 * 10, 4096 * 5));
|
||||||
|
|
||||||
|
ASSERT_EQ(static_cast<size_t>(10), rs.GetOffsetInRangeSet(4106));
|
||||||
|
ASSERT_EQ(static_cast<size_t>(40970), rs.GetOffsetInRangeSet(4096 * 16 + 10));
|
||||||
|
// block#10 not in range.
|
||||||
|
ASSERT_EXIT(rs.GetOffsetInRangeSet(40970), ::testing::KilledBySignal(SIGABRT), "");
|
||||||
|
}
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
#include <android-base/logging.h>
|
#include <android-base/logging.h>
|
||||||
#include <android-base/parseint.h>
|
#include <android-base/parseint.h>
|
||||||
|
#include <android-base/stringprintf.h>
|
||||||
#include <android-base/strings.h>
|
#include <android-base/strings.h>
|
||||||
|
|
||||||
using Range = std::pair<size_t, size_t>;
|
using Range = std::pair<size_t, size_t>;
|
||||||
|
@ -74,6 +75,18 @@ class RangeSet {
|
||||||
return RangeSet(std::move(pairs));
|
return RangeSet(std::move(pairs));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string ToString() const {
|
||||||
|
if (ranges_.empty()) {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
std::string result = std::to_string(ranges_.size() * 2);
|
||||||
|
for (const auto& r : ranges_) {
|
||||||
|
result += android::base::StringPrintf(",%zu,%zu", r.first, r.second);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
// Get the block number for the i-th (starting from 0) block in the RangeSet.
|
// Get the block number for the i-th (starting from 0) block in the RangeSet.
|
||||||
size_t GetBlockNumber(size_t idx) const {
|
size_t GetBlockNumber(size_t idx) const {
|
||||||
CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")";
|
CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")";
|
||||||
|
@ -157,8 +170,109 @@ class RangeSet {
|
||||||
return ranges_ != other.ranges_;
|
return ranges_ != other.ranges_;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
protected:
|
||||||
// Actual limit for each value and the total number are both INT_MAX.
|
// Actual limit for each value and the total number are both INT_MAX.
|
||||||
std::vector<Range> ranges_;
|
std::vector<Range> ranges_;
|
||||||
size_t blocks_;
|
size_t blocks_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static constexpr size_t kBlockSize = 4096;
|
||||||
|
|
||||||
|
// The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input
|
||||||
|
// files when we're handling large zip files. Specifically, we can treat the input file as a
|
||||||
|
// continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into
|
||||||
|
// several smaller chunks based on the zip entries.
|
||||||
|
|
||||||
|
// For example, [source: 0-99] can be split into
|
||||||
|
// [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89]
|
||||||
|
// Here "10-29" simply means block 10th to block 29th with respect to the original input file.
|
||||||
|
// Also, note that the split sources should be mutual exclusive, but they don't need to cover
|
||||||
|
// every block in the original source.
|
||||||
|
class SortedRangeSet : public RangeSet {
|
||||||
|
public:
|
||||||
|
SortedRangeSet() {}
|
||||||
|
|
||||||
|
// Ranges in the the set should be mutually exclusive; and they're sorted by the start block.
|
||||||
|
explicit SortedRangeSet(std::vector<Range>&& pairs) : RangeSet(std::move(pairs)) {
|
||||||
|
std::sort(ranges_.begin(), ranges_.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
void Insert(const Range& to_insert) {
|
||||||
|
SortedRangeSet rs({ to_insert });
|
||||||
|
Insert(rs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges.
|
||||||
|
void Insert(const SortedRangeSet& rs) {
|
||||||
|
if (rs.size() == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Merge and sort the two RangeSets.
|
||||||
|
std::vector<Range> temp = std::move(ranges_);
|
||||||
|
std::copy(rs.begin(), rs.end(), std::back_inserter(temp));
|
||||||
|
std::sort(temp.begin(), temp.end());
|
||||||
|
|
||||||
|
Clear();
|
||||||
|
// Trim overlaps and insert the result back to ranges_.
|
||||||
|
Range to_insert = temp.front();
|
||||||
|
for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) {
|
||||||
|
if (it->first <= to_insert.second) {
|
||||||
|
to_insert.second = std::max(to_insert.second, it->second);
|
||||||
|
} else {
|
||||||
|
ranges_.push_back(to_insert);
|
||||||
|
blocks_ += (to_insert.second - to_insert.first);
|
||||||
|
to_insert = *it;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ranges_.push_back(to_insert);
|
||||||
|
blocks_ += (to_insert.second - to_insert.first);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Clear() {
|
||||||
|
blocks_ = 0;
|
||||||
|
ranges_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
using RangeSet::Overlaps;
|
||||||
|
bool Overlaps(size_t start, size_t len) const {
|
||||||
|
RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } });
|
||||||
|
return Overlaps(rs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute the block range the file occupies, and insert that range.
|
||||||
|
void Insert(size_t start, size_t len) {
|
||||||
|
Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 };
|
||||||
|
Insert(to_insert);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Given an offset of the file, checks if the corresponding block (by considering the file as
|
||||||
|
// 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset
|
||||||
|
// within this SortedRangeSet.
|
||||||
|
//
|
||||||
|
// For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte.
|
||||||
|
// The mapped offset within a SortedRangeSet("1-9 15-19") is 10.
|
||||||
|
//
|
||||||
|
// An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th
|
||||||
|
// item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10
|
||||||
|
// + 10) in a range represented by this SortedRangeSet.
|
||||||
|
size_t GetOffsetInRangeSet(size_t old_offset) const {
|
||||||
|
size_t old_block_start = old_offset / kBlockSize;
|
||||||
|
size_t new_block_start = 0;
|
||||||
|
for (const auto& range : ranges_) {
|
||||||
|
// Find the index of old_block_start.
|
||||||
|
if (old_block_start >= range.second) {
|
||||||
|
new_block_start += (range.second - range.first);
|
||||||
|
} else if (old_block_start >= range.first) {
|
||||||
|
new_block_start += (old_block_start - range.first);
|
||||||
|
return (new_block_start * kBlockSize + old_offset % kBlockSize);
|
||||||
|
} else {
|
||||||
|
CHECK(false) <<"block_start " << old_block_start << " is missing between two ranges: "
|
||||||
|
<< this->ToString();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CHECK(false) <<"block_start " << old_block_start << " exceeds the limit of current RangeSet: "
|
||||||
|
<< this->ToString();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
};
|
Loading…
Reference in a new issue