Merge "Add implemention of SortedRangeSet" am: 64cba55fbc

am: fbd4b10310

Change-Id: I2c6bc65375be433e79db8861e3c5e936847c897e
This commit is contained in:
Tianjie Xu 2017-08-01 00:34:49 +00:00 committed by android-build-merger
commit a09376ed90
2 changed files with 162 additions and 1 deletions

View file

@ -110,3 +110,50 @@ TEST(RangeSetTest, iterators) {
} }
ASSERT_EQ((std::vector<Range>{ Range{ 8, 10 }, Range{ 1, 5 } }), ranges); ASSERT_EQ((std::vector<Range>{ Range{ 8, 10 }, Range{ 1, 5 } }), ranges);
} }
TEST(RangeSetTest, tostring) {
ASSERT_EQ("2,1,6", RangeSet::Parse("2,1,6").ToString());
ASSERT_EQ("4,1,5,8,10", RangeSet::Parse("4,1,5,8,10").ToString());
ASSERT_EQ("6,1,3,4,6,15,22", RangeSet::Parse("6,1,3,4,6,15,22").ToString());
}
TEST(SortedRangeSetTest, insertion) {
SortedRangeSet rs({ { 2, 3 }, { 4, 6 }, { 8, 14 } });
rs.Insert({ 1, 2 });
ASSERT_EQ(SortedRangeSet({ { 1, 3 }, { 4, 6 }, { 8, 14 } }), rs);
ASSERT_EQ(static_cast<size_t>(10), rs.blocks());
rs.Insert({ 3, 5 });
ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 } }), rs);
ASSERT_EQ(static_cast<size_t>(11), rs.blocks());
SortedRangeSet r1({ { 20, 22 }, { 15, 18 } });
rs.Insert(r1);
ASSERT_EQ(SortedRangeSet({ { 1, 6 }, { 8, 14 }, { 15, 18 }, { 20, 22 } }), rs);
ASSERT_EQ(static_cast<size_t>(16), rs.blocks());
SortedRangeSet r2({ { 2, 7 }, { 15, 21 }, { 20, 25 } });
rs.Insert(r2);
ASSERT_EQ(SortedRangeSet({ { 1, 7 }, { 8, 14 }, { 15, 25 } }), rs);
ASSERT_EQ(static_cast<size_t>(22), rs.blocks());
}
TEST(SortedRangeSetTest, file_range) {
SortedRangeSet rs;
rs.Insert(4096, 4096);
ASSERT_EQ(SortedRangeSet({ { 1, 2 } }), rs);
// insert block 2-9
rs.Insert(4096 * 3 - 1, 4096 * 7);
ASSERT_EQ(SortedRangeSet({ { 1, 10 } }), rs);
// insert block 15-19
rs.Insert(4096 * 15 + 1, 4096 * 4);
ASSERT_EQ(SortedRangeSet({ { 1, 10 }, { 15, 20 } }), rs);
// rs overlaps block 2-2
ASSERT_TRUE(rs.Overlaps(4096 * 2 - 1, 10));
ASSERT_FALSE(rs.Overlaps(4096 * 10, 4096 * 5));
ASSERT_EQ(static_cast<size_t>(10), rs.GetOffsetInRangeSet(4106));
ASSERT_EQ(static_cast<size_t>(40970), rs.GetOffsetInRangeSet(4096 * 16 + 10));
// block#10 not in range.
ASSERT_EXIT(rs.GetOffsetInRangeSet(40970), ::testing::KilledBySignal(SIGABRT), "");
}

View file

@ -24,6 +24,7 @@
#include <android-base/logging.h> #include <android-base/logging.h>
#include <android-base/parseint.h> #include <android-base/parseint.h>
#include <android-base/stringprintf.h>
#include <android-base/strings.h> #include <android-base/strings.h>
using Range = std::pair<size_t, size_t>; using Range = std::pair<size_t, size_t>;
@ -74,6 +75,18 @@ class RangeSet {
return RangeSet(std::move(pairs)); return RangeSet(std::move(pairs));
} }
std::string ToString() const {
if (ranges_.empty()) {
return "";
}
std::string result = std::to_string(ranges_.size() * 2);
for (const auto& r : ranges_) {
result += android::base::StringPrintf(",%zu,%zu", r.first, r.second);
}
return result;
}
// Get the block number for the i-th (starting from 0) block in the RangeSet. // Get the block number for the i-th (starting from 0) block in the RangeSet.
size_t GetBlockNumber(size_t idx) const { size_t GetBlockNumber(size_t idx) const {
CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")"; CHECK_LT(idx, blocks_) << "Out of bound index " << idx << " (total blocks: " << blocks_ << ")";
@ -157,8 +170,109 @@ class RangeSet {
return ranges_ != other.ranges_; return ranges_ != other.ranges_;
} }
private: protected:
// Actual limit for each value and the total number are both INT_MAX. // Actual limit for each value and the total number are both INT_MAX.
std::vector<Range> ranges_; std::vector<Range> ranges_;
size_t blocks_; size_t blocks_;
}; };
static constexpr size_t kBlockSize = 4096;
// The class is a sorted version of a RangeSet; and it's useful in imgdiff to split the input
// files when we're handling large zip files. Specifically, we can treat the input file as a
// continuous RangeSet (i.e. RangeSet("0-99") for a 100 blocks file); and break it down into
// several smaller chunks based on the zip entries.
// For example, [source: 0-99] can be split into
// [split_src1: 10-29]; [split_src2: 40-49, 60-69]; [split_src3: 70-89]
// Here "10-29" simply means block 10th to block 29th with respect to the original input file.
// Also, note that the split sources should be mutual exclusive, but they don't need to cover
// every block in the original source.
class SortedRangeSet : public RangeSet {
public:
SortedRangeSet() {}
// Ranges in the the set should be mutually exclusive; and they're sorted by the start block.
explicit SortedRangeSet(std::vector<Range>&& pairs) : RangeSet(std::move(pairs)) {
std::sort(ranges_.begin(), ranges_.end());
}
void Insert(const Range& to_insert) {
SortedRangeSet rs({ to_insert });
Insert(rs);
}
// Insert the input SortedRangeSet; keep the ranges sorted and merge the overlap ranges.
void Insert(const SortedRangeSet& rs) {
if (rs.size() == 0) {
return;
}
// Merge and sort the two RangeSets.
std::vector<Range> temp = std::move(ranges_);
std::copy(rs.begin(), rs.end(), std::back_inserter(temp));
std::sort(temp.begin(), temp.end());
Clear();
// Trim overlaps and insert the result back to ranges_.
Range to_insert = temp.front();
for (auto it = temp.cbegin() + 1; it != temp.cend(); it++) {
if (it->first <= to_insert.second) {
to_insert.second = std::max(to_insert.second, it->second);
} else {
ranges_.push_back(to_insert);
blocks_ += (to_insert.second - to_insert.first);
to_insert = *it;
}
}
ranges_.push_back(to_insert);
blocks_ += (to_insert.second - to_insert.first);
}
void Clear() {
blocks_ = 0;
ranges_.clear();
}
using RangeSet::Overlaps;
bool Overlaps(size_t start, size_t len) const {
RangeSet rs({ { start / kBlockSize, (start + len - 1) / kBlockSize + 1 } });
return Overlaps(rs);
}
// Compute the block range the file occupies, and insert that range.
void Insert(size_t start, size_t len) {
Range to_insert{ start / kBlockSize, (start + len - 1) / kBlockSize + 1 };
Insert(to_insert);
}
// Given an offset of the file, checks if the corresponding block (by considering the file as
// 0-based continuous block ranges) is covered by the SortedRangeSet. If so, returns the offset
// within this SortedRangeSet.
//
// For example, the 4106-th byte of a file is from block 1, assuming a block size of 4096-byte.
// The mapped offset within a SortedRangeSet("1-9 15-19") is 10.
//
// An offset of 65546 falls into the 16-th block in a file. Block 16 is contained as the 10-th
// item in SortedRangeSet("1-9 15-19"). So its data can be found at offset 40970 (i.e. 4096 * 10
// + 10) in a range represented by this SortedRangeSet.
size_t GetOffsetInRangeSet(size_t old_offset) const {
size_t old_block_start = old_offset / kBlockSize;
size_t new_block_start = 0;
for (const auto& range : ranges_) {
// Find the index of old_block_start.
if (old_block_start >= range.second) {
new_block_start += (range.second - range.first);
} else if (old_block_start >= range.first) {
new_block_start += (old_block_start - range.first);
return (new_block_start * kBlockSize + old_offset % kBlockSize);
} else {
CHECK(false) <<"block_start " << old_block_start << " is missing between two ranges: "
<< this->ToString();
return 0;
}
}
CHECK(false) <<"block_start " << old_block_start << " exceeds the limit of current RangeSet: "
<< this->ToString();
return 0;
}
};