Merge "Split large files for BBOTA v3."
commit 95708eea4e

2 changed files with 101 additions and 17 deletions
tools/releasetools/blockimgdiff.py

@@ -297,7 +297,6 @@ class BlockImageDiff(object):
     out = []

     total = 0
-    performs_read = False

     stashes = {}
     stashed_blocks = 0
@@ -415,7 +414,6 @@ class BlockImageDiff(object):
         out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
         total += tgt_size
       elif xf.style == "move":
-        performs_read = True
         assert xf.tgt_ranges
         assert xf.src_ranges.size() == tgt_size
         if xf.src_ranges != xf.tgt_ranges:
@@ -440,7 +438,6 @@ class BlockImageDiff(object):
                                       xf.tgt_ranges.to_string_raw(), src_str))
         total += tgt_size
       elif xf.style in ("bsdiff", "imgdiff"):
-        performs_read = True
         assert xf.tgt_ranges
         assert xf.src_ranges
         if self.version == 1:
@@ -551,6 +548,7 @@ class BlockImageDiff(object):
     max_allowed = cache_size * stash_threshold / self.tgt.blocksize

     stashed_blocks = 0
+    new_blocks = 0

     # Now go through all the commands. Compute the required stash size on the
     # fly. If a command requires excess stash than available, it deletes the
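
A quick worked example for the stash budget in the hunk above, with made-up numbers (the 100 MiB cache, the 0.8 threshold, and the 4096-byte block size are assumptions for illustration, not values from this commit):

# Hypothetical numbers only, to illustrate the max_allowed formula above.
cache_size = 100 * 1024 * 1024      # assumed usable cache, in bytes
stash_threshold = 0.8               # assumed fraction of cache for stashes
blocksize = 4096                    # assumed self.tgt.blocksize
max_allowed = cache_size * stash_threshold / blocksize
print(max_allowed)                  # 20480.0 blocks, i.e. 80 MiB of stash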
@@ -566,8 +564,7 @@ class BlockImageDiff(object):
           # that will use this stash and replace the command with "new".
           use_cmd = stashes[idx][2]
           replaced_cmds.append(use_cmd)
-          print(" %s replaced due to an explicit stash of %d blocks." % (
-              use_cmd, sr.size()))
+          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
         else:
           stashed_blocks += sr.size()
@@ -582,8 +579,7 @@ class BlockImageDiff(object):
       if xf.src_ranges.overlaps(xf.tgt_ranges):
         if stashed_blocks + xf.src_ranges.size() > max_allowed:
           replaced_cmds.append(xf)
-          print(" %s replaced due to an implicit stash of %d blocks." % (
-              xf, xf.src_ranges.size()))
+          print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

     # Replace the commands in replaced_cmds with "new"s.
     for cmd in replaced_cmds:
@@ -593,9 +589,13 @@ class BlockImageDiff(object):
         def_cmd = stashes[idx][1]
         assert (idx, sr) in def_cmd.stash_before
         def_cmd.stash_before.remove((idx, sr))
+        new_blocks += sr.size()

       cmd.ConvertToNew()

+    print(" Total %d blocks are packed as new blocks due to insufficient "
+          "cache size." % (new_blocks,))
+
   def ComputePatches(self, prefix):
     print("Reticulating splines...")
     diff_q = []
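
A simplified sketch of what the three hunks above add up to (not the commit's code; the commands and sizes are toy values): a command whose stash demand would push the running total past max_allowed is converted to "new", and its blocks are tallied in new_blocks.

# Toy model of the revision pass; commands are (name, blocks_to_stash) pairs.
def revise(commands, max_allowed):
  stashed_blocks = 0
  new_blocks = 0
  for name, need in commands:
    if stashed_blocks + need > max_allowed:
      new_blocks += need          # this command would be converted to "new"
    else:
      stashed_blocks += need      # this stash still fits in the budget
  return stashed_blocks, new_blocks

# revise([("a", 300), ("b", 500), ("c", 400)], max_allowed=1000)
# -> (800, 400): "c" no longer fits and gets packed as new blocks.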
@@ -951,6 +951,57 @@ class BlockImageDiff(object):
             a.goes_after[b] = size

   def FindTransfers(self):
     """Parse the file_map to generate all the transfers."""

+    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
+                    split=False):
+      """Wrapper function for adding a Transfer().
+
+      For BBOTA v3, we need to stash source blocks for resumable feature.
+      However, with the growth of file size and the shrink of the cache
+      partition source blocks are too large to be stashed. If a file occupies
+      too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
+      into smaller pieces by getting multiple Transfer()s.
+
+      The downside is that after splitting, we can no longer use imgdiff but
+      only bsdiff."""
+
+      MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
+
+      # We care about diff transfers only.
+      if style != "diff" or not split:
+        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
+        return
+
+      # Change nothing for small files.
+      if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
+          src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
+        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
+        return
+
+      pieces = 0
+      while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
+             src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
+        tgt_split_name = "%s-%d" % (tgt_name, pieces)
+        src_split_name = "%s-%d" % (src_name, pieces)
+        tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
+        src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
+        Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
+                 by_id)
+
+        tgt_ranges = tgt_ranges.subtract(tgt_first)
+        src_ranges = src_ranges.subtract(src_first)
+        pieces += 1
+
+      # Handle remaining blocks.
+      if tgt_ranges.size() or src_ranges.size():
+        # Must be both non-empty.
+        assert tgt_ranges.size() and src_ranges.size()
+        tgt_split_name = "%s-%d" % (tgt_name, pieces)
+        src_split_name = "%s-%d" % (src_name, pieces)
+        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
+                 by_id)
+
     empty = RangeSet()
     for tgt_fn, tgt_ranges in self.tgt.file_map.items():
       if tgt_fn == "__ZERO":
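
To make the splitting above concrete, a hedged walk-through with a made-up 2500-block file (the name and size are illustrative; the real AddTransfer works on RangeSets and also checks the source side):

# Illustration only: how an oversized file would be chopped into pieces.
MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
tgt_blocks = 2500                   # hypothetical file size in blocks
pieces, offset, idx = [], 0, 0
while tgt_blocks - offset > MAX_BLOCKS_PER_DIFF_TRANSFER:
  pieces.append(("system/foo.dat-%d" % idx, MAX_BLOCKS_PER_DIFF_TRANSFER))
  offset += MAX_BLOCKS_PER_DIFF_TRANSFER
  idx += 1
if tgt_blocks - offset:
  pieces.append(("system/foo.dat-%d" % idx, tgt_blocks - offset))
# pieces == [("system/foo.dat-0", 1024),
#            ("system/foo.dat-1", 1024),
#            ("system/foo.dat-2", 452)]

Each piece becomes its own diff transfer, which keeps the per-transfer stash bounded; the trade-off noted in the docstring is that the split pieces can only use bsdiff, not imgdiff.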
@@ -958,28 +1009,28 @@ class BlockImageDiff(object):
         # in any file and that are filled with zeros. We have a
         # special transfer style for zero blocks.
         src_ranges = self.src.file_map.get("__ZERO", empty)
-        Transfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
-                 "zero", self.transfers)
+        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
+                    "zero", self.transfers)
         continue

       elif tgt_fn == "__COPY":
         # "__COPY" domain includes all the blocks not contained in any
         # file and that need to be copied unconditionally to the target.
-        Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
+        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
         continue

       elif tgt_fn in self.src.file_map:
         # Look for an exact pathname match in the source.
-        Transfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
-                 "diff", self.transfers)
+        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
+                    "diff", self.transfers, self.version >= 3)
         continue

       b = os.path.basename(tgt_fn)
       if b in self.src_basenames:
         # Look for an exact basename match in the source.
         src_fn = self.src_basenames[b]
-        Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
-                 "diff", self.transfers)
+        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
+                    "diff", self.transfers, self.version >= 3)
         continue

       b = re.sub("[0-9]+", "#", b)
@@ -989,11 +1040,11 @@ class BlockImageDiff(object):
         # for .so files that contain version numbers in the filename
         # that get bumped.)
         src_fn = self.src_numpatterns[b]
-        Transfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
-                 "diff", self.transfers)
+        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
+                    "diff", self.transfers, self.version >= 3)
         continue

-      Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
+      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

   def AbbreviateSourceNames(self):
     for k in self.src.file_map.keys():
tools/releasetools/rangelib.py

@@ -24,6 +24,7 @@ class RangeSet(object):
   lots of runs."""

   def __init__(self, data=None):
+    # TODO(tbao): monotonic is broken when passing in a tuple.
     self.monotonic = False
     if isinstance(data, str):
       self._parse_internal(data)
@@ -260,6 +261,38 @@ class RangeSet(object):
         out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
     return out

+  def first(self, n):
+    """Return the RangeSet that contains at most the first 'n' integers.
+
+    >>> RangeSet("0-9").first(1)
+    <RangeSet("0")>
+    >>> RangeSet("10-19").first(5)
+    <RangeSet("10-14")>
+    >>> RangeSet("10-19").first(15)
+    <RangeSet("10-19")>
+    >>> RangeSet("10-19 30-39").first(3)
+    <RangeSet("10-12")>
+    >>> RangeSet("10-19 30-39").first(15)
+    <RangeSet("10-19 30-34")>
+    >>> RangeSet("10-19 30-39").first(30)
+    <RangeSet("10-19 30-39")>
+    >>> RangeSet("0-9").first(0)
+    <RangeSet("")>
+    """
+
+    if self.size() <= n:
+      return self
+
+    out = []
+    for s, e in self:
+      if e - s >= n:
+        out += (s, s+n)
+        break
+      else:
+        out += (s, e)
+        n -= e - s
+    return RangeSet(data=out)
+

 if __name__ == "__main__":
   import doctest
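
A small usage sketch (not part of the commit; it assumes rangelib.py is importable and the chunks() helper is made up here) showing how the new first() pairs with the existing subtract() to peel fixed-size pieces off a RangeSet, which is exactly what AddTransfer does above:

from rangelib import RangeSet   # assumes rangelib.py is on the path

def chunks(rs, n):
  """Yield successive pieces of at most n blocks from rs."""
  while rs.size() > 0:
    head = rs.first(n)
    yield head
    rs = rs.subtract(head)

# list(chunks(RangeSet("10-19 30-39"), 8)) yields
#   <RangeSet("10-17")>, <RangeSet("18-19 30-35")>, <RangeSet("36-39")>

The doctest hook at the end of the file exercises the examples in the first() docstring.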