Merge "Add multithread support to call imgdiff with block-limit"
This commit is contained in:
commit
5d60cd2d3e
1 changed files with 230 additions and 62 deletions
|
@ -16,6 +16,7 @@ from __future__ import print_function
|
|||
|
||||
import array
|
||||
import common
|
||||
import copy
|
||||
import functools
|
||||
import heapq
|
||||
import itertools
|
||||
|
@ -204,6 +205,18 @@ class Transfer(object):
|
|||
self.id = len(by_id)
|
||||
by_id.append(self)
|
||||
|
||||
self._patch = None
|
||||
|
||||
@property
|
||||
def patch(self):
|
||||
return self._patch
|
||||
|
||||
@patch.setter
|
||||
def patch(self, patch):
|
||||
if patch:
|
||||
assert self.style == "diff"
|
||||
self._patch = patch
|
||||
|
||||
def NetStashChange(self):
|
||||
return (sum(sr.size() for (_, sr) in self.stash_before) -
|
||||
sum(sr.size() for (_, sr) in self.use_stash))
|
||||
|
@ -213,6 +226,7 @@ class Transfer(object):
|
|||
self.use_stash = []
|
||||
self.style = "new"
|
||||
self.src_ranges = RangeSet()
|
||||
self.patch = None
|
||||
|
||||
def __str__(self):
|
||||
return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
|
||||
|
@ -675,6 +689,7 @@ class BlockImageDiff(object):
|
|||
# These are identical; we don't need to generate a patch,
|
||||
# just issue copy commands on the device.
|
||||
xf.style = "move"
|
||||
xf.patch = None
|
||||
tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
|
||||
if xf.src_ranges != xf.tgt_ranges:
|
||||
print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
|
||||
|
@ -683,24 +698,33 @@ class BlockImageDiff(object):
|
|||
xf.tgt_name + " (from " + xf.src_name + ")"),
|
||||
str(xf.tgt_ranges), str(xf.src_ranges)))
|
||||
else:
|
||||
# For files in zip format (eg, APKs, JARs, etc.) we would
|
||||
# like to use imgdiff -z if possible (because it usually
|
||||
# produces significantly smaller patches than bsdiff).
|
||||
# This is permissible if:
|
||||
#
|
||||
# - imgdiff is not disabled, and
|
||||
# - the source and target files are monotonic (ie, the
|
||||
# data is stored with blocks in increasing order), and
|
||||
# - we haven't removed any blocks from the source set.
|
||||
#
|
||||
# If these conditions are satisfied then appending all the
|
||||
# blocks in the set together in order will produce a valid
|
||||
# zip file (plus possibly extra zeros in the last block),
|
||||
# which is what imgdiff needs to operate. (imgdiff is
|
||||
# fine with extra zeros at the end of the file.)
|
||||
imgdiff = (not self.disable_imgdiff and xf.intact and
|
||||
xf.tgt_name.split(".")[-1].lower()
|
||||
in ("apk", "jar", "zip"))
|
||||
if xf.patch:
|
||||
# We have already generated the patch with imgdiff. Check if the
|
||||
# transfer is intact.
|
||||
assert not self.disable_imgdiff
|
||||
imgdiff = True
|
||||
if not xf.intact:
|
||||
imgdiff = False
|
||||
xf.patch = None
|
||||
else:
|
||||
# For files in zip format (eg, APKs, JARs, etc.) we would
|
||||
# like to use imgdiff -z if possible (because it usually
|
||||
# produces significantly smaller patches than bsdiff).
|
||||
# This is permissible if:
|
||||
#
|
||||
# - imgdiff is not disabled, and
|
||||
# - the source and target files are monotonic (ie, the
|
||||
# data is stored with blocks in increasing order), and
|
||||
# - we haven't removed any blocks from the source set.
|
||||
#
|
||||
# If these conditions are satisfied then appending all the
|
||||
# blocks in the set together in order will produce a valid
|
||||
# zip file (plus possibly extra zeros in the last block),
|
||||
# which is what imgdiff needs to operate. (imgdiff is
|
||||
# fine with extra zeros at the end of the file.)
|
||||
imgdiff = (not self.disable_imgdiff and xf.intact and
|
||||
xf.tgt_name.split(".")[-1].lower()
|
||||
in ("apk", "jar", "zip"))
|
||||
xf.style = "imgdiff" if imgdiff else "bsdiff"
|
||||
diff_queue.append((index, imgdiff, patch_num))
|
||||
patch_num += 1
|
||||
|
@ -738,48 +762,51 @@ class BlockImageDiff(object):
|
|||
xf_index, imgdiff, patch_index = diff_queue.pop()
|
||||
|
||||
xf = self.transfers[xf_index]
|
||||
src_ranges = xf.src_ranges
|
||||
tgt_ranges = xf.tgt_ranges
|
||||
patch = xf.patch
|
||||
if not patch:
|
||||
src_ranges = xf.src_ranges
|
||||
tgt_ranges = xf.tgt_ranges
|
||||
|
||||
# Needs lock since WriteRangeDataToFd() is stateful (calling seek).
|
||||
with lock:
|
||||
src_file = common.MakeTempFile(prefix="src-")
|
||||
with open(src_file, "wb") as fd:
|
||||
self.src.WriteRangeDataToFd(src_ranges, fd)
|
||||
|
||||
tgt_file = common.MakeTempFile(prefix="tgt-")
|
||||
with open(tgt_file, "wb") as fd:
|
||||
self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
|
||||
|
||||
message = []
|
||||
try:
|
||||
patch = compute_patch(src_file, tgt_file, imgdiff)
|
||||
except ValueError as e:
|
||||
message.append(
|
||||
"Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
|
||||
"imgdiff" if imgdiff else "bsdiff",
|
||||
xf.tgt_name if xf.tgt_name == xf.src_name else
|
||||
xf.tgt_name + " (from " + xf.src_name + ")",
|
||||
xf.tgt_ranges, xf.src_ranges, e.message))
|
||||
# TODO(b/68016761): Better handle the holes in mke2fs created images.
|
||||
if imgdiff:
|
||||
try:
|
||||
patch = compute_patch(src_file, tgt_file, imgdiff=False)
|
||||
message.append(
|
||||
"Fell back and generated with bsdiff instead for %s" % (
|
||||
xf.tgt_name,))
|
||||
xf.style = "bsdiff"
|
||||
with lock:
|
||||
warning_messages.extend(message)
|
||||
del message[:]
|
||||
except ValueError as e:
|
||||
message.append(
|
||||
"Also failed to generate with bsdiff for %s:\n%s" % (
|
||||
xf.tgt_name, e.message))
|
||||
|
||||
if message:
|
||||
# Needs lock since WriteRangeDataToFd() is stateful (calling seek).
|
||||
with lock:
|
||||
error_messages.extend(message)
|
||||
src_file = common.MakeTempFile(prefix="src-")
|
||||
with open(src_file, "wb") as fd:
|
||||
self.src.WriteRangeDataToFd(src_ranges, fd)
|
||||
|
||||
tgt_file = common.MakeTempFile(prefix="tgt-")
|
||||
with open(tgt_file, "wb") as fd:
|
||||
self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
|
||||
|
||||
message = []
|
||||
try:
|
||||
patch = compute_patch(src_file, tgt_file, imgdiff)
|
||||
except ValueError as e:
|
||||
message.append(
|
||||
"Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
|
||||
"imgdiff" if imgdiff else "bsdiff",
|
||||
xf.tgt_name if xf.tgt_name == xf.src_name else
|
||||
xf.tgt_name + " (from " + xf.src_name + ")",
|
||||
xf.tgt_ranges, xf.src_ranges, e.message))
|
||||
# TODO(b/68016761): Better handle the holes in mke2fs created
|
||||
# images.
|
||||
if imgdiff:
|
||||
try:
|
||||
patch = compute_patch(src_file, tgt_file, imgdiff=False)
|
||||
message.append(
|
||||
"Fell back and generated with bsdiff instead for %s" % (
|
||||
xf.tgt_name,))
|
||||
xf.style = "bsdiff"
|
||||
with lock:
|
||||
warning_messages.extend(message)
|
||||
del message[:]
|
||||
except ValueError as e:
|
||||
message.append(
|
||||
"Also failed to generate with bsdiff for %s:\n%s" % (
|
||||
xf.tgt_name, e.message))
|
||||
|
||||
if message:
|
||||
with lock:
|
||||
error_messages.extend(message)
|
||||
|
||||
with lock:
|
||||
patches[patch_index] = (xf_index, patch)
|
||||
|
@ -1167,12 +1194,9 @@ class BlockImageDiff(object):
|
|||
Compared to the fixed 1024-block limit, it reduces the overall package
|
||||
size by 30% for volantis, and 20% for angler and bullhead."""
|
||||
|
||||
assert style == "diff"
|
||||
# Possibly split large files into smaller chunks.
|
||||
pieces = 0
|
||||
cache_size = common.OPTIONS.cache_size
|
||||
split_threshold = 0.125
|
||||
max_blocks_per_transfer = int(cache_size * split_threshold /
|
||||
self.tgt.blocksize)
|
||||
|
||||
# Change nothing for small files.
|
||||
if (tgt_ranges.size() <= max_blocks_per_transfer and
|
||||
|
@ -1182,6 +1206,14 @@ class BlockImageDiff(object):
|
|||
style, by_id)
|
||||
return
|
||||
|
||||
if tgt_name.split(".")[-1].lower() in ("apk", "jar", "zip"):
|
||||
split_enable = (not self.disable_imgdiff and src_ranges.monotonic and
|
||||
tgt_ranges.monotonic)
|
||||
if split_enable and (self.tgt.RangeSha1(tgt_ranges) !=
|
||||
self.src.RangeSha1(src_ranges)):
|
||||
large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
|
||||
return
|
||||
|
||||
while (tgt_ranges.size() > max_blocks_per_transfer and
|
||||
src_ranges.size() > max_blocks_per_transfer):
|
||||
tgt_split_name = "%s-%d" % (tgt_name, pieces)
|
||||
|
@ -1275,8 +1307,136 @@ class BlockImageDiff(object):
|
|||
AddSplitTransfers(
|
||||
tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
|
||||
|
||||
def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
|
||||
split_info):
|
||||
"""Parse the split_info and return a list of info tuples.
|
||||
|
||||
Args:
|
||||
patch_size: total size of the patch file.
|
||||
tgt_ranges: Ranges of the target file within the original image.
|
||||
src_ranges: Ranges of the source file within the original image.
|
||||
split_info format:
|
||||
imgdiff version#
|
||||
count of pieces
|
||||
<patch_size_1> <tgt_size_1> <src_ranges_1>
|
||||
...
|
||||
<patch_size_n> <tgt_size_n> <src_ranges_n>
|
||||
|
||||
Returns:
|
||||
[patch_start, patch_len, split_tgt_ranges, split_src_ranges]
|
||||
"""
|
||||
|
||||
version = int(split_info[0])
|
||||
assert version == 2
|
||||
count = int(split_info[1])
|
||||
assert len(split_info) - 2 == count
|
||||
|
||||
split_info_list = []
|
||||
patch_start = 0
|
||||
tgt_remain = copy.deepcopy(tgt_ranges)
|
||||
# each line has the format <patch_size>, <tgt_size>, <src_ranges>
|
||||
for line in split_info[2:]:
|
||||
info = line.split()
|
||||
assert len(info) == 3
|
||||
patch_length = int(info[0])
|
||||
|
||||
split_tgt_size = int(info[1])
|
||||
assert split_tgt_size % 4096 == 0
|
||||
assert split_tgt_size / 4096 <= tgt_remain.size()
|
||||
split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
|
||||
tgt_remain = tgt_remain.subtract(split_tgt_ranges)
|
||||
|
||||
# Find the split_src_ranges within the image file from its relative
|
||||
# position in file.
|
||||
split_src_indices = RangeSet.parse_raw(info[2])
|
||||
split_src_ranges = RangeSet()
|
||||
for r in split_src_indices:
|
||||
curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
|
||||
assert not split_src_ranges.overlaps(curr_range)
|
||||
split_src_ranges = split_src_ranges.union(curr_range)
|
||||
|
||||
split_info_list.append((patch_start, patch_length,
|
||||
split_tgt_ranges, split_src_ranges))
|
||||
patch_start += patch_length
|
||||
|
||||
# Check that the sizes of all the split pieces add up to the final file
|
||||
# size for patch and target.
|
||||
assert tgt_remain.size() == 0
|
||||
assert patch_start == patch_size
|
||||
return split_info_list
|
||||
|
||||
def AddSplitTransferForLargeApks():
|
||||
"""Create split transfers for large apk files.
|
||||
|
||||
Example: Chrome.apk will be split into
|
||||
src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
|
||||
src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
|
||||
...
|
||||
|
||||
After the split, the target pieces are continuous and block aligned; and
|
||||
the source pieces are mutually exclusive. During the split, we also
|
||||
generate and save the image patch between src-X & tgt-X. This patch will
|
||||
be valid because the block ranges of src-X & tgt-X will always stay the
|
||||
same afterwards; but there's a chance we don't use the patch if we
|
||||
convert the "diff" command into "new" or "move" later.
|
||||
"""
|
||||
|
||||
while True:
|
||||
with transfer_lock:
|
||||
if not large_apks:
|
||||
return
|
||||
tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
|
||||
|
||||
src_file = common.MakeTempFile(prefix="src-")
|
||||
tgt_file = common.MakeTempFile(prefix="tgt-")
|
||||
with transfer_lock:
|
||||
with open(src_file, "wb") as src_fd:
|
||||
self.src.WriteRangeDataToFd(src_ranges, src_fd)
|
||||
with open(tgt_file, "wb") as tgt_fd:
|
||||
self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
|
||||
|
||||
patch_file = common.MakeTempFile(prefix="patch-")
|
||||
patch_info_file = common.MakeTempFile(prefix="split_info-")
|
||||
cmd = ["imgdiff", "-z",
|
||||
"--block-limit={}".format(max_blocks_per_transfer),
|
||||
"--split-info=" + patch_info_file,
|
||||
src_file, tgt_file, patch_file]
|
||||
p = common.Run(cmd, stdout=subprocess.PIPE)
|
||||
p.communicate()
|
||||
# TODO(xunchang) fall back to the normal split if imgdiff fails.
|
||||
if p.returncode != 0:
|
||||
raise ValueError("Failed to create patch between {} and {}".format(
|
||||
src_name, tgt_name))
|
||||
|
||||
with open(patch_info_file) as patch_info:
|
||||
lines = patch_info.readlines()
|
||||
|
||||
patch_size_total = os.path.getsize(patch_file)
|
||||
split_info_list = ParseAndValidateSplitInfo(patch_size_total,
|
||||
tgt_ranges, src_ranges,
|
||||
lines)
|
||||
for index, (patch_start, patch_length, split_tgt_ranges,
|
||||
split_src_ranges) in enumerate(split_info_list):
|
||||
with open(patch_file) as f:
|
||||
f.seek(patch_start)
|
||||
patch_content = f.read(patch_length)
|
||||
|
||||
split_src_name = "{}-{}".format(src_name, index)
|
||||
split_tgt_name = "{}-{}".format(tgt_name, index)
|
||||
transfer_split = Transfer(split_tgt_name, split_src_name,
|
||||
split_tgt_ranges, split_src_ranges,
|
||||
self.tgt.RangeSha1(split_tgt_ranges),
|
||||
self.src.RangeSha1(split_src_ranges),
|
||||
"diff", self.transfers)
|
||||
transfer_split.patch = patch_content
|
||||
|
||||
print("Finding transfers...")
|
||||
|
||||
large_apks = []
|
||||
cache_size = common.OPTIONS.cache_size
|
||||
split_threshold = 0.125
|
||||
max_blocks_per_transfer = int(cache_size * split_threshold /
|
||||
self.tgt.blocksize)
|
||||
empty = RangeSet()
|
||||
for tgt_fn, tgt_ranges in self.tgt.file_map.items():
|
||||
if tgt_fn == "__ZERO":
|
||||
|
@ -1321,6 +1481,14 @@ class BlockImageDiff(object):
|
|||
|
||||
AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
|
||||
|
||||
transfer_lock = threading.Lock()
|
||||
threads = [threading.Thread(target=AddSplitTransferForLargeApks)
|
||||
for _ in range(self.threads)]
|
||||
for th in threads:
|
||||
th.start()
|
||||
while threads:
|
||||
threads.pop().join()
|
||||
|
||||
def AbbreviateSourceNames(self):
|
||||
for k in self.src.file_map.keys():
|
||||
b = os.path.basename(k)
|
||||
|
|
Loading…
Reference in a new issue