1 // 2 // Copyright (C) 2015 The Android Open Source Project 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 17 #include "update_engine/payload_generator/delta_diff_utils.h" 18 19 #include <endian.h> 20 #if defined(__clang__) 21 // TODO(*): Remove these pragmas when b/35721782 is fixed. 22 #pragma clang diagnostic push 23 #pragma clang diagnostic ignored "-Wmacro-redefined" 24 #endif 25 #include <ext2fs/ext2fs.h> 26 #if defined(__clang__) 27 #pragma clang diagnostic pop 28 #endif 29 #include <unistd.h> 30 31 #include <algorithm> 32 #include <functional> 33 #include <list> 34 #include <map> 35 #include <memory> 36 #include <numeric> 37 #include <utility> 38 39 #include <base/files/file_util.h> 40 #include <base/format_macros.h> 41 #include <base/strings/string_util.h> 42 #include <base/strings/stringprintf.h> 43 #include <base/threading/simple_thread.h> 44 #include <base/time/time.h> 45 #include <brillo/data_encoding.h> 46 #include <bsdiff/bsdiff.h> 47 #include <bsdiff/patch_writer_factory.h> 48 #include <puffin/utils.h> 49 50 #include "update_engine/common/hash_calculator.h" 51 #include "update_engine/common/subprocess.h" 52 #include "update_engine/common/utils.h" 53 #include "update_engine/payload_consumer/payload_constants.h" 54 #include "update_engine/payload_generator/ab_generator.h" 55 #include "update_engine/payload_generator/block_mapping.h" 56 #include "update_engine/payload_generator/bzip.h" 57 #include "update_engine/payload_generator/deflate_utils.h" 58 #include "update_engine/payload_generator/delta_diff_generator.h" 59 #include "update_engine/payload_generator/extent_ranges.h" 60 #include "update_engine/payload_generator/extent_utils.h" 61 #include "update_engine/payload_generator/squashfs_filesystem.h" 62 #include "update_engine/payload_generator/xz.h" 63 64 using std::list; 65 using std::map; 66 using std::string; 67 using std::vector; 68 69 namespace chromeos_update_engine { 70 namespace { 71 72 // The maximum destination size allowed for bsdiff. In general, bsdiff should 73 // work for arbitrary big files, but the payload generation and payload 74 // application requires a significant amount of RAM. We put a hard-limit of 75 // 200 MiB that should not affect any released board, but will limit the 76 // Chrome binary in ASan builders. 77 const uint64_t kMaxBsdiffDestinationSize = 200 * 1024 * 1024; // bytes 78 79 // The maximum destination size allowed for puffdiff. In general, puffdiff 80 // should work for arbitrary big files, but the payload application is quite 81 // memory intensive, so we limit these operations to 150 MiB. 82 const uint64_t kMaxPuffdiffDestinationSize = 150 * 1024 * 1024; // bytes 83 84 const int kBrotliCompressionQuality = 11; 85 86 // Process a range of blocks from |range_start| to |range_end| in the extent at 87 // position |*idx_p| of |extents|. If |do_remove| is true, this range will be 88 // removed, which may cause the extent to be trimmed, split or removed entirely. 89 // The value of |*idx_p| is updated to point to the next extent to be processed. 90 // Returns true iff the next extent to process is a new or updated one. 91 bool ProcessExtentBlockRange(vector<Extent>* extents, 92 size_t* idx_p, 93 const bool do_remove, 94 uint64_t range_start, 95 uint64_t range_end) { 96 size_t idx = *idx_p; 97 uint64_t start_block = (*extents)[idx].start_block(); 98 uint64_t num_blocks = (*extents)[idx].num_blocks(); 99 uint64_t range_size = range_end - range_start; 100 101 if (do_remove) { 102 if (range_size == num_blocks) { 103 // Remove the entire extent. 104 extents->erase(extents->begin() + idx); 105 } else if (range_end == num_blocks) { 106 // Trim the end of the extent. 107 (*extents)[idx].set_num_blocks(num_blocks - range_size); 108 idx++; 109 } else if (range_start == 0) { 110 // Trim the head of the extent. 111 (*extents)[idx].set_start_block(start_block + range_size); 112 (*extents)[idx].set_num_blocks(num_blocks - range_size); 113 } else { 114 // Trim the middle, splitting the remainder into two parts. 115 (*extents)[idx].set_num_blocks(range_start); 116 Extent e; 117 e.set_start_block(start_block + range_end); 118 e.set_num_blocks(num_blocks - range_end); 119 idx++; 120 extents->insert(extents->begin() + idx, e); 121 } 122 } else if (range_end == num_blocks) { 123 // Done with this extent. 124 idx++; 125 } else { 126 return false; 127 } 128 129 *idx_p = idx; 130 return true; 131 } 132 133 // Remove identical corresponding block ranges in |src_extents| and 134 // |dst_extents|. Used for preventing moving of blocks onto themselves during 135 // MOVE operations. The value of |total_bytes| indicates the actual length of 136 // content; this may be slightly less than the total size of blocks, in which 137 // case the last block is only partly occupied with data. Returns the total 138 // number of bytes removed. 139 size_t RemoveIdenticalBlockRanges(vector<Extent>* src_extents, 140 vector<Extent>* dst_extents, 141 const size_t total_bytes) { 142 size_t src_idx = 0; 143 size_t dst_idx = 0; 144 uint64_t src_offset = 0, dst_offset = 0; 145 size_t removed_bytes = 0, nonfull_block_bytes; 146 bool do_remove = false; 147 while (src_idx < src_extents->size() && dst_idx < dst_extents->size()) { 148 do_remove = ((*src_extents)[src_idx].start_block() + src_offset == 149 (*dst_extents)[dst_idx].start_block() + dst_offset); 150 151 uint64_t src_num_blocks = (*src_extents)[src_idx].num_blocks(); 152 uint64_t dst_num_blocks = (*dst_extents)[dst_idx].num_blocks(); 153 uint64_t min_num_blocks = 154 std::min(src_num_blocks - src_offset, dst_num_blocks - dst_offset); 155 uint64_t prev_src_offset = src_offset; 156 uint64_t prev_dst_offset = dst_offset; 157 src_offset += min_num_blocks; 158 dst_offset += min_num_blocks; 159 160 bool new_src = ProcessExtentBlockRange( 161 src_extents, &src_idx, do_remove, prev_src_offset, src_offset); 162 bool new_dst = ProcessExtentBlockRange( 163 dst_extents, &dst_idx, do_remove, prev_dst_offset, dst_offset); 164 if (new_src) { 165 src_offset = 0; 166 } 167 if (new_dst) { 168 dst_offset = 0; 169 } 170 171 if (do_remove) 172 removed_bytes += min_num_blocks * kBlockSize; 173 } 174 175 // If we removed the last block and this block is only partly used by file 176 // content, deduct the unused portion from the total removed byte count. 177 if (do_remove && (nonfull_block_bytes = total_bytes % kBlockSize)) 178 removed_bytes -= kBlockSize - nonfull_block_bytes; 179 180 return removed_bytes; 181 } 182 183 // Storing a diff operation has more overhead over replace operation in the 184 // manifest, we need to store an additional src_sha256_hash which is 32 bytes 185 // and not compressible, and also src_extents which could use anywhere from a 186 // few bytes to hundreds of bytes depending on the number of extents. 187 // This function evaluates the overhead tradeoff and determines if it's worth to 188 // use a diff operation with data blob of |diff_size| and |num_src_extents| 189 // extents over an existing |op| with data blob of |old_blob_size|. 190 bool IsDiffOperationBetter(const InstallOperation& op, 191 size_t old_blob_size, 192 size_t diff_size, 193 size_t num_src_extents) { 194 if (!diff_utils::IsAReplaceOperation(op.type())) 195 return diff_size < old_blob_size; 196 197 // Reference: https://developers.google.com/protocol-buffers/docs/encoding 198 // For |src_sha256_hash| we need 1 byte field number/type, 1 byte size and 32 199 // bytes data, for |src_extents| we need 1 byte field number/type and 1 byte 200 // size. 201 constexpr size_t kDiffOverhead = 1 + 1 + 32 + 1 + 1; 202 // Each extent has two variable length encoded uint64, here we use a rough 203 // estimate of 6 bytes overhead per extent, since |num_blocks| is usually 204 // very small. 205 constexpr size_t kDiffOverheadPerExtent = 6; 206 207 return diff_size + kDiffOverhead + num_src_extents * kDiffOverheadPerExtent < 208 old_blob_size; 209 } 210 211 // Returns the levenshtein distance between string |a| and |b|. 212 // https://en.wikipedia.org/wiki/Levenshtein_distance 213 int LevenshteinDistance(const string& a, const string& b) { 214 vector<int> distances(a.size() + 1); 215 std::iota(distances.begin(), distances.end(), 0); 216 217 for (size_t i = 1; i <= b.size(); i++) { 218 distances[0] = i; 219 int previous_distance = i - 1; 220 for (size_t j = 1; j <= a.size(); j++) { 221 int new_distance = 222 std::min({distances[j] + 1, 223 distances[j - 1] + 1, 224 previous_distance + (a[j - 1] == b[i - 1] ? 0 : 1)}); 225 previous_distance = distances[j]; 226 distances[j] = new_distance; 227 } 228 } 229 return distances.back(); 230 } 231 } // namespace 232 233 namespace diff_utils { 234 235 // This class encapsulates a file delta processing thread work. The 236 // processor computes the delta between the source and target files; 237 // and write the compressed delta to the blob. 238 class FileDeltaProcessor : public base::DelegateSimpleThread::Delegate { 239 public: 240 FileDeltaProcessor(const string& old_part, 241 const string& new_part, 242 const PayloadVersion& version, 243 const vector<Extent>& old_extents, 244 const vector<Extent>& new_extents, 245 const vector<puffin::BitExtent>& old_deflates, 246 const vector<puffin::BitExtent>& new_deflates, 247 const string& name, 248 ssize_t chunk_blocks, 249 BlobFileWriter* blob_file) 250 : old_part_(old_part), 251 new_part_(new_part), 252 version_(version), 253 old_extents_(old_extents), 254 new_extents_(new_extents), 255 new_extents_blocks_(utils::BlocksInExtents(new_extents)), 256 old_deflates_(old_deflates), 257 new_deflates_(new_deflates), 258 name_(name), 259 chunk_blocks_(chunk_blocks), 260 blob_file_(blob_file) {} 261 262 bool operator>(const FileDeltaProcessor& other) const { 263 return new_extents_blocks_ > other.new_extents_blocks_; 264 } 265 266 ~FileDeltaProcessor() override = default; 267 268 // Overrides DelegateSimpleThread::Delegate. 269 // Calculate the list of operations and write their corresponding deltas to 270 // the blob_file. 271 void Run() override; 272 273 // Merge each file processor's ops list to aops. 274 bool MergeOperation(vector<AnnotatedOperation>* aops); 275 276 private: 277 const string& old_part_; // NOLINT(runtime/member_string_references) 278 const string& new_part_; // NOLINT(runtime/member_string_references) 279 const PayloadVersion& version_; 280 281 // The block ranges of the old/new file within the src/tgt image 282 const vector<Extent> old_extents_; 283 const vector<Extent> new_extents_; 284 const size_t new_extents_blocks_; 285 const vector<puffin::BitExtent> old_deflates_; 286 const vector<puffin::BitExtent> new_deflates_; 287 const string name_; 288 // Block limit of one aop. 289 const ssize_t chunk_blocks_; 290 BlobFileWriter* blob_file_; 291 292 // The list of ops to reach the new file from the old file. 293 vector<AnnotatedOperation> file_aops_; 294 295 bool failed_ = false; 296 297 DISALLOW_COPY_AND_ASSIGN(FileDeltaProcessor); 298 }; 299 300 void FileDeltaProcessor::Run() { 301 TEST_AND_RETURN(blob_file_ != nullptr); 302 base::TimeTicks start = base::TimeTicks::Now(); 303 304 if (!DeltaReadFile(&file_aops_, 305 old_part_, 306 new_part_, 307 old_extents_, 308 new_extents_, 309 old_deflates_, 310 new_deflates_, 311 name_, 312 chunk_blocks_, 313 version_, 314 blob_file_)) { 315 LOG(ERROR) << "Failed to generate delta for " << name_ << " (" 316 << new_extents_blocks_ << " blocks)"; 317 failed_ = true; 318 return; 319 } 320 321 if (!version_.InplaceUpdate()) { 322 if (!ABGenerator::FragmentOperations( 323 version_, &file_aops_, new_part_, blob_file_)) { 324 LOG(ERROR) << "Failed to fragment operations for " << name_; 325 failed_ = true; 326 return; 327 } 328 } 329 330 LOG(INFO) << "Encoded file " << name_ << " (" << new_extents_blocks_ 331 << " blocks) in " << (base::TimeTicks::Now() - start); 332 } 333 334 bool FileDeltaProcessor::MergeOperation(vector<AnnotatedOperation>* aops) { 335 if (failed_) 336 return false; 337 aops->reserve(aops->size() + file_aops_.size()); 338 std::move(file_aops_.begin(), file_aops_.end(), std::back_inserter(*aops)); 339 return true; 340 } 341 342 FilesystemInterface::File GetOldFile( 343 const map<string, FilesystemInterface::File>& old_files_map, 344 const string& new_file_name) { 345 if (old_files_map.empty()) 346 return {}; 347 348 auto old_file_iter = old_files_map.find(new_file_name); 349 if (old_file_iter != old_files_map.end()) 350 return old_file_iter->second; 351 352 // No old file match for the new file name, use a similar file with the 353 // shortest levenshtein distance. 354 // This works great if the file has version number in it, but even for 355 // a completely new file, using a similar file can still help. 356 int min_distance = new_file_name.size(); 357 const FilesystemInterface::File* old_file; 358 for (const auto& pair : old_files_map) { 359 int distance = LevenshteinDistance(new_file_name, pair.first); 360 if (distance < min_distance) { 361 min_distance = distance; 362 old_file = &pair.second; 363 } 364 } 365 LOG(INFO) << "Using " << old_file->name << " as source for " << new_file_name; 366 return *old_file; 367 } 368 369 bool DeltaReadPartition(vector<AnnotatedOperation>* aops, 370 const PartitionConfig& old_part, 371 const PartitionConfig& new_part, 372 ssize_t hard_chunk_blocks, 373 size_t soft_chunk_blocks, 374 const PayloadVersion& version, 375 BlobFileWriter* blob_file) { 376 ExtentRanges old_visited_blocks; 377 ExtentRanges new_visited_blocks; 378 379 // If verity is enabled, mark those blocks as visited to skip generating 380 // operations for them. 381 if (version.minor >= kVerityMinorPayloadVersion && 382 !new_part.verity.IsEmpty()) { 383 LOG(INFO) << "Skipping verity hash tree blocks: " 384 << ExtentsToString({new_part.verity.hash_tree_extent}); 385 new_visited_blocks.AddExtent(new_part.verity.hash_tree_extent); 386 LOG(INFO) << "Skipping verity FEC blocks: " 387 << ExtentsToString({new_part.verity.fec_extent}); 388 new_visited_blocks.AddExtent(new_part.verity.fec_extent); 389 } 390 391 ExtentRanges old_zero_blocks; 392 TEST_AND_RETURN_FALSE(DeltaMovedAndZeroBlocks(aops, 393 old_part.path, 394 new_part.path, 395 old_part.size / kBlockSize, 396 new_part.size / kBlockSize, 397 soft_chunk_blocks, 398 version, 399 blob_file, 400 &old_visited_blocks, 401 &new_visited_blocks, 402 &old_zero_blocks)); 403 404 bool puffdiff_allowed = version.OperationAllowed(InstallOperation::PUFFDIFF); 405 map<string, FilesystemInterface::File> old_files_map; 406 if (old_part.fs_interface) { 407 vector<FilesystemInterface::File> old_files; 408 TEST_AND_RETURN_FALSE(deflate_utils::PreprocessPartitionFiles( 409 old_part, &old_files, puffdiff_allowed)); 410 for (const FilesystemInterface::File& file : old_files) 411 old_files_map[file.name] = file; 412 } 413 414 TEST_AND_RETURN_FALSE(new_part.fs_interface); 415 vector<FilesystemInterface::File> new_files; 416 TEST_AND_RETURN_FALSE(deflate_utils::PreprocessPartitionFiles( 417 new_part, &new_files, puffdiff_allowed)); 418 419 list<FileDeltaProcessor> file_delta_processors; 420 421 // The processing is very straightforward here, we generate operations for 422 // every file (and pseudo-file such as the metadata) in the new filesystem 423 // based on the file with the same name in the old filesystem, if any. 424 // Files with overlapping data blocks (like hardlinks or filesystems with tail 425 // packing or compression where the blocks store more than one file) are only 426 // generated once in the new image, but are also used only once from the old 427 // image due to some simplifications (see below). 428 for (const FilesystemInterface::File& new_file : new_files) { 429 // Ignore the files in the new filesystem without blocks. Symlinks with 430 // data blocks (for example, symlinks bigger than 60 bytes in ext2) are 431 // handled as normal files. We also ignore blocks that were already 432 // processed by a previous file. 433 vector<Extent> new_file_extents = 434 FilterExtentRanges(new_file.extents, new_visited_blocks); 435 new_visited_blocks.AddExtents(new_file_extents); 436 437 if (new_file_extents.empty()) 438 continue; 439 440 // We can't visit each dst image inode more than once, as that would 441 // duplicate work. Here, we avoid visiting each source image inode 442 // more than once. Technically, we could have multiple operations 443 // that read the same blocks from the source image for diffing, but 444 // we choose not to avoid complexity. Eventually we will move away 445 // from using a graph/cycle detection/etc to generate diffs, and at that 446 // time, it will be easy (non-complex) to have many operations read 447 // from the same source blocks. At that time, this code can die. -adlr 448 FilesystemInterface::File old_file = 449 GetOldFile(old_files_map, new_file.name); 450 vector<Extent> old_file_extents; 451 if (version.InplaceUpdate()) 452 old_file_extents = 453 FilterExtentRanges(old_file.extents, old_visited_blocks); 454 else 455 old_file_extents = FilterExtentRanges(old_file.extents, old_zero_blocks); 456 old_visited_blocks.AddExtents(old_file_extents); 457 458 file_delta_processors.emplace_back(old_part.path, 459 new_part.path, 460 version, 461 std::move(old_file_extents), 462 std::move(new_file_extents), 463 old_file.deflates, 464 new_file.deflates, 465 new_file.name, // operation name 466 hard_chunk_blocks, 467 blob_file); 468 } 469 // Process all the blocks not included in any file. We provided all the unused 470 // blocks in the old partition as available data. 471 vector<Extent> new_unvisited = { 472 ExtentForRange(0, new_part.size / kBlockSize)}; 473 new_unvisited = FilterExtentRanges(new_unvisited, new_visited_blocks); 474 if (!new_unvisited.empty()) { 475 vector<Extent> old_unvisited; 476 if (old_part.fs_interface) { 477 old_unvisited.push_back(ExtentForRange(0, old_part.size / kBlockSize)); 478 old_unvisited = FilterExtentRanges(old_unvisited, old_visited_blocks); 479 } 480 481 LOG(INFO) << "Scanning " << utils::BlocksInExtents(new_unvisited) 482 << " unwritten blocks using chunk size of " << soft_chunk_blocks 483 << " blocks."; 484 // We use the soft_chunk_blocks limit for the <non-file-data> as we don't 485 // really know the structure of this data and we should not expect it to 486 // have redundancy between partitions. 487 file_delta_processors.emplace_back( 488 old_part.path, 489 new_part.path, 490 version, 491 std::move(old_unvisited), 492 std::move(new_unvisited), 493 vector<puffin::BitExtent>{}, // old_deflates, 494 vector<puffin::BitExtent>{}, // new_deflates 495 "<non-file-data>", // operation name 496 soft_chunk_blocks, 497 blob_file); 498 } 499 500 size_t max_threads = GetMaxThreads(); 501 502 // Sort the files in descending order based on number of new blocks to make 503 // sure we start the largest ones first. 504 if (file_delta_processors.size() > max_threads) { 505 file_delta_processors.sort(std::greater<FileDeltaProcessor>()); 506 } 507 508 base::DelegateSimpleThreadPool thread_pool("incremental-update-generator", 509 max_threads); 510 thread_pool.Start(); 511 for (auto& processor : file_delta_processors) { 512 thread_pool.AddWork(&processor); 513 } 514 thread_pool.JoinAll(); 515 516 for (auto& processor : file_delta_processors) { 517 TEST_AND_RETURN_FALSE(processor.MergeOperation(aops)); 518 } 519 520 return true; 521 } 522 523 bool DeltaMovedAndZeroBlocks(vector<AnnotatedOperation>* aops, 524 const string& old_part, 525 const string& new_part, 526 size_t old_num_blocks, 527 size_t new_num_blocks, 528 ssize_t chunk_blocks, 529 const PayloadVersion& version, 530 BlobFileWriter* blob_file, 531 ExtentRanges* old_visited_blocks, 532 ExtentRanges* new_visited_blocks, 533 ExtentRanges* old_zero_blocks) { 534 vector<BlockMapping::BlockId> old_block_ids; 535 vector<BlockMapping::BlockId> new_block_ids; 536 TEST_AND_RETURN_FALSE(MapPartitionBlocks(old_part, 537 new_part, 538 old_num_blocks * kBlockSize, 539 new_num_blocks * kBlockSize, 540 kBlockSize, 541 &old_block_ids, 542 &new_block_ids)); 543 544 // If the update is inplace, we map all the blocks that didn't move, 545 // regardless of the contents since they are already copied and no operation 546 // is required. 547 if (version.InplaceUpdate()) { 548 uint64_t num_blocks = std::min(old_num_blocks, new_num_blocks); 549 for (uint64_t block = 0; block < num_blocks; block++) { 550 if (old_block_ids[block] == new_block_ids[block] && 551 !old_visited_blocks->ContainsBlock(block) && 552 !new_visited_blocks->ContainsBlock(block)) { 553 old_visited_blocks->AddBlock(block); 554 new_visited_blocks->AddBlock(block); 555 } 556 } 557 } 558 559 // A mapping from the block_id to the list of block numbers with that block id 560 // in the old partition. This is used to lookup where in the old partition 561 // is a block from the new partition. 562 map<BlockMapping::BlockId, vector<uint64_t>> old_blocks_map; 563 564 for (uint64_t block = old_num_blocks; block-- > 0;) { 565 if (old_block_ids[block] != 0 && !old_visited_blocks->ContainsBlock(block)) 566 old_blocks_map[old_block_ids[block]].push_back(block); 567 568 // Mark all zeroed blocks in the old image as "used" since it doesn't make 569 // any sense to spend I/O to read zeros from the source partition and more 570 // importantly, these could sometimes be blocks discarded in the SSD which 571 // would read non-zero values. 572 if (old_block_ids[block] == 0) 573 old_zero_blocks->AddBlock(block); 574 } 575 old_visited_blocks->AddRanges(*old_zero_blocks); 576 577 // The collection of blocks in the new partition with just zeros. This is a 578 // common case for free-space that's also problematic for bsdiff, so we want 579 // to optimize it using REPLACE_BZ operations. The blob for a REPLACE_BZ of 580 // just zeros is so small that it doesn't make sense to spend the I/O reading 581 // zeros from the old partition. 582 vector<Extent> new_zeros; 583 584 vector<Extent> old_identical_blocks; 585 vector<Extent> new_identical_blocks; 586 587 for (uint64_t block = 0; block < new_num_blocks; block++) { 588 // Only produce operations for blocks that were not yet visited. 589 if (new_visited_blocks->ContainsBlock(block)) 590 continue; 591 if (new_block_ids[block] == 0) { 592 AppendBlockToExtents(&new_zeros, block); 593 continue; 594 } 595 596 auto old_blocks_map_it = old_blocks_map.find(new_block_ids[block]); 597 // Check if the block exists in the old partition at all. 598 if (old_blocks_map_it == old_blocks_map.end() || 599 old_blocks_map_it->second.empty()) 600 continue; 601 602 AppendBlockToExtents(&old_identical_blocks, 603 old_blocks_map_it->second.back()); 604 AppendBlockToExtents(&new_identical_blocks, block); 605 // We can't reuse source blocks in minor version 1 because the cycle 606 // breaking algorithm used in the in-place update doesn't support that. 607 if (version.InplaceUpdate()) 608 old_blocks_map_it->second.pop_back(); 609 } 610 611 if (chunk_blocks == -1) 612 chunk_blocks = new_num_blocks; 613 614 // Produce operations for the zero blocks split per output extent. 615 size_t num_ops = aops->size(); 616 new_visited_blocks->AddExtents(new_zeros); 617 for (const Extent& extent : new_zeros) { 618 if (version.OperationAllowed(InstallOperation::ZERO)) { 619 for (uint64_t offset = 0; offset < extent.num_blocks(); 620 offset += chunk_blocks) { 621 uint64_t num_blocks = 622 std::min(static_cast<uint64_t>(extent.num_blocks()) - offset, 623 static_cast<uint64_t>(chunk_blocks)); 624 InstallOperation operation; 625 operation.set_type(InstallOperation::ZERO); 626 *(operation.add_dst_extents()) = 627 ExtentForRange(extent.start_block() + offset, num_blocks); 628 aops->push_back({.name = "<zeros>", .op = operation}); 629 } 630 } else { 631 TEST_AND_RETURN_FALSE(DeltaReadFile(aops, 632 "", 633 new_part, 634 {}, // old_extents 635 {extent}, // new_extents 636 {}, // old_deflates 637 {}, // new_deflates 638 "<zeros>", 639 chunk_blocks, 640 version, 641 blob_file)); 642 } 643 } 644 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for " 645 << utils::BlocksInExtents(new_zeros) << " zeroed blocks"; 646 647 // Produce MOVE/SOURCE_COPY operations for the moved blocks. 648 num_ops = aops->size(); 649 uint64_t used_blocks = 0; 650 old_visited_blocks->AddExtents(old_identical_blocks); 651 new_visited_blocks->AddExtents(new_identical_blocks); 652 for (const Extent& extent : new_identical_blocks) { 653 // We split the operation at the extent boundary or when bigger than 654 // chunk_blocks. 655 for (uint64_t op_block_offset = 0; op_block_offset < extent.num_blocks(); 656 op_block_offset += chunk_blocks) { 657 aops->emplace_back(); 658 AnnotatedOperation* aop = &aops->back(); 659 aop->name = "<identical-blocks>"; 660 aop->op.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY) 661 ? InstallOperation::SOURCE_COPY 662 : InstallOperation::MOVE); 663 664 uint64_t chunk_num_blocks = 665 std::min(static_cast<uint64_t>(extent.num_blocks()) - op_block_offset, 666 static_cast<uint64_t>(chunk_blocks)); 667 668 // The current operation represents the move/copy operation for the 669 // sublist starting at |used_blocks| of length |chunk_num_blocks| where 670 // the src and dst are from |old_identical_blocks| and 671 // |new_identical_blocks| respectively. 672 StoreExtents( 673 ExtentsSublist(old_identical_blocks, used_blocks, chunk_num_blocks), 674 aop->op.mutable_src_extents()); 675 676 Extent* op_dst_extent = aop->op.add_dst_extents(); 677 op_dst_extent->set_start_block(extent.start_block() + op_block_offset); 678 op_dst_extent->set_num_blocks(chunk_num_blocks); 679 CHECK( 680 vector<Extent>{*op_dst_extent} == // NOLINT(whitespace/braces) 681 ExtentsSublist(new_identical_blocks, used_blocks, chunk_num_blocks)); 682 683 used_blocks += chunk_num_blocks; 684 } 685 } 686 LOG(INFO) << "Produced " << (aops->size() - num_ops) << " operations for " 687 << used_blocks << " identical blocks moved"; 688 689 return true; 690 } 691 692 bool DeltaReadFile(vector<AnnotatedOperation>* aops, 693 const string& old_part, 694 const string& new_part, 695 const vector<Extent>& old_extents, 696 const vector<Extent>& new_extents, 697 const vector<puffin::BitExtent>& old_deflates, 698 const vector<puffin::BitExtent>& new_deflates, 699 const string& name, 700 ssize_t chunk_blocks, 701 const PayloadVersion& version, 702 BlobFileWriter* blob_file) { 703 brillo::Blob data; 704 InstallOperation operation; 705 706 uint64_t total_blocks = utils::BlocksInExtents(new_extents); 707 if (chunk_blocks == -1) 708 chunk_blocks = total_blocks; 709 710 for (uint64_t block_offset = 0; block_offset < total_blocks; 711 block_offset += chunk_blocks) { 712 // Split the old/new file in the same chunks. Note that this could drop 713 // some information from the old file used for the new chunk. If the old 714 // file is smaller (or even empty when there's no old file) the chunk will 715 // also be empty. 716 vector<Extent> old_extents_chunk = 717 ExtentsSublist(old_extents, block_offset, chunk_blocks); 718 vector<Extent> new_extents_chunk = 719 ExtentsSublist(new_extents, block_offset, chunk_blocks); 720 NormalizeExtents(&old_extents_chunk); 721 NormalizeExtents(&new_extents_chunk); 722 723 TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part, 724 new_part, 725 old_extents_chunk, 726 new_extents_chunk, 727 old_deflates, 728 new_deflates, 729 version, 730 &data, 731 &operation)); 732 733 // Check if the operation writes nothing. 734 if (operation.dst_extents_size() == 0) { 735 if (operation.type() == InstallOperation::MOVE) { 736 LOG(INFO) << "Empty MOVE operation (" << name << "), skipping"; 737 continue; 738 } else { 739 LOG(ERROR) << "Empty non-MOVE operation"; 740 return false; 741 } 742 } 743 744 // Now, insert into the list of operations. 745 AnnotatedOperation aop; 746 aop.name = name; 747 if (static_cast<uint64_t>(chunk_blocks) < total_blocks) { 748 aop.name = base::StringPrintf( 749 "%s:%" PRIu64, name.c_str(), block_offset / chunk_blocks); 750 } 751 aop.op = operation; 752 753 // Write the data 754 TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file)); 755 aops->emplace_back(aop); 756 } 757 return true; 758 } 759 760 bool GenerateBestFullOperation(const brillo::Blob& new_data, 761 const PayloadVersion& version, 762 brillo::Blob* out_blob, 763 InstallOperation::Type* out_type) { 764 if (new_data.empty()) 765 return false; 766 767 if (version.OperationAllowed(InstallOperation::ZERO) && 768 std::all_of( 769 new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) { 770 // The read buffer is all zeros, so produce a ZERO operation. No need to 771 // check other types of operations in this case. 772 *out_blob = brillo::Blob(); 773 *out_type = InstallOperation::ZERO; 774 return true; 775 } 776 777 bool out_blob_set = false; 778 779 // Try compressing |new_data| with xz first. 780 if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) { 781 brillo::Blob new_data_xz; 782 if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) { 783 *out_type = InstallOperation::REPLACE_XZ; 784 *out_blob = std::move(new_data_xz); 785 out_blob_set = true; 786 } 787 } 788 789 // Try compressing it with bzip2. 790 if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) { 791 brillo::Blob new_data_bz; 792 // TODO(deymo): Implement some heuristic to determine if it is worth trying 793 // to compress the blob with bzip2 if we already have a good REPLACE_XZ. 794 if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() && 795 (!out_blob_set || out_blob->size() > new_data_bz.size())) { 796 // A REPLACE_BZ is better or nothing else was set. 797 *out_type = InstallOperation::REPLACE_BZ; 798 *out_blob = std::move(new_data_bz); 799 out_blob_set = true; 800 } 801 } 802 803 // If nothing else worked or it was badly compressed we try a REPLACE. 804 if (!out_blob_set || out_blob->size() >= new_data.size()) { 805 *out_type = InstallOperation::REPLACE; 806 // This needs to make a copy of the data in the case bzip or xz didn't 807 // compress well, which is not the common case so the performance hit is 808 // low. 809 *out_blob = new_data; 810 } 811 return true; 812 } 813 814 bool ReadExtentsToDiff(const string& old_part, 815 const string& new_part, 816 const vector<Extent>& old_extents, 817 const vector<Extent>& new_extents, 818 const vector<puffin::BitExtent>& old_deflates, 819 const vector<puffin::BitExtent>& new_deflates, 820 const PayloadVersion& version, 821 brillo::Blob* out_data, 822 InstallOperation* out_op) { 823 InstallOperation operation; 824 825 // We read blocks from old_extents and write blocks to new_extents. 826 uint64_t blocks_to_read = utils::BlocksInExtents(old_extents); 827 uint64_t blocks_to_write = utils::BlocksInExtents(new_extents); 828 829 // Disable bsdiff, and puffdiff when the data is too big. 830 bool bsdiff_allowed = 831 version.OperationAllowed(InstallOperation::SOURCE_BSDIFF) || 832 version.OperationAllowed(InstallOperation::BSDIFF); 833 if (bsdiff_allowed && 834 blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) { 835 LOG(INFO) << "bsdiff blacklisted, data too big: " 836 << blocks_to_read * kBlockSize << " bytes"; 837 bsdiff_allowed = false; 838 } 839 840 bool puffdiff_allowed = version.OperationAllowed(InstallOperation::PUFFDIFF); 841 if (puffdiff_allowed && 842 blocks_to_read * kBlockSize > kMaxPuffdiffDestinationSize) { 843 LOG(INFO) << "puffdiff blacklisted, data too big: " 844 << blocks_to_read * kBlockSize << " bytes"; 845 puffdiff_allowed = false; 846 } 847 848 // Make copies of the extents so we can modify them. 849 vector<Extent> src_extents = old_extents; 850 vector<Extent> dst_extents = new_extents; 851 852 // Read in bytes from new data. 853 brillo::Blob new_data; 854 TEST_AND_RETURN_FALSE(utils::ReadExtents(new_part, 855 new_extents, 856 &new_data, 857 kBlockSize * blocks_to_write, 858 kBlockSize)); 859 TEST_AND_RETURN_FALSE(!new_data.empty()); 860 861 // Data blob that will be written to delta file. 862 brillo::Blob data_blob; 863 864 // Try generating a full operation for the given new data, regardless of the 865 // old_data. 866 InstallOperation::Type op_type; 867 TEST_AND_RETURN_FALSE( 868 GenerateBestFullOperation(new_data, version, &data_blob, &op_type)); 869 operation.set_type(op_type); 870 871 brillo::Blob old_data; 872 if (blocks_to_read > 0) { 873 // Read old data. 874 TEST_AND_RETURN_FALSE(utils::ReadExtents(old_part, 875 src_extents, 876 &old_data, 877 kBlockSize * blocks_to_read, 878 kBlockSize)); 879 if (old_data == new_data) { 880 // No change in data. 881 operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY) 882 ? InstallOperation::SOURCE_COPY 883 : InstallOperation::MOVE); 884 data_blob = brillo::Blob(); 885 } else if (IsDiffOperationBetter( 886 operation, data_blob.size(), 0, src_extents.size())) { 887 // No point in trying diff if zero blob size diff operation is 888 // still worse than replace. 889 if (bsdiff_allowed) { 890 base::FilePath patch; 891 TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&patch)); 892 ScopedPathUnlinker unlinker(patch.value()); 893 894 std::unique_ptr<bsdiff::PatchWriterInterface> bsdiff_patch_writer; 895 InstallOperation::Type operation_type = InstallOperation::BSDIFF; 896 if (version.OperationAllowed(InstallOperation::BROTLI_BSDIFF)) { 897 bsdiff_patch_writer = 898 bsdiff::CreateBSDF2PatchWriter(patch.value(), 899 bsdiff::CompressorType::kBrotli, 900 kBrotliCompressionQuality); 901 operation_type = InstallOperation::BROTLI_BSDIFF; 902 } else { 903 bsdiff_patch_writer = bsdiff::CreateBsdiffPatchWriter(patch.value()); 904 if (version.OperationAllowed(InstallOperation::SOURCE_BSDIFF)) { 905 operation_type = InstallOperation::SOURCE_BSDIFF; 906 } 907 } 908 909 brillo::Blob bsdiff_delta; 910 TEST_AND_RETURN_FALSE(0 == bsdiff::bsdiff(old_data.data(), 911 old_data.size(), 912 new_data.data(), 913 new_data.size(), 914 bsdiff_patch_writer.get(), 915 nullptr)); 916 917 TEST_AND_RETURN_FALSE(utils::ReadFile(patch.value(), &bsdiff_delta)); 918 CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0)); 919 if (IsDiffOperationBetter(operation, 920 data_blob.size(), 921 bsdiff_delta.size(), 922 src_extents.size())) { 923 operation.set_type(operation_type); 924 data_blob = std::move(bsdiff_delta); 925 } 926 } 927 if (puffdiff_allowed) { 928 // Find all deflate positions inside the given extents and then put all 929 // deflates together because we have already read all the extents into 930 // one buffer. 931 vector<puffin::BitExtent> src_deflates; 932 TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates( 933 src_extents, old_deflates, &src_deflates)); 934 935 vector<puffin::BitExtent> dst_deflates; 936 TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates( 937 dst_extents, new_deflates, &dst_deflates)); 938 939 puffin::RemoveEqualBitExtents( 940 old_data, new_data, &src_deflates, &dst_deflates); 941 942 // See crbug.com/915559. 943 if (version.minor <= kPuffdiffMinorPayloadVersion) { 944 TEST_AND_RETURN_FALSE(puffin::RemoveDeflatesWithBadDistanceCaches( 945 old_data, &src_deflates)); 946 947 TEST_AND_RETURN_FALSE(puffin::RemoveDeflatesWithBadDistanceCaches( 948 new_data, &dst_deflates)); 949 } 950 951 // Only Puffdiff if both files have at least one deflate left. 952 if (!src_deflates.empty() && !dst_deflates.empty()) { 953 brillo::Blob puffdiff_delta; 954 string temp_file_path; 955 TEST_AND_RETURN_FALSE(utils::MakeTempFile( 956 "puffdiff-delta.XXXXXX", &temp_file_path, nullptr)); 957 ScopedPathUnlinker temp_file_unlinker(temp_file_path); 958 959 // Perform PuffDiff operation. 960 TEST_AND_RETURN_FALSE(puffin::PuffDiff(old_data, 961 new_data, 962 src_deflates, 963 dst_deflates, 964 temp_file_path, 965 &puffdiff_delta)); 966 TEST_AND_RETURN_FALSE(puffdiff_delta.size() > 0); 967 if (IsDiffOperationBetter(operation, 968 data_blob.size(), 969 puffdiff_delta.size(), 970 src_extents.size())) { 971 operation.set_type(InstallOperation::PUFFDIFF); 972 data_blob = std::move(puffdiff_delta); 973 } 974 } 975 } 976 } 977 } 978 979 // Remove identical src/dst block ranges in MOVE operations. 980 if (operation.type() == InstallOperation::MOVE) { 981 auto removed_bytes = 982 RemoveIdenticalBlockRanges(&src_extents, &dst_extents, new_data.size()); 983 operation.set_src_length(old_data.size() - removed_bytes); 984 operation.set_dst_length(new_data.size() - removed_bytes); 985 } 986 987 // WARNING: We always set legacy |src_length| and |dst_length| fields for 988 // BSDIFF. For SOURCE_BSDIFF we only set them for minor version 3 and 989 // lower. This is needed because we used to use these two parameters in the 990 // SOURCE_BSDIFF for minor version 3 and lower, but we do not need them 991 // anymore in higher minor versions. This means if we stop adding these 992 // parameters for those minor versions, the delta payloads will be invalid. 993 if (operation.type() == InstallOperation::BSDIFF || 994 (operation.type() == InstallOperation::SOURCE_BSDIFF && 995 version.minor <= kOpSrcHashMinorPayloadVersion)) { 996 operation.set_src_length(old_data.size()); 997 operation.set_dst_length(new_data.size()); 998 } 999 1000 // Embed extents in the operation. Replace (all variants), zero and discard 1001 // operations should not have source extents. 1002 if (!IsNoSourceOperation(operation.type())) { 1003 StoreExtents(src_extents, operation.mutable_src_extents()); 1004 } 1005 // All operations have dst_extents. 1006 StoreExtents(dst_extents, operation.mutable_dst_extents()); 1007 1008 *out_data = std::move(data_blob); 1009 *out_op = operation; 1010 return true; 1011 } 1012 1013 bool IsAReplaceOperation(InstallOperation::Type op_type) { 1014 return (op_type == InstallOperation::REPLACE || 1015 op_type == InstallOperation::REPLACE_BZ || 1016 op_type == InstallOperation::REPLACE_XZ); 1017 } 1018 1019 bool IsNoSourceOperation(InstallOperation::Type op_type) { 1020 return (IsAReplaceOperation(op_type) || op_type == InstallOperation::ZERO || 1021 op_type == InstallOperation::DISCARD); 1022 } 1023 1024 // Returns true if |op| is a no-op operation that doesn't do any useful work 1025 // (e.g., a move operation that copies blocks onto themselves). 1026 bool IsNoopOperation(const InstallOperation& op) { 1027 return (op.type() == InstallOperation::MOVE && 1028 ExpandExtents(op.src_extents()) == ExpandExtents(op.dst_extents())); 1029 } 1030 1031 void FilterNoopOperations(vector<AnnotatedOperation>* ops) { 1032 ops->erase(std::remove_if(ops->begin(), 1033 ops->end(), 1034 [](const AnnotatedOperation& aop) { 1035 return IsNoopOperation(aop.op); 1036 }), 1037 ops->end()); 1038 } 1039 1040 bool InitializePartitionInfo(const PartitionConfig& part, PartitionInfo* info) { 1041 info->set_size(part.size); 1042 HashCalculator hasher; 1043 TEST_AND_RETURN_FALSE(hasher.UpdateFile(part.path, part.size) == 1044 static_cast<off_t>(part.size)); 1045 TEST_AND_RETURN_FALSE(hasher.Finalize()); 1046 const brillo::Blob& hash = hasher.raw_hash(); 1047 info->set_hash(hash.data(), hash.size()); 1048 LOG(INFO) << part.path << ": size=" << part.size 1049 << " hash=" << brillo::data_encoding::Base64Encode(hash); 1050 return true; 1051 } 1052 1053 bool CompareAopsByDestination(AnnotatedOperation first_aop, 1054 AnnotatedOperation second_aop) { 1055 // We want empty operations to be at the end of the payload. 1056 if (!first_aop.op.dst_extents().size() || !second_aop.op.dst_extents().size()) 1057 return ((!first_aop.op.dst_extents().size()) < 1058 (!second_aop.op.dst_extents().size())); 1059 uint32_t first_dst_start = first_aop.op.dst_extents(0).start_block(); 1060 uint32_t second_dst_start = second_aop.op.dst_extents(0).start_block(); 1061 return first_dst_start < second_dst_start; 1062 } 1063 1064 bool IsExtFilesystem(const string& device) { 1065 brillo::Blob header; 1066 // See include/linux/ext2_fs.h for more details on the structure. We obtain 1067 // ext2 constants from ext2fs/ext2fs.h header but we don't link with the 1068 // library. 1069 if (!utils::ReadFileChunk( 1070 device, 0, SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE, &header) || 1071 header.size() < SUPERBLOCK_OFFSET + SUPERBLOCK_SIZE) 1072 return false; 1073 1074 const uint8_t* superblock = header.data() + SUPERBLOCK_OFFSET; 1075 1076 // ext3_fs.h: ext3_super_block.s_blocks_count 1077 uint32_t block_count = 1078 *reinterpret_cast<const uint32_t*>(superblock + 1 * sizeof(int32_t)); 1079 1080 // ext3_fs.h: ext3_super_block.s_log_block_size 1081 uint32_t log_block_size = 1082 *reinterpret_cast<const uint32_t*>(superblock + 6 * sizeof(int32_t)); 1083 1084 // ext3_fs.h: ext3_super_block.s_magic 1085 uint16_t magic = 1086 *reinterpret_cast<const uint16_t*>(superblock + 14 * sizeof(int32_t)); 1087 1088 block_count = le32toh(block_count); 1089 log_block_size = le32toh(log_block_size) + EXT2_MIN_BLOCK_LOG_SIZE; 1090 magic = le16toh(magic); 1091 1092 if (magic != EXT2_SUPER_MAGIC) 1093 return false; 1094 1095 // Sanity check the parameters. 1096 TEST_AND_RETURN_FALSE(log_block_size >= EXT2_MIN_BLOCK_LOG_SIZE && 1097 log_block_size <= EXT2_MAX_BLOCK_LOG_SIZE); 1098 TEST_AND_RETURN_FALSE(block_count > 0); 1099 return true; 1100 } 1101 1102 // Return the number of CPUs on the machine, and 4 threads in minimum. 1103 size_t GetMaxThreads() { 1104 return std::max(sysconf(_SC_NPROCESSORS_ONLN), 4L); 1105 } 1106 1107 } // namespace diff_utils 1108 1109 } // namespace chromeos_update_engine 1110