1 // 2 // Copyright (C) 2020 The Android Open Source Project 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // 8 // http://www.apache.org/licenses/LICENSE-2.0 9 // 10 // Unless required by applicable law or agreed to in writing, software 11 // distributed under the License is distributed on an "AS IS" BASIS, 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 // 16 #include "update_engine/cleanup_previous_update_action.h" 17 18 #include <chrono> // NOLINT(build/c++11) -- for merge times 19 #include <functional> 20 #include <string> 21 #include <type_traits> 22 23 #include <android-base/properties.h> 24 #include <base/bind.h> 25 26 #ifndef __ANDROID_RECOVERY__ 27 #include <statslog.h> 28 #endif 29 30 #include "update_engine/common/utils.h" 31 #include "update_engine/payload_consumer/delta_performer.h" 32 33 using android::base::GetBoolProperty; 34 using android::snapshot::SnapshotManager; 35 using android::snapshot::SnapshotMergeStats; 36 using android::snapshot::UpdateState; 37 using brillo::MessageLoop; 38 39 constexpr char kBootCompletedProp[] = "sys.boot_completed"; 40 // Interval to check sys.boot_completed. 41 constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2); 42 // Interval to check IBootControl::isSlotMarkedSuccessful 43 constexpr auto kCheckSlotMarkedSuccessfulInterval = 44 base::TimeDelta::FromSeconds(2); 45 // Interval to call SnapshotManager::ProcessUpdateState 46 constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2); 47 48 #ifdef __ANDROID_RECOVERY__ 49 static constexpr bool kIsRecovery = true; 50 #else 51 static constexpr bool kIsRecovery = false; 52 #endif 53 54 namespace chromeos_update_engine { 55 56 CleanupPreviousUpdateAction::CleanupPreviousUpdateAction( 57 PrefsInterface* prefs, 58 BootControlInterface* boot_control, 59 android::snapshot::SnapshotManager* snapshot, 60 CleanupPreviousUpdateActionDelegateInterface* delegate) 61 : prefs_(prefs), 62 boot_control_(boot_control), 63 snapshot_(snapshot), 64 delegate_(delegate), 65 running_(false), 66 cancel_failed_(false), 67 last_percentage_(0), 68 merge_stats_(SnapshotMergeStats::GetInstance(*snapshot)) {} 69 70 void CleanupPreviousUpdateAction::PerformAction() { 71 ResumeAction(); 72 } 73 74 void CleanupPreviousUpdateAction::TerminateProcessing() { 75 SuspendAction(); 76 } 77 78 void CleanupPreviousUpdateAction::ResumeAction() { 79 CHECK(prefs_); 80 CHECK(boot_control_); 81 82 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction"; 83 running_ = true; 84 StartActionInternal(); 85 } 86 87 void CleanupPreviousUpdateAction::SuspendAction() { 88 LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction"; 89 running_ = false; 90 } 91 92 void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) { 93 running_ = false; 94 ReportMergeStats(); 95 metadata_device_ = nullptr; 96 } 97 98 std::string CleanupPreviousUpdateAction::Type() const { 99 return StaticType(); 100 } 101 102 std::string CleanupPreviousUpdateAction::StaticType() { 103 return "CleanupPreviousUpdateAction"; 104 } 105 106 void CleanupPreviousUpdateAction::StartActionInternal() { 107 // Do nothing on non-VAB device. 108 if (!boot_control_->GetDynamicPartitionControl() 109 ->GetVirtualAbFeatureFlag() 110 .IsEnabled()) { 111 processor_->ActionComplete(this, ErrorCode::kSuccess); 112 return; 113 } 114 // SnapshotManager is only available on VAB devices. 115 CHECK(snapshot_); 116 WaitBootCompletedOrSchedule(); 117 } 118 119 void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() { 120 TEST_AND_RETURN(running_); 121 MessageLoop::current()->PostDelayedTask( 122 FROM_HERE, 123 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule, 124 base::Unretained(this)), 125 kCheckBootCompletedInterval); 126 } 127 128 void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() { 129 TEST_AND_RETURN(running_); 130 if (!kIsRecovery && 131 !android::base::GetBoolProperty(kBootCompletedProp, false)) { 132 // repeat 133 ScheduleWaitBootCompleted(); 134 return; 135 } 136 137 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()"; 138 CheckSlotMarkedSuccessfulOrSchedule(); 139 } 140 141 void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() { 142 TEST_AND_RETURN(running_); 143 MessageLoop::current()->PostDelayedTask( 144 FROM_HERE, 145 base::Bind( 146 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule, 147 base::Unretained(this)), 148 kCheckSlotMarkedSuccessfulInterval); 149 } 150 151 void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() { 152 TEST_AND_RETURN(running_); 153 if (!kIsRecovery && 154 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) { 155 ScheduleWaitMarkBootSuccessful(); 156 } 157 158 if (metadata_device_ == nullptr) { 159 metadata_device_ = snapshot_->EnsureMetadataMounted(); 160 } 161 162 if (metadata_device_ == nullptr) { 163 LOG(ERROR) << "Failed to mount /metadata."; 164 // If metadata is erased but not formatted, it is possible to not mount 165 // it in recovery. It is safe to skip CleanupPreviousUpdateAction. 166 processor_->ActionComplete( 167 this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError); 168 return; 169 } 170 171 if (kIsRecovery) { 172 auto snapshots_created = 173 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_); 174 switch (snapshots_created) { 175 case android::snapshot::CreateResult::CREATED: { 176 // If previous update has not finished merging, snapshots exists and are 177 // created here so that ProcessUpdateState can proceed. 178 LOG(INFO) << "Snapshot devices are created"; 179 break; 180 } 181 case android::snapshot::CreateResult::NOT_CREATED: { 182 // If there is no previous update, no snapshot devices are created and 183 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not 184 // considered an error. 185 LOG(INFO) << "Snapshot devices are not created"; 186 break; 187 } 188 case android::snapshot::CreateResult::ERROR: 189 default: { 190 LOG(ERROR) 191 << "Failed to create snapshot devices (CreateResult = " 192 << static_cast< 193 std::underlying_type_t<android::snapshot::CreateResult>>( 194 snapshots_created); 195 processor_->ActionComplete(this, ErrorCode::kError); 196 return; 197 } 198 } 199 } 200 201 if (!merge_stats_->Start()) { 202 // Not an error because CleanupPreviousUpdateAction may be paused and 203 // resumed while kernel continues merging snapshots in the background. 204 LOG(WARNING) << "SnapshotMergeStats::Start failed."; 205 } 206 LOG(INFO) << "Waiting for any previous merge request to complete. " 207 << "This can take up to several minutes."; 208 WaitForMergeOrSchedule(); 209 } 210 211 void CleanupPreviousUpdateAction::ScheduleWaitForMerge() { 212 TEST_AND_RETURN(running_); 213 MessageLoop::current()->PostDelayedTask( 214 FROM_HERE, 215 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule, 216 base::Unretained(this)), 217 kWaitForMergeInterval); 218 } 219 220 void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() { 221 TEST_AND_RETURN(running_); 222 auto state = snapshot_->ProcessUpdateState( 223 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this), 224 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this)); 225 merge_stats_->set_state(state); 226 227 switch (state) { 228 case UpdateState::None: { 229 LOG(INFO) << "Can't find any snapshot to merge."; 230 ErrorCode error_code = ErrorCode::kSuccess; 231 if (!snapshot_->CancelUpdate()) { 232 error_code = ErrorCode::kError; 233 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate()."; 234 } 235 processor_->ActionComplete(this, error_code); 236 return; 237 } 238 239 case UpdateState::Initiated: { 240 LOG(ERROR) << "Previous update has not been completed, not cleaning up"; 241 processor_->ActionComplete(this, ErrorCode::kSuccess); 242 return; 243 } 244 245 case UpdateState::Unverified: { 246 InitiateMergeAndWait(); 247 return; 248 } 249 250 case UpdateState::Merging: { 251 ScheduleWaitForMerge(); 252 return; 253 } 254 255 case UpdateState::MergeNeedsReboot: { 256 LOG(ERROR) << "Need reboot to finish merging."; 257 processor_->ActionComplete(this, ErrorCode::kError); 258 return; 259 } 260 261 case UpdateState::MergeCompleted: { 262 LOG(INFO) << "Merge finished with state MergeCompleted."; 263 processor_->ActionComplete(this, ErrorCode::kSuccess); 264 return; 265 } 266 267 case UpdateState::MergeFailed: { 268 LOG(ERROR) << "Merge failed. Device may be corrupted."; 269 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted); 270 return; 271 } 272 273 case UpdateState::Cancelled: { 274 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are 275 // not deleted to avoid inconsistency. 276 // Nothing can be done here; just try next time. 277 ErrorCode error_code = 278 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess; 279 processor_->ActionComplete(this, error_code); 280 return; 281 } 282 283 default: { 284 // Protobuf has some reserved enum values, so a default case is needed. 285 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns " 286 << static_cast<int32_t>(state); 287 } 288 } 289 } 290 291 bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() { 292 double percentage = 0.0; 293 snapshot_->GetUpdateState(&percentage); 294 if (delegate_) { 295 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1]. 296 delegate_->OnCleanupProgressUpdate(percentage / 100); 297 } 298 299 // Log if percentage increments by at least 1. 300 if (last_percentage_ < static_cast<unsigned int>(percentage)) { 301 last_percentage_ = percentage; 302 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%."; 303 } 304 305 // Do not continue to wait for merge. Instead, let ProcessUpdateState 306 // return Merging directly so that we can ScheduleWaitForMerge() in 307 // MessageLoop. 308 return false; 309 } 310 311 bool CleanupPreviousUpdateAction::BeforeCancel() { 312 if (DeltaPerformer::ResetUpdateProgress( 313 prefs_, 314 false /* quick */, 315 false /* skip dynamic partitions metadata*/)) { 316 return true; 317 } 318 319 // ResetUpdateProgress might not work on stub prefs. Do additional checks. 320 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed."; 321 322 std::string val; 323 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val)); 324 if (val.empty()) { 325 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated 326 << " is empty, assuming successful cleanup"; 327 return true; 328 } 329 LOG(WARNING) 330 << kPrefsDynamicPartitionMetadataUpdated << " is " << val 331 << ", not deleting snapshots even though UpdateState is Cancelled."; 332 cancel_failed_ = true; 333 return false; 334 } 335 336 void CleanupPreviousUpdateAction::InitiateMergeAndWait() { 337 TEST_AND_RETURN(running_); 338 LOG(INFO) << "Attempting to initiate merge."; 339 // suspend the VAB merge when running a DSU 340 if (GetBoolProperty("ro.gsid.image_running", false)) { 341 LOG(WARNING) << "Suspend the VAB merge when running a DSU."; 342 processor_->ActionComplete(this, ErrorCode::kError); 343 return; 344 } 345 346 uint64_t cow_file_size; 347 if (snapshot_->InitiateMerge(&cow_file_size)) { 348 merge_stats_->set_cow_file_size(cow_file_size); 349 WaitForMergeOrSchedule(); 350 return; 351 } 352 353 LOG(WARNING) << "InitiateMerge failed."; 354 auto state = snapshot_->GetUpdateState(); 355 merge_stats_->set_state(state); 356 if (state == UpdateState::Unverified) { 357 // We are stuck at unverified state. This can happen if the update has 358 // been applied, but it has not even been attempted yet (in libsnapshot, 359 // rollback indicator does not exist); for example, if update_engine 360 // restarts before the device reboots, then this state may be reached. 361 // Nothing should be done here. 362 LOG(WARNING) << "InitiateMerge leaves the device at " 363 << "UpdateState::Unverified. (Did update_engine " 364 << "restarted?)"; 365 processor_->ActionComplete(this, ErrorCode::kSuccess); 366 return; 367 } 368 369 // State does seems to be advanced. 370 // It is possibly racy. For example, on a userdebug build, the user may 371 // manually initiate a merge with snapshotctl between last time 372 // update_engine checks UpdateState. Hence, just call 373 // WaitForMergeOrSchedule one more time. 374 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned " 375 << android::snapshot::UpdateState_Name(state) 376 << ", try to wait for merge again."; 377 WaitForMergeOrSchedule(); 378 return; 379 } 380 381 void CleanupPreviousUpdateAction::ReportMergeStats() { 382 auto result = merge_stats_->Finish(); 383 if (result == nullptr) { 384 LOG(WARNING) << "Not reporting merge stats because " 385 "SnapshotMergeStats::Finish failed."; 386 return; 387 } 388 389 #ifdef __ANDROID_RECOVERY__ 390 LOG(INFO) << "Skip reporting merge stats in recovery."; 391 #else 392 const auto& report = result->report(); 393 394 if (report.state() == UpdateState::None || 395 report.state() == UpdateState::Initiated || 396 report.state() == UpdateState::Unverified) { 397 LOG(INFO) << "Not reporting merge stats because state is " 398 << android::snapshot::UpdateState_Name(report.state()); 399 return; 400 } 401 402 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>( 403 result->merge_time()); 404 405 bool vab_retrofit = boot_control_->GetDynamicPartitionControl() 406 ->GetVirtualAbFeatureFlag() 407 .IsRetrofit(); 408 409 LOG(INFO) << "Reporting merge stats: " 410 << android::snapshot::UpdateState_Name(report.state()) << " in " 411 << passed_ms.count() << "ms (resumed " << report.resume_count() 412 << " times), using " << report.cow_file_size() 413 << " bytes of COW image."; 414 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED, 415 static_cast<int32_t>(report.state()), 416 static_cast<int64_t>(passed_ms.count()), 417 static_cast<int32_t>(report.resume_count()), 418 vab_retrofit, 419 static_cast<int64_t>(report.cow_file_size())); 420 #endif 421 } 422 423 } // namespace chromeos_update_engine 424