1 //
2 // Copyright (C) 2020 The Android Open Source Project
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "update_engine/cleanup_previous_update_action.h"
17 
18 #include <chrono>  // NOLINT(build/c++11) -- for merge times
19 #include <functional>
20 #include <string>
21 #include <type_traits>
22 
23 #include <android-base/properties.h>
24 #include <base/bind.h>
25 
26 #ifndef __ANDROID_RECOVERY__
27 #include <statslog.h>
28 #endif
29 
30 #include "update_engine/common/utils.h"
31 #include "update_engine/payload_consumer/delta_performer.h"
32 
33 using android::base::GetBoolProperty;
34 using android::snapshot::SnapshotManager;
35 using android::snapshot::SnapshotMergeStats;
36 using android::snapshot::UpdateState;
37 using brillo::MessageLoop;
38 
39 constexpr char kBootCompletedProp[] = "sys.boot_completed";
40 // Interval to check sys.boot_completed.
41 constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
42 // Interval to check IBootControl::isSlotMarkedSuccessful
43 constexpr auto kCheckSlotMarkedSuccessfulInterval =
44     base::TimeDelta::FromSeconds(2);
45 // Interval to call SnapshotManager::ProcessUpdateState
46 constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
47 
48 #ifdef __ANDROID_RECOVERY__
49 static constexpr bool kIsRecovery = true;
50 #else
51 static constexpr bool kIsRecovery = false;
52 #endif
53 
54 namespace chromeos_update_engine {
55 
56 CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
57     PrefsInterface* prefs,
58     BootControlInterface* boot_control,
59     android::snapshot::SnapshotManager* snapshot,
60     CleanupPreviousUpdateActionDelegateInterface* delegate)
61     : prefs_(prefs),
62       boot_control_(boot_control),
63       snapshot_(snapshot),
64       delegate_(delegate),
65       running_(false),
66       cancel_failed_(false),
67       last_percentage_(0),
68       merge_stats_(SnapshotMergeStats::GetInstance(*snapshot)) {}
69 
70 void CleanupPreviousUpdateAction::PerformAction() {
71   ResumeAction();
72 }
73 
74 void CleanupPreviousUpdateAction::TerminateProcessing() {
75   SuspendAction();
76 }
77 
78 void CleanupPreviousUpdateAction::ResumeAction() {
79   CHECK(prefs_);
80   CHECK(boot_control_);
81 
82   LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
83   running_ = true;
84   StartActionInternal();
85 }
86 
87 void CleanupPreviousUpdateAction::SuspendAction() {
88   LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction";
89   running_ = false;
90 }
91 
92 void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
93   running_ = false;
94   ReportMergeStats();
95   metadata_device_ = nullptr;
96 }
97 
98 std::string CleanupPreviousUpdateAction::Type() const {
99   return StaticType();
100 }
101 
102 std::string CleanupPreviousUpdateAction::StaticType() {
103   return "CleanupPreviousUpdateAction";
104 }
105 
106 void CleanupPreviousUpdateAction::StartActionInternal() {
107   // Do nothing on non-VAB device.
108   if (!boot_control_->GetDynamicPartitionControl()
109            ->GetVirtualAbFeatureFlag()
110            .IsEnabled()) {
111     processor_->ActionComplete(this, ErrorCode::kSuccess);
112     return;
113   }
114   // SnapshotManager is only available on VAB devices.
115   CHECK(snapshot_);
116   WaitBootCompletedOrSchedule();
117 }
118 
119 void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
120   TEST_AND_RETURN(running_);
121   MessageLoop::current()->PostDelayedTask(
122       FROM_HERE,
123       base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
124                  base::Unretained(this)),
125       kCheckBootCompletedInterval);
126 }
127 
128 void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
129   TEST_AND_RETURN(running_);
130   if (!kIsRecovery &&
131       !android::base::GetBoolProperty(kBootCompletedProp, false)) {
132     // repeat
133     ScheduleWaitBootCompleted();
134     return;
135   }
136 
137   LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
138   CheckSlotMarkedSuccessfulOrSchedule();
139 }
140 
141 void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
142   TEST_AND_RETURN(running_);
143   MessageLoop::current()->PostDelayedTask(
144       FROM_HERE,
145       base::Bind(
146           &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
147           base::Unretained(this)),
148       kCheckSlotMarkedSuccessfulInterval);
149 }
150 
151 void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
152   TEST_AND_RETURN(running_);
153   if (!kIsRecovery &&
154       !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
155     ScheduleWaitMarkBootSuccessful();
156   }
157 
158   if (metadata_device_ == nullptr) {
159     metadata_device_ = snapshot_->EnsureMetadataMounted();
160   }
161 
162   if (metadata_device_ == nullptr) {
163     LOG(ERROR) << "Failed to mount /metadata.";
164     // If metadata is erased but not formatted, it is possible to not mount
165     // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
166     processor_->ActionComplete(
167         this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
168     return;
169   }
170 
171   if (kIsRecovery) {
172     auto snapshots_created =
173         snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
174     switch (snapshots_created) {
175       case android::snapshot::CreateResult::CREATED: {
176         // If previous update has not finished merging, snapshots exists and are
177         // created here so that ProcessUpdateState can proceed.
178         LOG(INFO) << "Snapshot devices are created";
179         break;
180       }
181       case android::snapshot::CreateResult::NOT_CREATED: {
182         // If there is no previous update, no snapshot devices are created and
183         // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
184         // considered an error.
185         LOG(INFO) << "Snapshot devices are not created";
186         break;
187       }
188       case android::snapshot::CreateResult::ERROR:
189       default: {
190         LOG(ERROR)
191             << "Failed to create snapshot devices (CreateResult = "
192             << static_cast<
193                    std::underlying_type_t<android::snapshot::CreateResult>>(
194                    snapshots_created);
195         processor_->ActionComplete(this, ErrorCode::kError);
196         return;
197       }
198     }
199   }
200 
201   if (!merge_stats_->Start()) {
202     // Not an error because CleanupPreviousUpdateAction may be paused and
203     // resumed while kernel continues merging snapshots in the background.
204     LOG(WARNING) << "SnapshotMergeStats::Start failed.";
205   }
206   LOG(INFO) << "Waiting for any previous merge request to complete. "
207             << "This can take up to several minutes.";
208   WaitForMergeOrSchedule();
209 }
210 
211 void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
212   TEST_AND_RETURN(running_);
213   MessageLoop::current()->PostDelayedTask(
214       FROM_HERE,
215       base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
216                  base::Unretained(this)),
217       kWaitForMergeInterval);
218 }
219 
220 void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
221   TEST_AND_RETURN(running_);
222   auto state = snapshot_->ProcessUpdateState(
223       std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
224       std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
225   merge_stats_->set_state(state);
226 
227   switch (state) {
228     case UpdateState::None: {
229       LOG(INFO) << "Can't find any snapshot to merge.";
230       ErrorCode error_code = ErrorCode::kSuccess;
231       if (!snapshot_->CancelUpdate()) {
232         error_code = ErrorCode::kError;
233         LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
234       }
235       processor_->ActionComplete(this, error_code);
236       return;
237     }
238 
239     case UpdateState::Initiated: {
240       LOG(ERROR) << "Previous update has not been completed, not cleaning up";
241       processor_->ActionComplete(this, ErrorCode::kSuccess);
242       return;
243     }
244 
245     case UpdateState::Unverified: {
246       InitiateMergeAndWait();
247       return;
248     }
249 
250     case UpdateState::Merging: {
251       ScheduleWaitForMerge();
252       return;
253     }
254 
255     case UpdateState::MergeNeedsReboot: {
256       LOG(ERROR) << "Need reboot to finish merging.";
257       processor_->ActionComplete(this, ErrorCode::kError);
258       return;
259     }
260 
261     case UpdateState::MergeCompleted: {
262       LOG(INFO) << "Merge finished with state MergeCompleted.";
263       processor_->ActionComplete(this, ErrorCode::kSuccess);
264       return;
265     }
266 
267     case UpdateState::MergeFailed: {
268       LOG(ERROR) << "Merge failed. Device may be corrupted.";
269       processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
270       return;
271     }
272 
273     case UpdateState::Cancelled: {
274       // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
275       // not deleted to avoid inconsistency.
276       // Nothing can be done here; just try next time.
277       ErrorCode error_code =
278           cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
279       processor_->ActionComplete(this, error_code);
280       return;
281     }
282 
283     default: {
284       // Protobuf has some reserved enum values, so a default case is needed.
285       LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
286                  << static_cast<int32_t>(state);
287     }
288   }
289 }
290 
291 bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
292   double percentage = 0.0;
293   snapshot_->GetUpdateState(&percentage);
294   if (delegate_) {
295     // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
296     delegate_->OnCleanupProgressUpdate(percentage / 100);
297   }
298 
299   // Log if percentage increments by at least 1.
300   if (last_percentage_ < static_cast<unsigned int>(percentage)) {
301     last_percentage_ = percentage;
302     LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
303   }
304 
305   // Do not continue to wait for merge. Instead, let ProcessUpdateState
306   // return Merging directly so that we can ScheduleWaitForMerge() in
307   // MessageLoop.
308   return false;
309 }
310 
311 bool CleanupPreviousUpdateAction::BeforeCancel() {
312   if (DeltaPerformer::ResetUpdateProgress(
313           prefs_,
314           false /* quick */,
315           false /* skip dynamic partitions metadata*/)) {
316     return true;
317   }
318 
319   // ResetUpdateProgress might not work on stub prefs. Do additional checks.
320   LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
321 
322   std::string val;
323   ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
324   if (val.empty()) {
325     LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
326               << " is empty, assuming successful cleanup";
327     return true;
328   }
329   LOG(WARNING)
330       << kPrefsDynamicPartitionMetadataUpdated << " is " << val
331       << ", not deleting snapshots even though UpdateState is Cancelled.";
332   cancel_failed_ = true;
333   return false;
334 }
335 
336 void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
337   TEST_AND_RETURN(running_);
338   LOG(INFO) << "Attempting to initiate merge.";
339   // suspend the VAB merge when running a DSU
340   if (GetBoolProperty("ro.gsid.image_running", false)) {
341     LOG(WARNING) << "Suspend the VAB merge when running a DSU.";
342     processor_->ActionComplete(this, ErrorCode::kError);
343     return;
344   }
345 
346   uint64_t cow_file_size;
347   if (snapshot_->InitiateMerge(&cow_file_size)) {
348     merge_stats_->set_cow_file_size(cow_file_size);
349     WaitForMergeOrSchedule();
350     return;
351   }
352 
353   LOG(WARNING) << "InitiateMerge failed.";
354   auto state = snapshot_->GetUpdateState();
355   merge_stats_->set_state(state);
356   if (state == UpdateState::Unverified) {
357     // We are stuck at unverified state. This can happen if the update has
358     // been applied, but it has not even been attempted yet (in libsnapshot,
359     // rollback indicator does not exist); for example, if update_engine
360     // restarts before the device reboots, then this state may be reached.
361     // Nothing should be done here.
362     LOG(WARNING) << "InitiateMerge leaves the device at "
363                  << "UpdateState::Unverified. (Did update_engine "
364                  << "restarted?)";
365     processor_->ActionComplete(this, ErrorCode::kSuccess);
366     return;
367   }
368 
369   // State does seems to be advanced.
370   // It is possibly racy. For example, on a userdebug build, the user may
371   // manually initiate a merge with snapshotctl between last time
372   // update_engine checks UpdateState. Hence, just call
373   // WaitForMergeOrSchedule one more time.
374   LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
375                << android::snapshot::UpdateState_Name(state)
376                << ", try to wait for merge again.";
377   WaitForMergeOrSchedule();
378   return;
379 }
380 
381 void CleanupPreviousUpdateAction::ReportMergeStats() {
382   auto result = merge_stats_->Finish();
383   if (result == nullptr) {
384     LOG(WARNING) << "Not reporting merge stats because "
385                     "SnapshotMergeStats::Finish failed.";
386     return;
387   }
388 
389 #ifdef __ANDROID_RECOVERY__
390   LOG(INFO) << "Skip reporting merge stats in recovery.";
391 #else
392   const auto& report = result->report();
393 
394   if (report.state() == UpdateState::None ||
395       report.state() == UpdateState::Initiated ||
396       report.state() == UpdateState::Unverified) {
397     LOG(INFO) << "Not reporting merge stats because state is "
398               << android::snapshot::UpdateState_Name(report.state());
399     return;
400   }
401 
402   auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
403       result->merge_time());
404 
405   bool vab_retrofit = boot_control_->GetDynamicPartitionControl()
406                           ->GetVirtualAbFeatureFlag()
407                           .IsRetrofit();
408 
409   LOG(INFO) << "Reporting merge stats: "
410             << android::snapshot::UpdateState_Name(report.state()) << " in "
411             << passed_ms.count() << "ms (resumed " << report.resume_count()
412             << " times), using " << report.cow_file_size()
413             << " bytes of COW image.";
414   android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
415                              static_cast<int32_t>(report.state()),
416                              static_cast<int64_t>(passed_ms.count()),
417                              static_cast<int32_t>(report.resume_count()),
418                              vab_retrofit,
419                              static_cast<int64_t>(report.cow_file_size()));
420 #endif
421 }
422 
423 }  // namespace chromeos_update_engine
424