1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "code_generator_arm64.h"
18 
19 #include "arch/arm64/asm_support_arm64.h"
20 #include "arch/arm64/instruction_set_features_arm64.h"
21 #include "art_method-inl.h"
22 #include "base/bit_utils.h"
23 #include "base/bit_utils_iterator.h"
24 #include "class_table.h"
25 #include "code_generator_utils.h"
26 #include "compiled_method.h"
27 #include "entrypoints/quick/quick_entrypoints.h"
28 #include "entrypoints/quick/quick_entrypoints_enum.h"
29 #include "gc/accounting/card_table.h"
30 #include "gc/space/image_space.h"
31 #include "heap_poisoning.h"
32 #include "intrinsics.h"
33 #include "intrinsics_arm64.h"
34 #include "linker/linker_patch.h"
35 #include "lock_word.h"
36 #include "mirror/array-inl.h"
37 #include "mirror/class-inl.h"
38 #include "offsets.h"
39 #include "thread.h"
40 #include "utils/arm64/assembler_arm64.h"
41 #include "utils/assembler.h"
42 #include "utils/stack_checks.h"
43 
44 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
45 using vixl::ExactAssemblyScope;
46 using vixl::CodeBufferCheckScope;
47 using vixl::EmissionCheckScope;
48 
49 #ifdef __
50 #error "ARM64 Codegen VIXL macro-assembler macro already defined."
51 #endif
52 
53 namespace art {
54 
55 template<class MirrorType>
56 class GcRoot;
57 
58 namespace arm64 {
59 
60 using helpers::ARM64EncodableConstantOrRegister;
61 using helpers::ArtVixlRegCodeCoherentForRegSet;
62 using helpers::CPURegisterFrom;
63 using helpers::DRegisterFrom;
64 using helpers::FPRegisterFrom;
65 using helpers::HeapOperand;
66 using helpers::HeapOperandFrom;
67 using helpers::InputCPURegisterOrZeroRegAt;
68 using helpers::InputFPRegisterAt;
69 using helpers::InputOperandAt;
70 using helpers::InputRegisterAt;
71 using helpers::Int64FromLocation;
72 using helpers::IsConstantZeroBitPattern;
73 using helpers::LocationFrom;
74 using helpers::OperandFromMemOperand;
75 using helpers::OutputCPURegister;
76 using helpers::OutputFPRegister;
77 using helpers::OutputRegister;
78 using helpers::QRegisterFrom;
79 using helpers::RegisterFrom;
80 using helpers::StackOperandFrom;
81 using helpers::VIXLRegCodeFromART;
82 using helpers::WRegisterFrom;
83 using helpers::XRegisterFrom;
84 
85 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
86 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
87 // generates less code/data with a small num_entries.
88 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
89 
90 // Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
91 // offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
92 // For the Baker read barrier implementation using link-time generated thunks we need to split
93 // the offset explicitly.
94 constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
95 
96 inline Condition ARM64Condition(IfCondition cond) {
97   switch (cond) {
98     case kCondEQ: return eq;
99     case kCondNE: return ne;
100     case kCondLT: return lt;
101     case kCondLE: return le;
102     case kCondGT: return gt;
103     case kCondGE: return ge;
104     case kCondB:  return lo;
105     case kCondBE: return ls;
106     case kCondA:  return hi;
107     case kCondAE: return hs;
108   }
109   LOG(FATAL) << "Unreachable";
110   UNREACHABLE();
111 }
112 
113 inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
114   // The ARM64 condition codes can express all the necessary branches, see the
115   // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
116   // There is no dex instruction or HIR that would need the missing conditions
117   // "equal or unordered" or "not equal".
118   switch (cond) {
119     case kCondEQ: return eq;
120     case kCondNE: return ne /* unordered */;
121     case kCondLT: return gt_bias ? cc : lt /* unordered */;
122     case kCondLE: return gt_bias ? ls : le /* unordered */;
123     case kCondGT: return gt_bias ? hi /* unordered */ : gt;
124     case kCondGE: return gt_bias ? cs /* unordered */ : ge;
125     default:
126       LOG(FATAL) << "UNREACHABLE";
127       UNREACHABLE();
128   }
129 }
130 
131 Location ARM64ReturnLocation(DataType::Type return_type) {
132   // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
133   // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
134   // but we use the exact registers for clarity.
135   if (return_type == DataType::Type::kFloat32) {
136     return LocationFrom(s0);
137   } else if (return_type == DataType::Type::kFloat64) {
138     return LocationFrom(d0);
139   } else if (return_type == DataType::Type::kInt64) {
140     return LocationFrom(x0);
141   } else if (return_type == DataType::Type::kVoid) {
142     return Location::NoLocation();
143   } else {
144     return LocationFrom(w0);
145   }
146 }
147 
148 Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
149   return ARM64ReturnLocation(return_type);
150 }
151 
152 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
153   InvokeRuntimeCallingConvention calling_convention;
154   RegisterSet caller_saves = RegisterSet::Empty();
155   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
156   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
157             RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
158                          DataType::Type::kReference).GetCode());
159   return caller_saves;
160 }
161 
162 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
163 #define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
164 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
165 
166 // Calculate memory accessing operand for save/restore live registers.
167 static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
168                                            LocationSummary* locations,
169                                            int64_t spill_offset,
170                                            bool is_save) {
171   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
172   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
173   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
174                                          codegen->GetNumberOfCoreRegisters(),
175                                          fp_spills,
176                                          codegen->GetNumberOfFloatingPointRegisters()));
177 
178   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
179   unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
180   CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
181 
182   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
183   UseScratchRegisterScope temps(masm);
184 
185   Register base = masm->StackPointer();
186   int64_t core_spill_size = core_list.GetTotalSizeInBytes();
187   int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
188   int64_t reg_size = kXRegSizeInBytes;
189   int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
190   uint32_t ls_access_size = WhichPowerOf2(reg_size);
191   if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
192       !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
193     // If the offset does not fit in the instruction's immediate field, use an alternate register
194     // to compute the base address(float point registers spill base address).
195     Register new_base = temps.AcquireSameSizeAs(base);
196     __ Add(new_base, base, Operand(spill_offset + core_spill_size));
197     base = new_base;
198     spill_offset = -core_spill_size;
199     int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
200     DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
201     DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
202   }
203 
204   if (is_save) {
205     __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
206     __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
207   } else {
208     __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
209     __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
210   }
211 }
212 
213 void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
214   size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
215   const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
216   for (uint32_t i : LowToHighBits(core_spills)) {
217     // If the register holds an object, update the stack mask.
218     if (locations->RegisterContainsObject(i)) {
219       locations->SetStackBit(stack_offset / kVRegSize);
220     }
221     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
222     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
223     saved_core_stack_offsets_[i] = stack_offset;
224     stack_offset += kXRegSizeInBytes;
225   }
226 
227   const size_t fp_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSizeInBytes : kDRegSizeInBytes;
228   const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
229   for (uint32_t i : LowToHighBits(fp_spills)) {
230     DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
231     DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
232     saved_fpu_stack_offsets_[i] = stack_offset;
233     stack_offset += fp_reg_size;
234   }
235 
236   SaveRestoreLiveRegistersHelper(codegen,
237                                  locations,
238                                  codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true);
239 }
240 
241 void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
242   SaveRestoreLiveRegistersHelper(codegen,
243                                  locations,
244                                  codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false);
245 }
246 
247 class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
248  public:
249   explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
250 
251   void EmitNativeCode(CodeGenerator* codegen) override {
252     LocationSummary* locations = instruction_->GetLocations();
253     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
254 
255     __ Bind(GetEntryLabel());
256     if (instruction_->CanThrowIntoCatchBlock()) {
257       // Live registers will be restored in the catch block if caught.
258       SaveLiveRegisters(codegen, instruction_->GetLocations());
259     }
260     // We're moving two locations to locations that could overlap, so we need a parallel
261     // move resolver.
262     InvokeRuntimeCallingConvention calling_convention;
263     codegen->EmitParallelMoves(locations->InAt(0),
264                                LocationFrom(calling_convention.GetRegisterAt(0)),
265                                DataType::Type::kInt32,
266                                locations->InAt(1),
267                                LocationFrom(calling_convention.GetRegisterAt(1)),
268                                DataType::Type::kInt32);
269     QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
270         ? kQuickThrowStringBounds
271         : kQuickThrowArrayBounds;
272     arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
273     CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
274     CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
275   }
276 
277   bool IsFatal() const override { return true; }
278 
279   const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }
280 
281  private:
282   DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
283 };
284 
285 class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
286  public:
287   explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
288 
289   void EmitNativeCode(CodeGenerator* codegen) override {
290     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
291     __ Bind(GetEntryLabel());
292     arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
293     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
294   }
295 
296   bool IsFatal() const override { return true; }
297 
298   const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }
299 
300  private:
301   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
302 };
303 
304 class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
305  public:
306   LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
307       : SlowPathCodeARM64(at), cls_(cls) {
308     DCHECK(at->IsLoadClass() || at->IsClinitCheck());
309     DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
310   }
311 
312   void EmitNativeCode(CodeGenerator* codegen) override {
313     LocationSummary* locations = instruction_->GetLocations();
314     Location out = locations->Out();
315     const uint32_t dex_pc = instruction_->GetDexPc();
316     bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
317     bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
318 
319     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
320     __ Bind(GetEntryLabel());
321     SaveLiveRegisters(codegen, locations);
322 
323     InvokeRuntimeCallingConvention calling_convention;
324     if (must_resolve_type) {
325       DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()));
326       dex::TypeIndex type_index = cls_->GetTypeIndex();
327       __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
328       arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
329       CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
330       // If we also must_do_clinit, the resolved type is now in the correct register.
331     } else {
332       DCHECK(must_do_clinit);
333       Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
334       arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
335                                   source,
336                                   cls_->GetType());
337     }
338     if (must_do_clinit) {
339       arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
340       CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
341     }
342 
343     // Move the class to the desired location.
344     if (out.IsValid()) {
345       DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
346       DataType::Type type = instruction_->GetType();
347       arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
348     }
349     RestoreLiveRegisters(codegen, locations);
350     __ B(GetExitLabel());
351   }
352 
353   const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }
354 
355  private:
356   // The class this slow path will load.
357   HLoadClass* const cls_;
358 
359   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
360 };
361 
362 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
363  public:
364   explicit LoadStringSlowPathARM64(HLoadString* instruction)
365       : SlowPathCodeARM64(instruction) {}
366 
367   void EmitNativeCode(CodeGenerator* codegen) override {
368     LocationSummary* locations = instruction_->GetLocations();
369     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
370     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
371 
372     __ Bind(GetEntryLabel());
373     SaveLiveRegisters(codegen, locations);
374 
375     InvokeRuntimeCallingConvention calling_convention;
376     const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
377     __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
378     arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
379     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
380     DataType::Type type = instruction_->GetType();
381     arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);
382 
383     RestoreLiveRegisters(codegen, locations);
384 
385     __ B(GetExitLabel());
386   }
387 
388   const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }
389 
390  private:
391   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
392 };
393 
394 class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
395  public:
396   explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
397 
398   void EmitNativeCode(CodeGenerator* codegen) override {
399     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
400     __ Bind(GetEntryLabel());
401     if (instruction_->CanThrowIntoCatchBlock()) {
402       // Live registers will be restored in the catch block if caught.
403       SaveLiveRegisters(codegen, instruction_->GetLocations());
404     }
405     arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
406                                  instruction_,
407                                  instruction_->GetDexPc(),
408                                  this);
409     CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
410   }
411 
412   bool IsFatal() const override { return true; }
413 
414   const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }
415 
416  private:
417   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
418 };
419 
420 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
421  public:
422   SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
423       : SlowPathCodeARM64(instruction), successor_(successor) {}
424 
425   void EmitNativeCode(CodeGenerator* codegen) override {
426     LocationSummary* locations = instruction_->GetLocations();
427     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
428     __ Bind(GetEntryLabel());
429     SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
430     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
431     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
432     RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
433     if (successor_ == nullptr) {
434       __ B(GetReturnLabel());
435     } else {
436       __ B(arm64_codegen->GetLabelOf(successor_));
437     }
438   }
439 
440   vixl::aarch64::Label* GetReturnLabel() {
441     DCHECK(successor_ == nullptr);
442     return &return_label_;
443   }
444 
445   HBasicBlock* GetSuccessor() const {
446     return successor_;
447   }
448 
449   const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }
450 
451  private:
452   // If not null, the block to branch to after the suspend check.
453   HBasicBlock* const successor_;
454 
455   // If `successor_` is null, the label to branch to after the suspend check.
456   vixl::aarch64::Label return_label_;
457 
458   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
459 };
460 
461 class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
462  public:
463   TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
464       : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
465 
466   void EmitNativeCode(CodeGenerator* codegen) override {
467     LocationSummary* locations = instruction_->GetLocations();
468 
469     DCHECK(instruction_->IsCheckCast()
470            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
471     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
472     uint32_t dex_pc = instruction_->GetDexPc();
473 
474     __ Bind(GetEntryLabel());
475 
476     if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
477       SaveLiveRegisters(codegen, locations);
478     }
479 
480     // We're moving two locations to locations that could overlap, so we need a parallel
481     // move resolver.
482     InvokeRuntimeCallingConvention calling_convention;
483     codegen->EmitParallelMoves(locations->InAt(0),
484                                LocationFrom(calling_convention.GetRegisterAt(0)),
485                                DataType::Type::kReference,
486                                locations->InAt(1),
487                                LocationFrom(calling_convention.GetRegisterAt(1)),
488                                DataType::Type::kReference);
489     if (instruction_->IsInstanceOf()) {
490       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
491       CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
492       DataType::Type ret_type = instruction_->GetType();
493       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
494       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
495     } else {
496       DCHECK(instruction_->IsCheckCast());
497       arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
498       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
499     }
500 
501     if (!is_fatal_) {
502       RestoreLiveRegisters(codegen, locations);
503       __ B(GetExitLabel());
504     }
505   }
506 
507   const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
508   bool IsFatal() const override { return is_fatal_; }
509 
510  private:
511   const bool is_fatal_;
512 
513   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
514 };
515 
516 class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
517  public:
518   explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
519       : SlowPathCodeARM64(instruction) {}
520 
521   void EmitNativeCode(CodeGenerator* codegen) override {
522     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
523     __ Bind(GetEntryLabel());
524     LocationSummary* locations = instruction_->GetLocations();
525     SaveLiveRegisters(codegen, locations);
526     InvokeRuntimeCallingConvention calling_convention;
527     __ Mov(calling_convention.GetRegisterAt(0),
528            static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
529     arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
530     CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
531   }
532 
533   const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }
534 
535  private:
536   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
537 };
538 
539 class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
540  public:
541   explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
542 
543   void EmitNativeCode(CodeGenerator* codegen) override {
544     LocationSummary* locations = instruction_->GetLocations();
545     __ Bind(GetEntryLabel());
546     SaveLiveRegisters(codegen, locations);
547 
548     InvokeRuntimeCallingConvention calling_convention;
549     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
550     parallel_move.AddMove(
551         locations->InAt(0),
552         LocationFrom(calling_convention.GetRegisterAt(0)),
553         DataType::Type::kReference,
554         nullptr);
555     parallel_move.AddMove(
556         locations->InAt(1),
557         LocationFrom(calling_convention.GetRegisterAt(1)),
558         DataType::Type::kInt32,
559         nullptr);
560     parallel_move.AddMove(
561         locations->InAt(2),
562         LocationFrom(calling_convention.GetRegisterAt(2)),
563         DataType::Type::kReference,
564         nullptr);
565     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
566 
567     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
568     arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
569     CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
570     RestoreLiveRegisters(codegen, locations);
571     __ B(GetExitLabel());
572   }
573 
574   const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }
575 
576  private:
577   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
578 };
579 
580 void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
581   uint32_t num_entries = switch_instr_->GetNumEntries();
582   DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
583 
584   // We are about to use the assembler to place literals directly. Make sure we have enough
585   // underlying code buffer and we have generated the jump table with right size.
586   EmissionCheckScope scope(codegen->GetVIXLAssembler(),
587                            num_entries * sizeof(int32_t),
588                            CodeBufferCheckScope::kExactSize);
589 
590   __ Bind(&table_start_);
591   const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
592   for (uint32_t i = 0; i < num_entries; i++) {
593     vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
594     DCHECK(target_label->IsBound());
595     ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
596     DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
597     DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
598     Literal<int32_t> literal(jump_offset);
599     __ place(&literal);
600   }
601 }
602 
603 // Slow path generating a read barrier for a heap reference.
604 class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
605  public:
606   ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
607                                            Location out,
608                                            Location ref,
609                                            Location obj,
610                                            uint32_t offset,
611                                            Location index)
612       : SlowPathCodeARM64(instruction),
613         out_(out),
614         ref_(ref),
615         obj_(obj),
616         offset_(offset),
617         index_(index) {
618     DCHECK(kEmitCompilerReadBarrier);
619     // If `obj` is equal to `out` or `ref`, it means the initial object
620     // has been overwritten by (or after) the heap object reference load
621     // to be instrumented, e.g.:
622     //
623     //   __ Ldr(out, HeapOperand(out, class_offset);
624     //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
625     //
626     // In that case, we have lost the information about the original
627     // object, and the emitted read barrier cannot work properly.
628     DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
629     DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
630   }
631 
632   void EmitNativeCode(CodeGenerator* codegen) override {
633     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
634     LocationSummary* locations = instruction_->GetLocations();
635     DataType::Type type = DataType::Type::kReference;
636     DCHECK(locations->CanCall());
637     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
638     DCHECK(instruction_->IsInstanceFieldGet() ||
639            instruction_->IsStaticFieldGet() ||
640            instruction_->IsArrayGet() ||
641            instruction_->IsInstanceOf() ||
642            instruction_->IsCheckCast() ||
643            (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
644         << "Unexpected instruction in read barrier for heap reference slow path: "
645         << instruction_->DebugName();
646     // The read barrier instrumentation of object ArrayGet
647     // instructions does not support the HIntermediateAddress
648     // instruction.
649     DCHECK(!(instruction_->IsArrayGet() &&
650              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
651 
652     __ Bind(GetEntryLabel());
653 
654     SaveLiveRegisters(codegen, locations);
655 
656     // We may have to change the index's value, but as `index_` is a
657     // constant member (like other "inputs" of this slow path),
658     // introduce a copy of it, `index`.
659     Location index = index_;
660     if (index_.IsValid()) {
661       // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
662       if (instruction_->IsArrayGet()) {
663         // Compute the actual memory offset and store it in `index`.
664         Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
665         DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
666         if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
667           // We are about to change the value of `index_reg` (see the
668           // calls to vixl::MacroAssembler::Lsl and
669           // vixl::MacroAssembler::Mov below), but it has
670           // not been saved by the previous call to
671           // art::SlowPathCode::SaveLiveRegisters, as it is a
672           // callee-save register --
673           // art::SlowPathCode::SaveLiveRegisters does not consider
674           // callee-save registers, as it has been designed with the
675           // assumption that callee-save registers are supposed to be
676           // handled by the called function.  So, as a callee-save
677           // register, `index_reg` _would_ eventually be saved onto
678           // the stack, but it would be too late: we would have
679           // changed its value earlier.  Therefore, we manually save
680           // it here into another freely available register,
681           // `free_reg`, chosen of course among the caller-save
682           // registers (as a callee-save `free_reg` register would
683           // exhibit the same problem).
684           //
685           // Note we could have requested a temporary register from
686           // the register allocator instead; but we prefer not to, as
687           // this is a slow path, and we know we can find a
688           // caller-save register that is available.
689           Register free_reg = FindAvailableCallerSaveRegister(codegen);
690           __ Mov(free_reg.W(), index_reg);
691           index_reg = free_reg;
692           index = LocationFrom(index_reg);
693         } else {
694           // The initial register stored in `index_` has already been
695           // saved in the call to art::SlowPathCode::SaveLiveRegisters
696           // (as it is not a callee-save register), so we can freely
697           // use it.
698         }
699         // Shifting the index value contained in `index_reg` by the scale
700         // factor (2) cannot overflow in practice, as the runtime is
701         // unable to allocate object arrays with a size larger than
702         // 2^26 - 1 (that is, 2^28 - 4 bytes).
703         __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
704         static_assert(
705             sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
706             "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
707         __ Add(index_reg, index_reg, Operand(offset_));
708       } else {
709         // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
710         // intrinsics, `index_` is not shifted by a scale factor of 2
711         // (as in the case of ArrayGet), as it is actually an offset
712         // to an object field within an object.
713         DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
714         DCHECK(instruction_->GetLocations()->Intrinsified());
715         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
716                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
717             << instruction_->AsInvoke()->GetIntrinsic();
718         DCHECK_EQ(offset_, 0u);
719         DCHECK(index_.IsRegister());
720       }
721     }
722 
723     // We're moving two or three locations to locations that could
724     // overlap, so we need a parallel move resolver.
725     InvokeRuntimeCallingConvention calling_convention;
726     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
727     parallel_move.AddMove(ref_,
728                           LocationFrom(calling_convention.GetRegisterAt(0)),
729                           type,
730                           nullptr);
731     parallel_move.AddMove(obj_,
732                           LocationFrom(calling_convention.GetRegisterAt(1)),
733                           type,
734                           nullptr);
735     if (index.IsValid()) {
736       parallel_move.AddMove(index,
737                             LocationFrom(calling_convention.GetRegisterAt(2)),
738                             DataType::Type::kInt32,
739                             nullptr);
740       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
741     } else {
742       codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
743       arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
744     }
745     arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
746                                  instruction_,
747                                  instruction_->GetDexPc(),
748                                  this);
749     CheckEntrypointTypes<
750         kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
751     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
752 
753     RestoreLiveRegisters(codegen, locations);
754 
755     __ B(GetExitLabel());
756   }
757 
758   const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
759 
760  private:
761   Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
762     size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
763     size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
764     for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
765       if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
766         return Register(VIXLRegCodeFromART(i), kXRegSize);
767       }
768     }
769     // We shall never fail to find a free caller-save register, as
770     // there are more than two core caller-save registers on ARM64
771     // (meaning it is possible to find one which is different from
772     // `ref` and `obj`).
773     DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
774     LOG(FATAL) << "Could not find a free register";
775     UNREACHABLE();
776   }
777 
778   const Location out_;
779   const Location ref_;
780   const Location obj_;
781   const uint32_t offset_;
782   // An additional location containing an index to an array.
783   // Only used for HArrayGet and the UnsafeGetObject &
784   // UnsafeGetObjectVolatile intrinsics.
785   const Location index_;
786 
787   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
788 };
789 
790 // Slow path generating a read barrier for a GC root.
791 class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
792  public:
793   ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
794       : SlowPathCodeARM64(instruction), out_(out), root_(root) {
795     DCHECK(kEmitCompilerReadBarrier);
796   }
797 
798   void EmitNativeCode(CodeGenerator* codegen) override {
799     LocationSummary* locations = instruction_->GetLocations();
800     DataType::Type type = DataType::Type::kReference;
801     DCHECK(locations->CanCall());
802     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
803     DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
804         << "Unexpected instruction in read barrier for GC root slow path: "
805         << instruction_->DebugName();
806 
807     __ Bind(GetEntryLabel());
808     SaveLiveRegisters(codegen, locations);
809 
810     InvokeRuntimeCallingConvention calling_convention;
811     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
812     // The argument of the ReadBarrierForRootSlow is not a managed
813     // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
814     // thus we need a 64-bit move here, and we cannot use
815     //
816     //   arm64_codegen->MoveLocation(
817     //       LocationFrom(calling_convention.GetRegisterAt(0)),
818     //       root_,
819     //       type);
820     //
821     // which would emit a 32-bit move, as `type` is a (32-bit wide)
822     // reference type (`DataType::Type::kReference`).
823     __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
824     arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
825                                  instruction_,
826                                  instruction_->GetDexPc(),
827                                  this);
828     CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
829     arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
830 
831     RestoreLiveRegisters(codegen, locations);
832     __ B(GetExitLabel());
833   }
834 
835   const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }
836 
837  private:
838   const Location out_;
839   const Location root_;
840 
841   DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
842 };
843 
844 #undef __
845 
846 Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
847   Location next_location;
848   if (type == DataType::Type::kVoid) {
849     LOG(FATAL) << "Unreachable type " << type;
850   }
851 
852   if (DataType::IsFloatingPointType(type) &&
853       (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
854     next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
855   } else if (!DataType::IsFloatingPointType(type) &&
856              (gp_index_ < calling_convention.GetNumberOfRegisters())) {
857     next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
858   } else {
859     size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
860     next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
861                                                 : Location::StackSlot(stack_offset);
862   }
863 
864   // Space on the stack is reserved for all arguments.
865   stack_index_ += DataType::Is64BitType(type) ? 2 : 1;
866   return next_location;
867 }
868 
869 Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
870   return LocationFrom(kArtMethodRegister);
871 }
872 
873 CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
874                                        const CompilerOptions& compiler_options,
875                                        OptimizingCompilerStats* stats)
876     : CodeGenerator(graph,
877                     kNumberOfAllocatableRegisters,
878                     kNumberOfAllocatableFPRegisters,
879                     kNumberOfAllocatableRegisterPairs,
880                     callee_saved_core_registers.GetList(),
881                     callee_saved_fp_registers.GetList(),
882                     compiler_options,
883                     stats),
884       block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
885       jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
886       location_builder_(graph, this),
887       instruction_visitor_(graph, this),
888       move_resolver_(graph->GetAllocator(), this),
889       assembler_(graph->GetAllocator(),
890                  compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
891       boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
892       method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
893       boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
894       type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
895       boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
896       string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
897       boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
898       call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
899       baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
900       uint32_literals_(std::less<uint32_t>(),
901                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
902       uint64_literals_(std::less<uint64_t>(),
903                        graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
904       jit_string_patches_(StringReferenceValueComparator(),
905                           graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
906       jit_class_patches_(TypeReferenceValueComparator(),
907                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
908       jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
909                                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
910   // Save the link register (containing the return address) to mimic Quick.
911   AddAllocatedRegister(LocationFrom(lr));
912 }
913 
914 #define __ GetVIXLAssembler()->
915 
916 void CodeGeneratorARM64::EmitJumpTables() {
917   for (auto&& jump_table : jump_tables_) {
918     jump_table->EmitTable(this);
919   }
920 }
921 
922 void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
923   EmitJumpTables();
924 
925   // Emit JIT baker read barrier slow paths.
926   DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
927   for (auto& entry : jit_baker_read_barrier_slow_paths_) {
928     uint32_t encoded_data = entry.first;
929     vixl::aarch64::Label* slow_path_entry = &entry.second.label;
930     __ Bind(slow_path_entry);
931     CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
932   }
933 
934   // Ensure we emit the literal pool.
935   __ FinalizeCode();
936 
937   CodeGenerator::Finalize(allocator);
938 
939   // Verify Baker read barrier linker patches.
940   if (kIsDebugBuild) {
941     ArrayRef<const uint8_t> code = allocator->GetMemory();
942     for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
943       DCHECK(info.label.IsBound());
944       uint32_t literal_offset = info.label.GetLocation();
945       DCHECK_ALIGNED(literal_offset, 4u);
946 
947       auto GetInsn = [&code](uint32_t offset) {
948         DCHECK_ALIGNED(offset, 4u);
949         return
950             (static_cast<uint32_t>(code[offset + 0]) << 0) +
951             (static_cast<uint32_t>(code[offset + 1]) << 8) +
952             (static_cast<uint32_t>(code[offset + 2]) << 16)+
953             (static_cast<uint32_t>(code[offset + 3]) << 24);
954       };
955 
956       const uint32_t encoded_data = info.custom_data;
957       BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
958       // Check that the next instruction matches the expected LDR.
959       switch (kind) {
960         case BakerReadBarrierKind::kField:
961         case BakerReadBarrierKind::kAcquire: {
962           DCHECK_GE(code.size() - literal_offset, 8u);
963           uint32_t next_insn = GetInsn(literal_offset + 4u);
964           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
965           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
966           if (kind == BakerReadBarrierKind::kField) {
967             // LDR (immediate) with correct base_reg.
968             CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
969           } else {
970             DCHECK(kind == BakerReadBarrierKind::kAcquire);
971             // LDAR with correct base_reg.
972             CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
973           }
974           break;
975         }
976         case BakerReadBarrierKind::kArray: {
977           DCHECK_GE(code.size() - literal_offset, 8u);
978           uint32_t next_insn = GetInsn(literal_offset + 4u);
979           // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
980           // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
981           CheckValidReg(next_insn & 0x1fu);  // Check destination register.
982           const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
983           CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
984           CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register
985           break;
986         }
987         case BakerReadBarrierKind::kGcRoot: {
988           DCHECK_GE(literal_offset, 4u);
989           uint32_t prev_insn = GetInsn(literal_offset - 4u);
990           const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
991           // Usually LDR (immediate) with correct root_reg but
992           // we may have a "MOV marked, old_value" for UnsafeCASObject.
993           if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) {    // MOV?
994             CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);  // LDR?
995           }
996           break;
997         }
998         default:
999           LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
1000           UNREACHABLE();
1001       }
1002     }
1003   }
1004 }
1005 
1006 void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
1007   // Note: There are 6 kinds of moves:
1008   // 1. constant -> GPR/FPR (non-cycle)
1009   // 2. constant -> stack (non-cycle)
1010   // 3. GPR/FPR -> GPR/FPR
1011   // 4. GPR/FPR -> stack
1012   // 5. stack -> GPR/FPR
1013   // 6. stack -> stack (non-cycle)
1014   // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5
1015   // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
1016   // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the
1017   // dependency.
1018   vixl_temps_.Open(GetVIXLAssembler());
1019 }
1020 
1021 void ParallelMoveResolverARM64::FinishEmitNativeCode() {
1022   vixl_temps_.Close();
1023 }
1024 
1025 Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
1026   DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
1027          || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
1028          || kind == Location::kSIMDStackSlot);
1029   kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
1030       ? Location::kFpuRegister
1031       : Location::kRegister;
1032   Location scratch = GetScratchLocation(kind);
1033   if (!scratch.Equals(Location::NoLocation())) {
1034     return scratch;
1035   }
1036   // Allocate from VIXL temp registers.
1037   if (kind == Location::kRegister) {
1038     scratch = LocationFrom(vixl_temps_.AcquireX());
1039   } else {
1040     DCHECK_EQ(kind, Location::kFpuRegister);
1041     scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
1042         ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
1043         : vixl_temps_.AcquireD());
1044   }
1045   AddScratchLocation(scratch);
1046   return scratch;
1047 }
1048 
1049 void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
1050   if (loc.IsRegister()) {
1051     vixl_temps_.Release(XRegisterFrom(loc));
1052   } else {
1053     DCHECK(loc.IsFpuRegister());
1054     vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
1055   }
1056   RemoveScratchLocation(loc);
1057 }
1058 
1059 void ParallelMoveResolverARM64::EmitMove(size_t index) {
1060   MoveOperands* move = moves_[index];
1061   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
1062 }
1063 
1064 void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
1065   MacroAssembler* masm = GetVIXLAssembler();
1066   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1067     UseScratchRegisterScope temps(masm);
1068     Register counter = temps.AcquireX();
1069     Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
1070     if (!is_frame_entry) {
1071       __ Ldr(method, MemOperand(sp, 0));
1072     }
1073     __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1074     __ Add(counter, counter, 1);
1075     // Subtract one if the counter would overflow.
1076     __ Sub(counter, counter, Operand(counter, LSR, 16));
1077     __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
1078   }
1079 
1080   if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1081     ScopedObjectAccess soa(Thread::Current());
1082     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
1083     if (info != nullptr) {
1084       uint64_t address = reinterpret_cast64<uint64_t>(info);
1085       vixl::aarch64::Label done;
1086       UseScratchRegisterScope temps(masm);
1087       Register temp = temps.AcquireX();
1088       Register counter = temps.AcquireW();
1089       __ Mov(temp, address);
1090       __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1091       __ Add(counter, counter, 1);
1092       __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
1093       __ Tst(counter, 0xffff);
1094       __ B(ne, &done);
1095       if (is_frame_entry) {
1096         if (HasEmptyFrame()) {
1097           // The entyrpoint expects the method at the bottom of the stack. We
1098           // claim stack space necessary for alignment.
1099           __ Claim(kStackAlignment);
1100           __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0));
1101         } else if (!RequiresCurrentMethod()) {
1102           __ Str(kArtMethodRegister, MemOperand(sp, 0));
1103         }
1104       } else {
1105         CHECK(RequiresCurrentMethod());
1106       }
1107       uint32_t entrypoint_offset =
1108           GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
1109       __ Ldr(lr, MemOperand(tr, entrypoint_offset));
1110       // Note: we don't record the call here (and therefore don't generate a stack
1111       // map), as the entrypoint should never be suspended.
1112       __ Blr(lr);
1113       if (HasEmptyFrame()) {
1114         CHECK(is_frame_entry);
1115         __ Ldr(lr, MemOperand(sp, 8));
1116         __ Drop(kStackAlignment);
1117       }
1118       __ Bind(&done);
1119     }
1120   }
1121 }
1122 
1123 void CodeGeneratorARM64::GenerateFrameEntry() {
1124   MacroAssembler* masm = GetVIXLAssembler();
1125   __ Bind(&frame_entry_label_);
1126 
1127   bool do_overflow_check =
1128       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
1129   if (do_overflow_check) {
1130     UseScratchRegisterScope temps(masm);
1131     Register temp = temps.AcquireX();
1132     DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1133     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
1134     {
1135       // Ensure that between load and RecordPcInfo there are no pools emitted.
1136       ExactAssemblyScope eas(GetVIXLAssembler(),
1137                              kInstructionSize,
1138                              CodeBufferCheckScope::kExactSize);
1139       __ ldr(wzr, MemOperand(temp, 0));
1140       RecordPcInfo(nullptr, 0);
1141     }
1142   }
1143 
1144   if (!HasEmptyFrame()) {
1145     // Stack layout:
1146     //      sp[frame_size - 8]        : lr.
1147     //      ...                       : other preserved core registers.
1148     //      ...                       : other preserved fp registers.
1149     //      ...                       : reserved frame space.
1150     //      sp[0]                     : current method.
1151     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1152     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1153     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1154     DCHECK(!preserved_core_registers.IsEmpty());
1155     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1156     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1157 
1158     // Save the current method if we need it, or if using STP reduces code
1159     // size. Note that we do not do this in HCurrentMethod, as the
1160     // instruction might have been removed in the SSA graph.
1161     CPURegister lowest_spill;
1162     if (core_spills_offset == kXRegSizeInBytes) {
1163       // If there is no gap between the method and the lowest core spill, use
1164       // aligned STP pre-index to store both. Max difference is 512. We do
1165       // that to reduce code size even if we do not have to save the method.
1166       DCHECK_LE(frame_size, 512);  // 32 core registers are only 256 bytes.
1167       lowest_spill = preserved_core_registers.PopLowestIndex();
1168       __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex));
1169     } else if (RequiresCurrentMethod()) {
1170       __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
1171     } else {
1172       __ Claim(frame_size);
1173     }
1174     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
1175     if (lowest_spill.IsValid()) {
1176       GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset);
1177       core_spills_offset += kXRegSizeInBytes;
1178     }
1179     GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset);
1180     GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset);
1181 
1182     if (GetGraph()->HasShouldDeoptimizeFlag()) {
1183       // Initialize should_deoptimize flag to 0.
1184       Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
1185       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
1186     }
1187   }
1188   MaybeIncrementHotness(/* is_frame_entry= */ true);
1189   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
1190 }
1191 
1192 void CodeGeneratorARM64::GenerateFrameExit() {
1193   GetAssembler()->cfi().RememberState();
1194   if (!HasEmptyFrame()) {
1195     int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
1196     uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
1197     CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
1198     DCHECK(!preserved_core_registers.IsEmpty());
1199     uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
1200     CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();
1201 
1202     CPURegister lowest_spill;
1203     if (core_spills_offset == kXRegSizeInBytes) {
1204       // If there is no gap between the method and the lowest core spill, use
1205       // aligned LDP pre-index to pop both. Max difference is 504. We do
1206       // that to reduce code size even though the loaded method is unused.
1207       DCHECK_LE(frame_size, 504);  // 32 core registers are only 256 bytes.
1208       lowest_spill = preserved_core_registers.PopLowestIndex();
1209       core_spills_offset += kXRegSizeInBytes;
1210     }
1211     GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset);
1212     GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset);
1213     if (lowest_spill.IsValid()) {
1214       __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex));
1215       GetAssembler()->cfi().Restore(DWARFReg(lowest_spill));
1216     } else {
1217       __ Drop(frame_size);
1218     }
1219     GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
1220   }
1221   __ Ret();
1222   GetAssembler()->cfi().RestoreState();
1223   GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
1224 }
1225 
1226 CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
1227   DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
1228   return CPURegList(CPURegister::kRegister, kXRegSize,
1229                     core_spill_mask_);
1230 }
1231 
1232 CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
1233   DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
1234                                          GetNumberOfFloatingPointRegisters()));
1235   return CPURegList(CPURegister::kVRegister, kDRegSize,
1236                     fpu_spill_mask_);
1237 }
1238 
1239 void CodeGeneratorARM64::Bind(HBasicBlock* block) {
1240   __ Bind(GetLabelOf(block));
1241 }
1242 
1243 void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
1244   DCHECK(location.IsRegister());
1245   __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
1246 }
1247 
1248 void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1249   if (location.IsRegister()) {
1250     locations->AddTemp(location);
1251   } else {
1252     UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1253   }
1254 }
1255 
1256 void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
1257   UseScratchRegisterScope temps(GetVIXLAssembler());
1258   Register card = temps.AcquireX();
1259   Register temp = temps.AcquireW();   // Index within the CardTable - 32bit.
1260   vixl::aarch64::Label done;
1261   if (value_can_be_null) {
1262     __ Cbz(value, &done);
1263   }
1264   // Load the address of the card table into `card`.
1265   __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
1266   // Calculate the offset (in the card table) of the card corresponding to
1267   // `object`.
1268   __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
1269   // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
1270   // `object`'s card.
1271   //
1272   // Register `card` contains the address of the card table. Note that the card
1273   // table's base is biased during its creation so that it always starts at an
1274   // address whose least-significant byte is equal to `kCardDirty` (see
1275   // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
1276   // below writes the `kCardDirty` (byte) value into the `object`'s card
1277   // (located at `card + object >> kCardShift`).
1278   //
1279   // This dual use of the value in register `card` (1. to calculate the location
1280   // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
1281   // (no need to explicitly load `kCardDirty` as an immediate value).
1282   __ Strb(card, MemOperand(card, temp.X()));
1283   if (value_can_be_null) {
1284     __ Bind(&done);
1285   }
1286 }
1287 
1288 void CodeGeneratorARM64::SetupBlockedRegisters() const {
1289   // Blocked core registers:
1290   //      lr        : Runtime reserved.
1291   //      tr        : Runtime reserved.
1292   //      mr        : Runtime reserved.
1293   //      ip1       : VIXL core temp.
1294   //      ip0       : VIXL core temp.
1295   //      x18       : Platform register.
1296   //
1297   // Blocked fp registers:
1298   //      d31       : VIXL fp temp.
1299   CPURegList reserved_core_registers = vixl_reserved_core_registers;
1300   reserved_core_registers.Combine(runtime_reserved_core_registers);
1301   while (!reserved_core_registers.IsEmpty()) {
1302     blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
1303   }
1304   blocked_core_registers_[X18] = true;
1305 
1306   CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
1307   while (!reserved_fp_registers.IsEmpty()) {
1308     blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
1309   }
1310 
1311   if (GetGraph()->IsDebuggable()) {
1312     // Stubs do not save callee-save floating point registers. If the graph
1313     // is debuggable, we need to deal with these registers differently. For
1314     // now, just block them.
1315     CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
1316     while (!reserved_fp_registers_debuggable.IsEmpty()) {
1317       blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
1318     }
1319   }
1320 }
1321 
1322 size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1323   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1324   __ Str(reg, MemOperand(sp, stack_index));
1325   return kArm64WordSize;
1326 }
1327 
1328 size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1329   Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
1330   __ Ldr(reg, MemOperand(sp, stack_index));
1331   return kArm64WordSize;
1332 }
1333 
1334 size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1335                                                      uint32_t reg_id ATTRIBUTE_UNUSED) {
1336   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1337              << "use SaveRestoreLiveRegistersHelper";
1338   UNREACHABLE();
1339 }
1340 
1341 size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
1342                                                         uint32_t reg_id ATTRIBUTE_UNUSED) {
1343   LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
1344              << "use SaveRestoreLiveRegistersHelper";
1345   UNREACHABLE();
1346 }
1347 
1348 void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
1349   stream << XRegister(reg);
1350 }
1351 
1352 void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1353   stream << DRegister(reg);
1354 }
1355 
1356 const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
1357   return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
1358 }
1359 
1360 void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
1361   if (constant->IsIntConstant()) {
1362     __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
1363   } else if (constant->IsLongConstant()) {
1364     __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
1365   } else if (constant->IsNullConstant()) {
1366     __ Mov(Register(destination), 0);
1367   } else if (constant->IsFloatConstant()) {
1368     __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue());
1369   } else {
1370     DCHECK(constant->IsDoubleConstant());
1371     __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue());
1372   }
1373 }
1374 
1375 
1376 static bool CoherentConstantAndType(Location constant, DataType::Type type) {
1377   DCHECK(constant.IsConstant());
1378   HConstant* cst = constant.GetConstant();
1379   return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1380          // Null is mapped to a core W register, which we associate with kPrimInt.
1381          (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1382          (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1383          (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1384          (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1385 }
1386 
1387 // Allocate a scratch register from the VIXL pool, querying first
1388 // the floating-point register pool, and then the core register
1389 // pool. This is essentially a reimplementation of
1390 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1391 // using a different allocation strategy.
1392 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1393                                                     vixl::aarch64::UseScratchRegisterScope* temps,
1394                                                     int size_in_bits) {
1395   return masm->GetScratchVRegisterList()->IsEmpty()
1396       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1397       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1398 }
1399 
1400 void CodeGeneratorARM64::MoveLocation(Location destination,
1401                                       Location source,
1402                                       DataType::Type dst_type) {
1403   if (source.Equals(destination)) {
1404     return;
1405   }
1406 
1407   // A valid move can always be inferred from the destination and source
1408   // locations. When moving from and to a register, the argument type can be
1409   // used to generate 32bit instead of 64bit moves. In debug mode we also
1410   // checks the coherency of the locations and the type.
1411   bool unspecified_type = (dst_type == DataType::Type::kVoid);
1412 
1413   if (destination.IsRegister() || destination.IsFpuRegister()) {
1414     if (unspecified_type) {
1415       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1416       if (source.IsStackSlot() ||
1417           (src_cst != nullptr && (src_cst->IsIntConstant()
1418                                   || src_cst->IsFloatConstant()
1419                                   || src_cst->IsNullConstant()))) {
1420         // For stack slots and 32bit constants, a 64bit type is appropriate.
1421         dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1422       } else {
1423         // If the source is a double stack slot or a 64bit constant, a 64bit
1424         // type is appropriate. Else the source is a register, and since the
1425         // type has not been specified, we chose a 64bit type to force a 64bit
1426         // move.
1427         dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1428       }
1429     }
1430     DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1431            (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1432     CPURegister dst = CPURegisterFrom(destination, dst_type);
1433     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1434       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1435       __ Ldr(dst, StackOperandFrom(source));
1436     } else if (source.IsSIMDStackSlot()) {
1437       __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1438     } else if (source.IsConstant()) {
1439       DCHECK(CoherentConstantAndType(source, dst_type));
1440       MoveConstant(dst, source.GetConstant());
1441     } else if (source.IsRegister()) {
1442       if (destination.IsRegister()) {
1443         __ Mov(Register(dst), RegisterFrom(source, dst_type));
1444       } else {
1445         DCHECK(destination.IsFpuRegister());
1446         DataType::Type source_type = DataType::Is64BitType(dst_type)
1447             ? DataType::Type::kInt64
1448             : DataType::Type::kInt32;
1449         __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
1450       }
1451     } else {
1452       DCHECK(source.IsFpuRegister());
1453       if (destination.IsRegister()) {
1454         DataType::Type source_type = DataType::Is64BitType(dst_type)
1455             ? DataType::Type::kFloat64
1456             : DataType::Type::kFloat32;
1457         __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
1458       } else {
1459         DCHECK(destination.IsFpuRegister());
1460         if (GetGraph()->HasSIMD()) {
1461           __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
1462         } else {
1463           __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
1464         }
1465       }
1466     }
1467   } else if (destination.IsSIMDStackSlot()) {
1468     if (source.IsFpuRegister()) {
1469       __ Str(QRegisterFrom(source), StackOperandFrom(destination));
1470     } else {
1471       DCHECK(source.IsSIMDStackSlot());
1472       UseScratchRegisterScope temps(GetVIXLAssembler());
1473       if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
1474         Register temp = temps.AcquireX();
1475         __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
1476         __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
1477         __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
1478         __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
1479       } else {
1480         VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
1481         __ Ldr(temp, StackOperandFrom(source));
1482         __ Str(temp, StackOperandFrom(destination));
1483       }
1484     }
1485   } else {  // The destination is not a register. It must be a stack slot.
1486     DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
1487     if (source.IsRegister() || source.IsFpuRegister()) {
1488       if (unspecified_type) {
1489         if (source.IsRegister()) {
1490           dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
1491         } else {
1492           dst_type =
1493               destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
1494         }
1495       }
1496       DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
1497              (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
1498       __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
1499     } else if (source.IsConstant()) {
1500       DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
1501           << source << " " << dst_type;
1502       UseScratchRegisterScope temps(GetVIXLAssembler());
1503       HConstant* src_cst = source.GetConstant();
1504       CPURegister temp;
1505       if (src_cst->IsZeroBitPattern()) {
1506         temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
1507             ? Register(xzr)
1508             : Register(wzr);
1509       } else {
1510         if (src_cst->IsIntConstant()) {
1511           temp = temps.AcquireW();
1512         } else if (src_cst->IsLongConstant()) {
1513           temp = temps.AcquireX();
1514         } else if (src_cst->IsFloatConstant()) {
1515           temp = temps.AcquireS();
1516         } else {
1517           DCHECK(src_cst->IsDoubleConstant());
1518           temp = temps.AcquireD();
1519         }
1520         MoveConstant(temp, src_cst);
1521       }
1522       __ Str(temp, StackOperandFrom(destination));
1523     } else {
1524       DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
1525       DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
1526       UseScratchRegisterScope temps(GetVIXLAssembler());
1527       // Use any scratch register (a core or a floating-point one)
1528       // from VIXL scratch register pools as a temporary.
1529       //
1530       // We used to only use the FP scratch register pool, but in some
1531       // rare cases the only register from this pool (D31) would
1532       // already be used (e.g. within a ParallelMove instruction, when
1533       // a move is blocked by a another move requiring a scratch FP
1534       // register, which would reserve D31). To prevent this issue, we
1535       // ask for a scratch register of any type (core or FP).
1536       //
1537       // Also, we start by asking for a FP scratch register first, as the
1538       // demand of scratch core registers is higher. This is why we
1539       // use AcquireFPOrCoreCPURegisterOfSize instead of
1540       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1541       // allocates core scratch registers first.
1542       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1543           GetVIXLAssembler(),
1544           &temps,
1545           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1546       __ Ldr(temp, StackOperandFrom(source));
1547       __ Str(temp, StackOperandFrom(destination));
1548     }
1549   }
1550 }
1551 
1552 void CodeGeneratorARM64::Load(DataType::Type type,
1553                               CPURegister dst,
1554                               const MemOperand& src) {
1555   switch (type) {
1556     case DataType::Type::kBool:
1557     case DataType::Type::kUint8:
1558       __ Ldrb(Register(dst), src);
1559       break;
1560     case DataType::Type::kInt8:
1561       __ Ldrsb(Register(dst), src);
1562       break;
1563     case DataType::Type::kUint16:
1564       __ Ldrh(Register(dst), src);
1565       break;
1566     case DataType::Type::kInt16:
1567       __ Ldrsh(Register(dst), src);
1568       break;
1569     case DataType::Type::kInt32:
1570     case DataType::Type::kReference:
1571     case DataType::Type::kInt64:
1572     case DataType::Type::kFloat32:
1573     case DataType::Type::kFloat64:
1574       DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1575       __ Ldr(dst, src);
1576       break;
1577     case DataType::Type::kUint32:
1578     case DataType::Type::kUint64:
1579     case DataType::Type::kVoid:
1580       LOG(FATAL) << "Unreachable type " << type;
1581   }
1582 }
1583 
1584 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1585                                      CPURegister dst,
1586                                      const MemOperand& src,
1587                                      bool needs_null_check) {
1588   MacroAssembler* masm = GetVIXLAssembler();
1589   UseScratchRegisterScope temps(masm);
1590   Register temp_base = temps.AcquireX();
1591   DataType::Type type = instruction->GetType();
1592 
1593   DCHECK(!src.IsPreIndex());
1594   DCHECK(!src.IsPostIndex());
1595 
1596   // TODO(vixl): Let the MacroAssembler handle MemOperand.
1597   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1598   {
1599     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
1600     MemOperand base = MemOperand(temp_base);
1601     switch (type) {
1602       case DataType::Type::kBool:
1603       case DataType::Type::kUint8:
1604       case DataType::Type::kInt8:
1605         {
1606           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1607           __ ldarb(Register(dst), base);
1608           if (needs_null_check) {
1609             MaybeRecordImplicitNullCheck(instruction);
1610           }
1611         }
1612         if (type == DataType::Type::kInt8) {
1613           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1614         }
1615         break;
1616       case DataType::Type::kUint16:
1617       case DataType::Type::kInt16:
1618         {
1619           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1620           __ ldarh(Register(dst), base);
1621           if (needs_null_check) {
1622             MaybeRecordImplicitNullCheck(instruction);
1623           }
1624         }
1625         if (type == DataType::Type::kInt16) {
1626           __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
1627         }
1628         break;
1629       case DataType::Type::kInt32:
1630       case DataType::Type::kReference:
1631       case DataType::Type::kInt64:
1632         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1633         {
1634           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1635           __ ldar(Register(dst), base);
1636           if (needs_null_check) {
1637             MaybeRecordImplicitNullCheck(instruction);
1638           }
1639         }
1640         break;
1641       case DataType::Type::kFloat32:
1642       case DataType::Type::kFloat64: {
1643         DCHECK(dst.IsFPRegister());
1644         DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1645 
1646         Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1647         {
1648           ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1649           __ ldar(temp, base);
1650           if (needs_null_check) {
1651             MaybeRecordImplicitNullCheck(instruction);
1652           }
1653         }
1654         __ Fmov(VRegister(dst), temp);
1655         break;
1656       }
1657       case DataType::Type::kUint32:
1658       case DataType::Type::kUint64:
1659       case DataType::Type::kVoid:
1660         LOG(FATAL) << "Unreachable type " << type;
1661     }
1662   }
1663 }
1664 
1665 void CodeGeneratorARM64::Store(DataType::Type type,
1666                                CPURegister src,
1667                                const MemOperand& dst) {
1668   switch (type) {
1669     case DataType::Type::kBool:
1670     case DataType::Type::kUint8:
1671     case DataType::Type::kInt8:
1672       __ Strb(Register(src), dst);
1673       break;
1674     case DataType::Type::kUint16:
1675     case DataType::Type::kInt16:
1676       __ Strh(Register(src), dst);
1677       break;
1678     case DataType::Type::kInt32:
1679     case DataType::Type::kReference:
1680     case DataType::Type::kInt64:
1681     case DataType::Type::kFloat32:
1682     case DataType::Type::kFloat64:
1683       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1684       __ Str(src, dst);
1685       break;
1686     case DataType::Type::kUint32:
1687     case DataType::Type::kUint64:
1688     case DataType::Type::kVoid:
1689       LOG(FATAL) << "Unreachable type " << type;
1690   }
1691 }
1692 
1693 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
1694                                       DataType::Type type,
1695                                       CPURegister src,
1696                                       const MemOperand& dst,
1697                                       bool needs_null_check) {
1698   MacroAssembler* masm = GetVIXLAssembler();
1699   UseScratchRegisterScope temps(GetVIXLAssembler());
1700   Register temp_base = temps.AcquireX();
1701 
1702   DCHECK(!dst.IsPreIndex());
1703   DCHECK(!dst.IsPostIndex());
1704 
1705   // TODO(vixl): Let the MacroAssembler handle this.
1706   Operand op = OperandFromMemOperand(dst);
1707   __ Add(temp_base, dst.GetBaseRegister(), op);
1708   MemOperand base = MemOperand(temp_base);
1709   // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
1710   switch (type) {
1711     case DataType::Type::kBool:
1712     case DataType::Type::kUint8:
1713     case DataType::Type::kInt8:
1714       {
1715         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1716         __ stlrb(Register(src), base);
1717         if (needs_null_check) {
1718           MaybeRecordImplicitNullCheck(instruction);
1719         }
1720       }
1721       break;
1722     case DataType::Type::kUint16:
1723     case DataType::Type::kInt16:
1724       {
1725         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1726         __ stlrh(Register(src), base);
1727         if (needs_null_check) {
1728           MaybeRecordImplicitNullCheck(instruction);
1729         }
1730       }
1731       break;
1732     case DataType::Type::kInt32:
1733     case DataType::Type::kReference:
1734     case DataType::Type::kInt64:
1735       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1736       {
1737         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1738         __ stlr(Register(src), base);
1739         if (needs_null_check) {
1740           MaybeRecordImplicitNullCheck(instruction);
1741         }
1742       }
1743       break;
1744     case DataType::Type::kFloat32:
1745     case DataType::Type::kFloat64: {
1746       DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
1747       Register temp_src;
1748       if (src.IsZero()) {
1749         // The zero register is used to avoid synthesizing zero constants.
1750         temp_src = Register(src);
1751       } else {
1752         DCHECK(src.IsFPRegister());
1753         temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
1754         __ Fmov(temp_src, VRegister(src));
1755       }
1756       {
1757         ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
1758         __ stlr(temp_src, base);
1759         if (needs_null_check) {
1760           MaybeRecordImplicitNullCheck(instruction);
1761         }
1762       }
1763       break;
1764     }
1765     case DataType::Type::kUint32:
1766     case DataType::Type::kUint64:
1767     case DataType::Type::kVoid:
1768       LOG(FATAL) << "Unreachable type " << type;
1769   }
1770 }
1771 
1772 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1773                                        HInstruction* instruction,
1774                                        uint32_t dex_pc,
1775                                        SlowPathCode* slow_path) {
1776   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1777 
1778   ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint);
1779   // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the
1780   // entire oat file. This adds an extra branch and we do not want to slow down the main path.
1781   // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative.
1782   if (slow_path == nullptr || Runtime::Current()->UseJitCompilation()) {
1783     __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value()));
1784     // Ensure the pc position is recorded immediately after the `blr` instruction.
1785     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
1786     __ blr(lr);
1787     if (EntrypointRequiresStackMap(entrypoint)) {
1788       RecordPcInfo(instruction, dex_pc, slow_path);
1789     }
1790   } else {
1791     // Ensure the pc position is recorded immediately after the `bl` instruction.
1792     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
1793     EmitEntrypointThunkCall(entrypoint_offset);
1794     if (EntrypointRequiresStackMap(entrypoint)) {
1795       RecordPcInfo(instruction, dex_pc, slow_path);
1796     }
1797   }
1798 }
1799 
1800 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1801                                                              HInstruction* instruction,
1802                                                              SlowPathCode* slow_path) {
1803   ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1804   __ Ldr(lr, MemOperand(tr, entry_point_offset));
1805   __ Blr(lr);
1806 }
1807 
1808 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
1809                                                                      Register class_reg) {
1810   UseScratchRegisterScope temps(GetVIXLAssembler());
1811   Register temp = temps.AcquireW();
1812   constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
1813   const size_t status_byte_offset =
1814       mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
1815   constexpr uint32_t shifted_visibly_initialized_value =
1816       enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
1817 
1818   // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize
1819   // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code
1820   // size, load only the high byte of the field and compare with 0xf0.
1821   // Note: The same code size could be achieved with LDR+MNV(asr #24)+CBNZ but benchmarks
1822   // show that this pattern is slower (tested on little cores).
1823   __ Ldrb(temp, HeapOperand(class_reg, status_byte_offset));
1824   __ Cmp(temp, shifted_visibly_initialized_value);
1825   __ B(lo, slow_path->GetEntryLabel());
1826   __ Bind(slow_path->GetExitLabel());
1827 }
1828 
1829 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
1830     HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
1831   uint32_t path_to_root = check->GetBitstringPathToRoot();
1832   uint32_t mask = check->GetBitstringMask();
1833   DCHECK(IsPowerOfTwo(mask + 1));
1834   size_t mask_bits = WhichPowerOf2(mask + 1);
1835 
1836   if (mask_bits == 16u) {
1837     // Load only the bitstring part of the status word.
1838     __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
1839   } else {
1840     // /* uint32_t */ temp = temp->status_
1841     __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
1842     // Extract the bitstring bits.
1843     __ Ubfx(temp, temp, 0, mask_bits);
1844   }
1845   // Compare the bitstring bits to `path_to_root`.
1846   __ Cmp(temp, path_to_root);
1847 }
1848 
1849 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
1850   BarrierType type = BarrierAll;
1851 
1852   switch (kind) {
1853     case MemBarrierKind::kAnyAny:
1854     case MemBarrierKind::kAnyStore: {
1855       type = BarrierAll;
1856       break;
1857     }
1858     case MemBarrierKind::kLoadAny: {
1859       type = BarrierReads;
1860       break;
1861     }
1862     case MemBarrierKind::kStoreStore: {
1863       type = BarrierWrites;
1864       break;
1865     }
1866     default:
1867       LOG(FATAL) << "Unexpected memory barrier " << kind;
1868   }
1869   __ Dmb(InnerShareable, type);
1870 }
1871 
1872 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction,
1873                                                          HBasicBlock* successor) {
1874   SuspendCheckSlowPathARM64* slow_path =
1875       down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath());
1876   if (slow_path == nullptr) {
1877     slow_path =
1878         new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor);
1879     instruction->SetSlowPath(slow_path);
1880     codegen_->AddSlowPath(slow_path);
1881     if (successor != nullptr) {
1882       DCHECK(successor->IsLoopHeader());
1883     }
1884   } else {
1885     DCHECK_EQ(slow_path->GetSuccessor(), successor);
1886   }
1887 
1888   UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
1889   Register temp = temps.AcquireW();
1890 
1891   __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue()));
1892   if (successor == nullptr) {
1893     __ Cbnz(temp, slow_path->GetEntryLabel());
1894     __ Bind(slow_path->GetReturnLabel());
1895   } else {
1896     __ Cbz(temp, codegen_->GetLabelOf(successor));
1897     __ B(slow_path->GetEntryLabel());
1898     // slow_path will return to GetLabelOf(successor).
1899   }
1900 }
1901 
1902 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph,
1903                                                              CodeGeneratorARM64* codegen)
1904       : InstructionCodeGenerator(graph, codegen),
1905         assembler_(codegen->GetAssembler()),
1906         codegen_(codegen) {}
1907 
1908 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
1909   DCHECK_EQ(instr->InputCount(), 2U);
1910   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
1911   DataType::Type type = instr->GetResultType();
1912   switch (type) {
1913     case DataType::Type::kInt32:
1914     case DataType::Type::kInt64:
1915       locations->SetInAt(0, Location::RequiresRegister());
1916       locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr));
1917       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1918       break;
1919 
1920     case DataType::Type::kFloat32:
1921     case DataType::Type::kFloat64:
1922       locations->SetInAt(0, Location::RequiresFpuRegister());
1923       locations->SetInAt(1, Location::RequiresFpuRegister());
1924       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
1925       break;
1926 
1927     default:
1928       LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type;
1929   }
1930 }
1931 
1932 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
1933                                            const FieldInfo& field_info) {
1934   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
1935 
1936   bool object_field_get_with_read_barrier =
1937       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
1938   LocationSummary* locations =
1939       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
1940                                                        object_field_get_with_read_barrier
1941                                                            ? LocationSummary::kCallOnSlowPath
1942                                                            : LocationSummary::kNoCall);
1943   if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
1944     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
1945     // We need a temporary register for the read barrier load in
1946     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
1947     // only if the field is volatile or the offset is too big.
1948     if (field_info.IsVolatile() ||
1949         field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
1950       locations->AddTemp(FixedTempLocation());
1951     }
1952   }
1953   locations->SetInAt(0, Location::RequiresRegister());
1954   if (DataType::IsFloatingPointType(instruction->GetType())) {
1955     locations->SetOut(Location::RequiresFpuRegister());
1956   } else {
1957     // The output overlaps for an object field get when read barriers
1958     // are enabled: we do not want the load to overwrite the object's
1959     // location, as we need it to emit the read barrier.
1960     locations->SetOut(
1961         Location::RequiresRegister(),
1962         object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
1963   }
1964 }
1965 
1966 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
1967                                                    const FieldInfo& field_info) {
1968   DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
1969   LocationSummary* locations = instruction->GetLocations();
1970   Location base_loc = locations->InAt(0);
1971   Location out = locations->Out();
1972   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
1973   DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
1974   DataType::Type load_type = instruction->GetType();
1975   MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset());
1976 
1977   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier &&
1978       load_type == DataType::Type::kReference) {
1979     // Object FieldGet with Baker's read barrier case.
1980     // /* HeapReference<Object> */ out = *(base + offset)
1981     Register base = RegisterFrom(base_loc, DataType::Type::kReference);
1982     Location maybe_temp =
1983         (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
1984     // Note that potential implicit null checks are handled in this
1985     // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
1986     codegen_->GenerateFieldLoadWithBakerReadBarrier(
1987         instruction,
1988         out,
1989         base,
1990         offset,
1991         maybe_temp,
1992         /* needs_null_check= */ true,
1993         field_info.IsVolatile());
1994   } else {
1995     // General case.
1996     if (field_info.IsVolatile()) {
1997       // Note that a potential implicit null check is handled in this
1998       // CodeGeneratorARM64::LoadAcquire call.
1999       // NB: LoadAcquire will record the pc info if needed.
2000       codegen_->LoadAcquire(
2001           instruction, OutputCPURegister(instruction), field, /* needs_null_check= */ true);
2002     } else {
2003       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2004       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2005       codegen_->Load(load_type, OutputCPURegister(instruction), field);
2006       codegen_->MaybeRecordImplicitNullCheck(instruction);
2007     }
2008     if (load_type == DataType::Type::kReference) {
2009       // If read barriers are enabled, emit read barriers other than
2010       // Baker's using a slow path (and also unpoison the loaded
2011       // reference, if heap poisoning is enabled).
2012       codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
2013     }
2014   }
2015 }
2016 
2017 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) {
2018   LocationSummary* locations =
2019       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2020   locations->SetInAt(0, Location::RequiresRegister());
2021   if (IsConstantZeroBitPattern(instruction->InputAt(1))) {
2022     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
2023   } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
2024     locations->SetInAt(1, Location::RequiresFpuRegister());
2025   } else {
2026     locations->SetInAt(1, Location::RequiresRegister());
2027   }
2028 }
2029 
2030 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
2031                                                    const FieldInfo& field_info,
2032                                                    bool value_can_be_null) {
2033   DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
2034 
2035   Register obj = InputRegisterAt(instruction, 0);
2036   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1);
2037   CPURegister source = value;
2038   Offset offset = field_info.GetFieldOffset();
2039   DataType::Type field_type = field_info.GetFieldType();
2040 
2041   {
2042     // We use a block to end the scratch scope before the write barrier, thus
2043     // freeing the temporary registers so they can be used in `MarkGCCard`.
2044     UseScratchRegisterScope temps(GetVIXLAssembler());
2045 
2046     if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
2047       DCHECK(value.IsW());
2048       Register temp = temps.AcquireW();
2049       __ Mov(temp, value.W());
2050       GetAssembler()->PoisonHeapReference(temp.W());
2051       source = temp;
2052     }
2053 
2054     if (field_info.IsVolatile()) {
2055       codegen_->StoreRelease(
2056           instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true);
2057     } else {
2058       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2059       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2060       codegen_->Store(field_type, source, HeapOperand(obj, offset));
2061       codegen_->MaybeRecordImplicitNullCheck(instruction);
2062     }
2063   }
2064 
2065   if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
2066     codegen_->MarkGCCard(obj, Register(value), value_can_be_null);
2067   }
2068 }
2069 
2070 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
2071   DataType::Type type = instr->GetType();
2072 
2073   switch (type) {
2074     case DataType::Type::kInt32:
2075     case DataType::Type::kInt64: {
2076       Register dst = OutputRegister(instr);
2077       Register lhs = InputRegisterAt(instr, 0);
2078       Operand rhs = InputOperandAt(instr, 1);
2079       if (instr->IsAdd()) {
2080         __ Add(dst, lhs, rhs);
2081       } else if (instr->IsAnd()) {
2082         __ And(dst, lhs, rhs);
2083       } else if (instr->IsOr()) {
2084         __ Orr(dst, lhs, rhs);
2085       } else if (instr->IsSub()) {
2086         __ Sub(dst, lhs, rhs);
2087       } else if (instr->IsRor()) {
2088         if (rhs.IsImmediate()) {
2089           uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1);
2090           __ Ror(dst, lhs, shift);
2091         } else {
2092           // Ensure shift distance is in the same size register as the result. If
2093           // we are rotating a long and the shift comes in a w register originally,
2094           // we don't need to sxtw for use as an x since the shift distances are
2095           // all & reg_bits - 1.
2096           __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
2097         }
2098       } else if (instr->IsMin() || instr->IsMax()) {
2099           __ Cmp(lhs, rhs);
2100           __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt);
2101       } else {
2102         DCHECK(instr->IsXor());
2103         __ Eor(dst, lhs, rhs);
2104       }
2105       break;
2106     }
2107     case DataType::Type::kFloat32:
2108     case DataType::Type::kFloat64: {
2109       VRegister dst = OutputFPRegister(instr);
2110       VRegister lhs = InputFPRegisterAt(instr, 0);
2111       VRegister rhs = InputFPRegisterAt(instr, 1);
2112       if (instr->IsAdd()) {
2113         __ Fadd(dst, lhs, rhs);
2114       } else if (instr->IsSub()) {
2115         __ Fsub(dst, lhs, rhs);
2116       } else if (instr->IsMin()) {
2117         __ Fmin(dst, lhs, rhs);
2118       } else if (instr->IsMax()) {
2119         __ Fmax(dst, lhs, rhs);
2120       } else {
2121         LOG(FATAL) << "Unexpected floating-point binary operation";
2122       }
2123       break;
2124     }
2125     default:
2126       LOG(FATAL) << "Unexpected binary operation type " << type;
2127   }
2128 }
2129 
2130 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) {
2131   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2132 
2133   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2134   DataType::Type type = instr->GetResultType();
2135   switch (type) {
2136     case DataType::Type::kInt32:
2137     case DataType::Type::kInt64: {
2138       locations->SetInAt(0, Location::RequiresRegister());
2139       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
2140       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2141       break;
2142     }
2143     default:
2144       LOG(FATAL) << "Unexpected shift type " << type;
2145   }
2146 }
2147 
2148 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) {
2149   DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr());
2150 
2151   DataType::Type type = instr->GetType();
2152   switch (type) {
2153     case DataType::Type::kInt32:
2154     case DataType::Type::kInt64: {
2155       Register dst = OutputRegister(instr);
2156       Register lhs = InputRegisterAt(instr, 0);
2157       Operand rhs = InputOperandAt(instr, 1);
2158       if (rhs.IsImmediate()) {
2159         uint32_t shift_value = rhs.GetImmediate() &
2160             (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance);
2161         if (instr->IsShl()) {
2162           __ Lsl(dst, lhs, shift_value);
2163         } else if (instr->IsShr()) {
2164           __ Asr(dst, lhs, shift_value);
2165         } else {
2166           __ Lsr(dst, lhs, shift_value);
2167         }
2168       } else {
2169         Register rhs_reg = dst.IsX() ? rhs.GetRegister().X() : rhs.GetRegister().W();
2170 
2171         if (instr->IsShl()) {
2172           __ Lsl(dst, lhs, rhs_reg);
2173         } else if (instr->IsShr()) {
2174           __ Asr(dst, lhs, rhs_reg);
2175         } else {
2176           __ Lsr(dst, lhs, rhs_reg);
2177         }
2178       }
2179       break;
2180     }
2181     default:
2182       LOG(FATAL) << "Unexpected shift operation type " << type;
2183   }
2184 }
2185 
2186 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) {
2187   HandleBinaryOp(instruction);
2188 }
2189 
2190 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) {
2191   HandleBinaryOp(instruction);
2192 }
2193 
2194 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) {
2195   HandleBinaryOp(instruction);
2196 }
2197 
2198 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
2199   HandleBinaryOp(instruction);
2200 }
2201 
2202 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2203   DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType();
2204   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr);
2205   locations->SetInAt(0, Location::RequiresRegister());
2206   // There is no immediate variant of negated bitwise instructions in AArch64.
2207   locations->SetInAt(1, Location::RequiresRegister());
2208   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2209 }
2210 
2211 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) {
2212   Register dst = OutputRegister(instr);
2213   Register lhs = InputRegisterAt(instr, 0);
2214   Register rhs = InputRegisterAt(instr, 1);
2215 
2216   switch (instr->GetOpKind()) {
2217     case HInstruction::kAnd:
2218       __ Bic(dst, lhs, rhs);
2219       break;
2220     case HInstruction::kOr:
2221       __ Orn(dst, lhs, rhs);
2222       break;
2223     case HInstruction::kXor:
2224       __ Eon(dst, lhs, rhs);
2225       break;
2226     default:
2227       LOG(FATAL) << "Unreachable";
2228   }
2229 }
2230 
2231 void LocationsBuilderARM64::VisitDataProcWithShifterOp(
2232     HDataProcWithShifterOp* instruction) {
2233   DCHECK(instruction->GetType() == DataType::Type::kInt32 ||
2234          instruction->GetType() == DataType::Type::kInt64);
2235   LocationSummary* locations =
2236       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2237   if (instruction->GetInstrKind() == HInstruction::kNeg) {
2238     locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
2239   } else {
2240     locations->SetInAt(0, Location::RequiresRegister());
2241   }
2242   locations->SetInAt(1, Location::RequiresRegister());
2243   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2244 }
2245 
2246 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp(
2247     HDataProcWithShifterOp* instruction) {
2248   DataType::Type type = instruction->GetType();
2249   HInstruction::InstructionKind kind = instruction->GetInstrKind();
2250   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
2251   Register out = OutputRegister(instruction);
2252   Register left;
2253   if (kind != HInstruction::kNeg) {
2254     left = InputRegisterAt(instruction, 0);
2255   }
2256   // If this `HDataProcWithShifterOp` was created by merging a type conversion as the
2257   // shifter operand operation, the IR generating `right_reg` (input to the type
2258   // conversion) can have a different type from the current instruction's type,
2259   // so we manually indicate the type.
2260   Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
2261   Operand right_operand(0);
2262 
2263   HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
2264   if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
2265     right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
2266   } else {
2267     right_operand = Operand(right_reg,
2268                             helpers::ShiftFromOpKind(op_kind),
2269                             instruction->GetShiftAmount());
2270   }
2271 
2272   // Logical binary operations do not support extension operations in the
2273   // operand. Note that VIXL would still manage if it was passed by generating
2274   // the extension as a separate instruction.
2275   // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
2276   DCHECK(!right_operand.IsExtendedRegister() ||
2277          (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
2278           kind != HInstruction::kNeg));
2279   switch (kind) {
2280     case HInstruction::kAdd:
2281       __ Add(out, left, right_operand);
2282       break;
2283     case HInstruction::kAnd:
2284       __ And(out, left, right_operand);
2285       break;
2286     case HInstruction::kNeg:
2287       DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero());
2288       __ Neg(out, right_operand);
2289       break;
2290     case HInstruction::kOr:
2291       __ Orr(out, left, right_operand);
2292       break;
2293     case HInstruction::kSub:
2294       __ Sub(out, left, right_operand);
2295       break;
2296     case HInstruction::kXor:
2297       __ Eor(out, left, right_operand);
2298       break;
2299     default:
2300       LOG(FATAL) << "Unexpected operation kind: " << kind;
2301       UNREACHABLE();
2302   }
2303 }
2304 
2305 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2306   LocationSummary* locations =
2307       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2308   locations->SetInAt(0, Location::RequiresRegister());
2309   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
2310   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2311 }
2312 
2313 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
2314   __ Add(OutputRegister(instruction),
2315          InputRegisterAt(instruction, 0),
2316          Operand(InputOperandAt(instruction, 1)));
2317 }
2318 
2319 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) {
2320   LocationSummary* locations =
2321       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
2322 
2323   HIntConstant* shift = instruction->GetShift()->AsIntConstant();
2324 
2325   locations->SetInAt(0, Location::RequiresRegister());
2326   // For byte case we don't need to shift the index variable so we can encode the data offset into
2327   // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist
2328   // data offset constant generation out of the loop and reduce the critical path length in the
2329   // loop.
2330   locations->SetInAt(1, shift->GetValue() == 0
2331                         ? Location::ConstantLocation(instruction->GetOffset()->AsIntConstant())
2332                         : Location::RequiresRegister());
2333   locations->SetInAt(2, Location::ConstantLocation(shift));
2334   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2335 }
2336 
2337 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
2338     HIntermediateAddressIndex* instruction) {
2339   Register index_reg = InputRegisterAt(instruction, 0);
2340   uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2));
2341   uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
2342 
2343   if (shift == 0) {
2344     __ Add(OutputRegister(instruction), index_reg, offset);
2345   } else {
2346     Register offset_reg = InputRegisterAt(instruction, 1);
2347     __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift));
2348   }
2349 }
2350 
2351 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2352   LocationSummary* locations =
2353       new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall);
2354   HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2355   if (instr->GetOpKind() == HInstruction::kSub &&
2356       accumulator->IsConstant() &&
2357       accumulator->AsConstant()->IsArithmeticZero()) {
2358     // Don't allocate register for Mneg instruction.
2359   } else {
2360     locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
2361                        Location::RequiresRegister());
2362   }
2363   locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
2364   locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
2365   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2366 }
2367 
2368 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
2369   Register res = OutputRegister(instr);
2370   Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
2371   Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
2372 
2373   // Avoid emitting code that could trigger Cortex A53's erratum 835769.
2374   // This fixup should be carried out for all multiply-accumulate instructions:
2375   // madd, msub, smaddl, smsubl, umaddl and umsubl.
2376   if (instr->GetType() == DataType::Type::kInt64 &&
2377       codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
2378     MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
2379     vixl::aarch64::Instruction* prev =
2380         masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize;
2381     if (prev->IsLoadOrStore()) {
2382       // Make sure we emit only exactly one nop.
2383       ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
2384       __ nop();
2385     }
2386   }
2387 
2388   if (instr->GetOpKind() == HInstruction::kAdd) {
2389     Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2390     __ Madd(res, mul_left, mul_right, accumulator);
2391   } else {
2392     DCHECK(instr->GetOpKind() == HInstruction::kSub);
2393     HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex);
2394     if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) {
2395       __ Mneg(res, mul_left, mul_right);
2396     } else {
2397       Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
2398       __ Msub(res, mul_left, mul_right, accumulator);
2399     }
2400   }
2401 }
2402 
2403 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
2404   bool object_array_get_with_read_barrier =
2405       kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
2406   LocationSummary* locations =
2407       new (GetGraph()->GetAllocator()) LocationSummary(instruction,
2408                                                        object_array_get_with_read_barrier
2409                                                            ? LocationSummary::kCallOnSlowPath
2410                                                            : LocationSummary::kNoCall);
2411   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
2412     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
2413     if (instruction->GetIndex()->IsConstant()) {
2414       // Array loads with constant index are treated as field loads.
2415       // We need a temporary register for the read barrier load in
2416       // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
2417       // only if the offset is too big.
2418       uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2419       uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
2420       offset += index << DataType::SizeShift(DataType::Type::kReference);
2421       if (offset >= kReferenceLoadMinFarOffset) {
2422         locations->AddTemp(FixedTempLocation());
2423       }
2424     } else if (!instruction->GetArray()->IsIntermediateAddress()) {
2425       // We need a non-scratch temporary for the array data pointer in
2426       // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no
2427       // intermediate address.
2428       locations->AddTemp(Location::RequiresRegister());
2429     }
2430   }
2431   locations->SetInAt(0, Location::RequiresRegister());
2432   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2433   if (DataType::IsFloatingPointType(instruction->GetType())) {
2434     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2435   } else {
2436     // The output overlaps in the case of an object array get with
2437     // read barriers enabled: we do not want the move to overwrite the
2438     // array's location, as we need it to emit the read barrier.
2439     locations->SetOut(
2440         Location::RequiresRegister(),
2441         object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
2442   }
2443 }
2444 
2445 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
2446   DataType::Type type = instruction->GetType();
2447   Register obj = InputRegisterAt(instruction, 0);
2448   LocationSummary* locations = instruction->GetLocations();
2449   Location index = locations->InAt(1);
2450   Location out = locations->Out();
2451   uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
2452   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
2453                                         instruction->IsStringCharAt();
2454   MacroAssembler* masm = GetVIXLAssembler();
2455   UseScratchRegisterScope temps(masm);
2456 
2457   // The non-Baker read barrier instrumentation of object ArrayGet instructions
2458   // does not support the HIntermediateAddress instruction.
2459   DCHECK(!((type == DataType::Type::kReference) &&
2460            instruction->GetArray()->IsIntermediateAddress() &&
2461            kEmitCompilerReadBarrier &&
2462            !kUseBakerReadBarrier));
2463 
2464   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2465     // Object ArrayGet with Baker's read barrier case.
2466     // Note that a potential implicit null check is handled in the
2467     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
2468     DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
2469     if (index.IsConstant()) {
2470       DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2471       // Array load with a constant index can be treated as a field load.
2472       offset += Int64FromLocation(index) << DataType::SizeShift(type);
2473       Location maybe_temp =
2474           (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
2475       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
2476                                                       out,
2477                                                       obj.W(),
2478                                                       offset,
2479                                                       maybe_temp,
2480                                                       /* needs_null_check= */ false,
2481                                                       /* use_load_acquire= */ false);
2482     } else {
2483       codegen_->GenerateArrayLoadWithBakerReadBarrier(
2484           instruction, out, obj.W(), offset, index, /* needs_null_check= */ false);
2485     }
2486   } else {
2487     // General case.
2488     MemOperand source = HeapOperand(obj);
2489     Register length;
2490     if (maybe_compressed_char_at) {
2491       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2492       length = temps.AcquireW();
2493       {
2494         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2495         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2496 
2497         if (instruction->GetArray()->IsIntermediateAddress()) {
2498           DCHECK_LT(count_offset, offset);
2499           int64_t adjusted_offset =
2500               static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
2501           // Note that `adjusted_offset` is negative, so this will be a LDUR.
2502           __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
2503         } else {
2504           __ Ldr(length, HeapOperand(obj, count_offset));
2505         }
2506         codegen_->MaybeRecordImplicitNullCheck(instruction);
2507       }
2508     }
2509     if (index.IsConstant()) {
2510       if (maybe_compressed_char_at) {
2511         vixl::aarch64::Label uncompressed_load, done;
2512         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2513                       "Expecting 0=compressed, 1=uncompressed");
2514         __ Tbnz(length.W(), 0, &uncompressed_load);
2515         __ Ldrb(Register(OutputCPURegister(instruction)),
2516                 HeapOperand(obj, offset + Int64FromLocation(index)));
2517         __ B(&done);
2518         __ Bind(&uncompressed_load);
2519         __ Ldrh(Register(OutputCPURegister(instruction)),
2520                 HeapOperand(obj, offset + (Int64FromLocation(index) << 1)));
2521         __ Bind(&done);
2522       } else {
2523         offset += Int64FromLocation(index) << DataType::SizeShift(type);
2524         source = HeapOperand(obj, offset);
2525       }
2526     } else {
2527       Register temp = temps.AcquireSameSizeAs(obj);
2528       if (instruction->GetArray()->IsIntermediateAddress()) {
2529         // We do not need to compute the intermediate address from the array: the
2530         // input instruction has done it already. See the comment in
2531         // `TryExtractArrayAccessAddress()`.
2532         if (kIsDebugBuild) {
2533           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2534           DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
2535         }
2536         temp = obj;
2537       } else {
2538         __ Add(temp, obj, offset);
2539       }
2540       if (maybe_compressed_char_at) {
2541         vixl::aarch64::Label uncompressed_load, done;
2542         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
2543                       "Expecting 0=compressed, 1=uncompressed");
2544         __ Tbnz(length.W(), 0, &uncompressed_load);
2545         __ Ldrb(Register(OutputCPURegister(instruction)),
2546                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
2547         __ B(&done);
2548         __ Bind(&uncompressed_load);
2549         __ Ldrh(Register(OutputCPURegister(instruction)),
2550                 HeapOperand(temp, XRegisterFrom(index), LSL, 1));
2551         __ Bind(&done);
2552       } else {
2553         source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type));
2554       }
2555     }
2556     if (!maybe_compressed_char_at) {
2557       // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2558       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2559       codegen_->Load(type, OutputCPURegister(instruction), source);
2560       codegen_->MaybeRecordImplicitNullCheck(instruction);
2561     }
2562 
2563     if (type == DataType::Type::kReference) {
2564       static_assert(
2565           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
2566           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
2567       Location obj_loc = locations->InAt(0);
2568       if (index.IsConstant()) {
2569         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset);
2570       } else {
2571         codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index);
2572       }
2573     }
2574   }
2575 }
2576 
2577 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) {
2578   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2579   locations->SetInAt(0, Location::RequiresRegister());
2580   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2581 }
2582 
2583 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) {
2584   uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
2585   vixl::aarch64::Register out = OutputRegister(instruction);
2586   {
2587     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2588     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2589     __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset));
2590     codegen_->MaybeRecordImplicitNullCheck(instruction);
2591   }
2592   // Mask out compression flag from String's array length.
2593   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
2594     __ Lsr(out.W(), out.W(), 1u);
2595   }
2596 }
2597 
2598 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
2599   DataType::Type value_type = instruction->GetComponentType();
2600 
2601   bool needs_type_check = instruction->NeedsTypeCheck();
2602   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
2603       instruction,
2604       needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
2605   locations->SetInAt(0, Location::RequiresRegister());
2606   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
2607   if (IsConstantZeroBitPattern(instruction->InputAt(2))) {
2608     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
2609   } else if (DataType::IsFloatingPointType(value_type)) {
2610     locations->SetInAt(2, Location::RequiresFpuRegister());
2611   } else {
2612     locations->SetInAt(2, Location::RequiresRegister());
2613   }
2614 }
2615 
2616 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
2617   DataType::Type value_type = instruction->GetComponentType();
2618   LocationSummary* locations = instruction->GetLocations();
2619   bool needs_type_check = instruction->NeedsTypeCheck();
2620   bool needs_write_barrier =
2621       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
2622 
2623   Register array = InputRegisterAt(instruction, 0);
2624   CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2);
2625   CPURegister source = value;
2626   Location index = locations->InAt(1);
2627   size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value();
2628   MemOperand destination = HeapOperand(array);
2629   MacroAssembler* masm = GetVIXLAssembler();
2630 
2631   if (!needs_write_barrier) {
2632     DCHECK(!needs_type_check);
2633     if (index.IsConstant()) {
2634       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2635       destination = HeapOperand(array, offset);
2636     } else {
2637       UseScratchRegisterScope temps(masm);
2638       Register temp = temps.AcquireSameSizeAs(array);
2639       if (instruction->GetArray()->IsIntermediateAddress()) {
2640         // We do not need to compute the intermediate address from the array: the
2641         // input instruction has done it already. See the comment in
2642         // `TryExtractArrayAccessAddress()`.
2643         if (kIsDebugBuild) {
2644           HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
2645           DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
2646         }
2647         temp = array;
2648       } else {
2649         __ Add(temp, array, offset);
2650       }
2651       destination = HeapOperand(temp,
2652                                 XRegisterFrom(index),
2653                                 LSL,
2654                                 DataType::SizeShift(value_type));
2655     }
2656     {
2657       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2658       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2659       codegen_->Store(value_type, value, destination);
2660       codegen_->MaybeRecordImplicitNullCheck(instruction);
2661     }
2662   } else {
2663     DCHECK(!instruction->GetArray()->IsIntermediateAddress());
2664 
2665     bool can_value_be_null = instruction->GetValueCanBeNull();
2666     vixl::aarch64::Label do_store;
2667     if (can_value_be_null) {
2668       __ Cbz(Register(value), &do_store);
2669     }
2670 
2671     SlowPathCodeARM64* slow_path = nullptr;
2672     if (needs_type_check) {
2673       slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction);
2674       codegen_->AddSlowPath(slow_path);
2675 
2676       const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2677       const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2678       const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2679 
2680       UseScratchRegisterScope temps(masm);
2681       Register temp = temps.AcquireSameSizeAs(array);
2682       Register temp2 = temps.AcquireSameSizeAs(array);
2683 
2684       // Note that when Baker read barriers are enabled, the type
2685       // checks are performed without read barriers.  This is fine,
2686       // even in the case where a class object is in the from-space
2687       // after the flip, as a comparison involving such a type would
2688       // not produce a false positive; it may of course produce a
2689       // false negative, in which case we would take the ArraySet
2690       // slow path.
2691 
2692       // /* HeapReference<Class> */ temp = array->klass_
2693       {
2694         // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
2695         EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2696         __ Ldr(temp, HeapOperand(array, class_offset));
2697         codegen_->MaybeRecordImplicitNullCheck(instruction);
2698       }
2699       GetAssembler()->MaybeUnpoisonHeapReference(temp);
2700 
2701       // /* HeapReference<Class> */ temp = temp->component_type_
2702       __ Ldr(temp, HeapOperand(temp, component_offset));
2703       // /* HeapReference<Class> */ temp2 = value->klass_
2704       __ Ldr(temp2, HeapOperand(Register(value), class_offset));
2705       // If heap poisoning is enabled, no need to unpoison `temp`
2706       // nor `temp2`, as we are comparing two poisoned references.
2707       __ Cmp(temp, temp2);
2708 
2709       if (instruction->StaticTypeOfArrayIsObjectArray()) {
2710         vixl::aarch64::Label do_put;
2711         __ B(eq, &do_put);
2712         // If heap poisoning is enabled, the `temp` reference has
2713         // not been unpoisoned yet; unpoison it now.
2714         GetAssembler()->MaybeUnpoisonHeapReference(temp);
2715 
2716         // /* HeapReference<Class> */ temp = temp->super_class_
2717         __ Ldr(temp, HeapOperand(temp, super_offset));
2718         // If heap poisoning is enabled, no need to unpoison
2719         // `temp`, as we are comparing against null below.
2720         __ Cbnz(temp, slow_path->GetEntryLabel());
2721         __ Bind(&do_put);
2722       } else {
2723         __ B(ne, slow_path->GetEntryLabel());
2724       }
2725     }
2726 
2727     codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false);
2728 
2729     if (can_value_be_null) {
2730       DCHECK(do_store.IsLinked());
2731       __ Bind(&do_store);
2732     }
2733 
2734     UseScratchRegisterScope temps(masm);
2735     if (kPoisonHeapReferences) {
2736       Register temp_source = temps.AcquireSameSizeAs(array);
2737         DCHECK(value.IsW());
2738       __ Mov(temp_source, value.W());
2739       GetAssembler()->PoisonHeapReference(temp_source);
2740       source = temp_source;
2741     }
2742 
2743     if (index.IsConstant()) {
2744       offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
2745       destination = HeapOperand(array, offset);
2746     } else {
2747       Register temp_base = temps.AcquireSameSizeAs(array);
2748       __ Add(temp_base, array, offset);
2749       destination = HeapOperand(temp_base,
2750                                 XRegisterFrom(index),
2751                                 LSL,
2752                                 DataType::SizeShift(value_type));
2753     }
2754 
2755     {
2756       // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
2757       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
2758       __ Str(source, destination);
2759 
2760       if (can_value_be_null || !needs_type_check) {
2761         codegen_->MaybeRecordImplicitNullCheck(instruction);
2762       }
2763     }
2764 
2765     if (slow_path != nullptr) {
2766       __ Bind(slow_path->GetExitLabel());
2767     }
2768   }
2769 }
2770 
2771 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
2772   RegisterSet caller_saves = RegisterSet::Empty();
2773   InvokeRuntimeCallingConvention calling_convention;
2774   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
2775   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode()));
2776   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
2777 
2778   // If both index and length are constant, we can check the bounds statically and
2779   // generate code accordingly. We want to make sure we generate constant locations
2780   // in that case, regardless of whether they are encodable in the comparison or not.
2781   HInstruction* index = instruction->InputAt(0);
2782   HInstruction* length = instruction->InputAt(1);
2783   bool both_const = index->IsConstant() && length->IsConstant();
2784   locations->SetInAt(0, both_const
2785       ? Location::ConstantLocation(index->AsConstant())
2786       : ARM64EncodableConstantOrRegister(index, instruction));
2787   locations->SetInAt(1, both_const
2788       ? Location::ConstantLocation(length->AsConstant())
2789       : ARM64EncodableConstantOrRegister(length, instruction));
2790 }
2791 
2792 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) {
2793   LocationSummary* locations = instruction->GetLocations();
2794   Location index_loc = locations->InAt(0);
2795   Location length_loc = locations->InAt(1);
2796 
2797   int cmp_first_input = 0;
2798   int cmp_second_input = 1;
2799   Condition cond = hs;
2800 
2801   if (index_loc.IsConstant()) {
2802     int64_t index = Int64FromLocation(index_loc);
2803     if (length_loc.IsConstant()) {
2804       int64_t length = Int64FromLocation(length_loc);
2805       if (index < 0 || index >= length) {
2806         BoundsCheckSlowPathARM64* slow_path =
2807             new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
2808         codegen_->AddSlowPath(slow_path);
2809         __ B(slow_path->GetEntryLabel());
2810       } else {
2811         // BCE will remove the bounds check if we are guaranteed to pass.
2812         // However, some optimization after BCE may have generated this, and we should not
2813         // generate a bounds check if it is a valid range.
2814       }
2815       return;
2816     }
2817     // Only the index is constant: change the order of the operands and commute the condition
2818     // so we can use an immediate constant for the index (only the second input to a cmp
2819     // instruction can be an immediate).
2820     cmp_first_input = 1;
2821     cmp_second_input = 0;
2822     cond = ls;
2823   }
2824   BoundsCheckSlowPathARM64* slow_path =
2825       new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction);
2826   __ Cmp(InputRegisterAt(instruction, cmp_first_input),
2827          InputOperandAt(instruction, cmp_second_input));
2828   codegen_->AddSlowPath(slow_path);
2829   __ B(slow_path->GetEntryLabel(), cond);
2830 }
2831 
2832 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
2833   LocationSummary* locations =
2834       new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
2835   locations->SetInAt(0, Location::RequiresRegister());
2836   if (check->HasUses()) {
2837     locations->SetOut(Location::SameAsFirstInput());
2838   }
2839   // Rely on the type initialization to save everything we need.
2840   locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
2841 }
2842 
2843 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
2844   // We assume the class is not null.
2845   SlowPathCodeARM64* slow_path =
2846       new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
2847   codegen_->AddSlowPath(slow_path);
2848   GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
2849 }
2850 
2851 static bool IsFloatingPointZeroConstant(HInstruction* inst) {
2852   return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero()))
2853       || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero()));
2854 }
2855 
2856 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) {
2857   VRegister lhs_reg = InputFPRegisterAt(instruction, 0);
2858   Location rhs_loc = instruction->GetLocations()->InAt(1);
2859   if (rhs_loc.IsConstant()) {
2860     // 0.0 is the only immediate that can be encoded directly in
2861     // an FCMP instruction.
2862     //
2863     // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
2864     // specify that in a floating-point comparison, positive zero
2865     // and negative zero are considered equal, so we can use the
2866     // literal 0.0 for both cases here.
2867     //
2868     // Note however that some methods (Float.equal, Float.compare,
2869     // Float.compareTo, Double.equal, Double.compare,
2870     // Double.compareTo, Math.max, Math.min, StrictMath.max,
2871     // StrictMath.min) consider 0.0 to be (strictly) greater than
2872     // -0.0. So if we ever translate calls to these methods into a
2873     // HCompare instruction, we must handle the -0.0 case with
2874     // care here.
2875     DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant()));
2876     __ Fcmp(lhs_reg, 0.0);
2877   } else {
2878     __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1));
2879   }
2880 }
2881 
2882 void LocationsBuilderARM64::VisitCompare(HCompare* compare) {
2883   LocationSummary* locations =
2884       new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2885   DataType::Type in_type = compare->InputAt(0)->GetType();
2886   switch (in_type) {
2887     case DataType::Type::kBool:
2888     case DataType::Type::kUint8:
2889     case DataType::Type::kInt8:
2890     case DataType::Type::kUint16:
2891     case DataType::Type::kInt16:
2892     case DataType::Type::kInt32:
2893     case DataType::Type::kInt64: {
2894       locations->SetInAt(0, Location::RequiresRegister());
2895       locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare));
2896       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2897       break;
2898     }
2899     case DataType::Type::kFloat32:
2900     case DataType::Type::kFloat64: {
2901       locations->SetInAt(0, Location::RequiresFpuRegister());
2902       locations->SetInAt(1,
2903                          IsFloatingPointZeroConstant(compare->InputAt(1))
2904                              ? Location::ConstantLocation(compare->InputAt(1)->AsConstant())
2905                              : Location::RequiresFpuRegister());
2906       locations->SetOut(Location::RequiresRegister());
2907       break;
2908     }
2909     default:
2910       LOG(FATAL) << "Unexpected type for compare operation " << in_type;
2911   }
2912 }
2913 
2914 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) {
2915   DataType::Type in_type = compare->InputAt(0)->GetType();
2916 
2917   //  0 if: left == right
2918   //  1 if: left  > right
2919   // -1 if: left  < right
2920   switch (in_type) {
2921     case DataType::Type::kBool:
2922     case DataType::Type::kUint8:
2923     case DataType::Type::kInt8:
2924     case DataType::Type::kUint16:
2925     case DataType::Type::kInt16:
2926     case DataType::Type::kInt32:
2927     case DataType::Type::kInt64: {
2928       Register result = OutputRegister(compare);
2929       Register left = InputRegisterAt(compare, 0);
2930       Operand right = InputOperandAt(compare, 1);
2931       __ Cmp(left, right);
2932       __ Cset(result, ne);          // result == +1 if NE or 0 otherwise
2933       __ Cneg(result, result, lt);  // result == -1 if LT or unchanged otherwise
2934       break;
2935     }
2936     case DataType::Type::kFloat32:
2937     case DataType::Type::kFloat64: {
2938       Register result = OutputRegister(compare);
2939       GenerateFcmp(compare);
2940       __ Cset(result, ne);
2941       __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias()));
2942       break;
2943     }
2944     default:
2945       LOG(FATAL) << "Unimplemented compare type " << in_type;
2946   }
2947 }
2948 
2949 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) {
2950   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
2951 
2952   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
2953     locations->SetInAt(0, Location::RequiresFpuRegister());
2954     locations->SetInAt(1,
2955                        IsFloatingPointZeroConstant(instruction->InputAt(1))
2956                            ? Location::ConstantLocation(instruction->InputAt(1)->AsConstant())
2957                            : Location::RequiresFpuRegister());
2958   } else {
2959     // Integer cases.
2960     locations->SetInAt(0, Location::RequiresRegister());
2961     locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction));
2962   }
2963 
2964   if (!instruction->IsEmittedAtUseSite()) {
2965     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2966   }
2967 }
2968 
2969 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) {
2970   if (instruction->IsEmittedAtUseSite()) {
2971     return;
2972   }
2973 
2974   LocationSummary* locations = instruction->GetLocations();
2975   Register res = RegisterFrom(locations->Out(), instruction->GetType());
2976   IfCondition if_cond = instruction->GetCondition();
2977 
2978   if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) {
2979     GenerateFcmp(instruction);
2980     __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias()));
2981   } else {
2982     // Integer cases.
2983     Register lhs = InputRegisterAt(instruction, 0);
2984     Operand rhs = InputOperandAt(instruction, 1);
2985     __ Cmp(lhs, rhs);
2986     __ Cset(res, ARM64Condition(if_cond));
2987   }
2988 }
2989 
2990 #define FOR_EACH_CONDITION_INSTRUCTION(M)                                                \
2991   M(Equal)                                                                               \
2992   M(NotEqual)                                                                            \
2993   M(LessThan)                                                                            \
2994   M(LessThanOrEqual)                                                                     \
2995   M(GreaterThan)                                                                         \
2996   M(GreaterThanOrEqual)                                                                  \
2997   M(Below)                                                                               \
2998   M(BelowOrEqual)                                                                        \
2999   M(Above)                                                                               \
3000   M(AboveOrEqual)
3001 #define DEFINE_CONDITION_VISITORS(Name)                                                  \
3002 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }         \
3003 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); }
3004 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
3005 #undef DEFINE_CONDITION_VISITORS
3006 #undef FOR_EACH_CONDITION_INSTRUCTION
3007 
3008 void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) {
3009   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3010   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
3011   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
3012 
3013   Register out = OutputRegister(instruction);
3014   Register dividend = InputRegisterAt(instruction, 0);
3015 
3016   if (abs_imm == 2) {
3017     int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
3018     __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
3019   } else {
3020     UseScratchRegisterScope temps(GetVIXLAssembler());
3021     Register temp = temps.AcquireSameSizeAs(out);
3022     __ Add(temp, dividend, abs_imm - 1);
3023     __ Cmp(dividend, 0);
3024     __ Csel(out, temp, dividend, lt);
3025   }
3026 
3027   int ctz_imm = CTZ(abs_imm);
3028   if (imm > 0) {
3029     __ Asr(out, out, ctz_imm);
3030   } else {
3031     __ Neg(out, Operand(out, ASR, ctz_imm));
3032   }
3033 }
3034 
3035 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3036   DCHECK(instruction->IsDiv() || instruction->IsRem());
3037 
3038   LocationSummary* locations = instruction->GetLocations();
3039   Location second = locations->InAt(1);
3040   DCHECK(second.IsConstant());
3041 
3042   Register out = OutputRegister(instruction);
3043   Register dividend = InputRegisterAt(instruction, 0);
3044   int64_t imm = Int64FromConstant(second.GetConstant());
3045 
3046   DataType::Type type = instruction->GetResultType();
3047   DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3048 
3049   int64_t magic;
3050   int shift;
3051   CalculateMagicAndShiftForDivRem(
3052       imm, /* is_long= */ type == DataType::Type::kInt64, &magic, &shift);
3053 
3054   UseScratchRegisterScope temps(GetVIXLAssembler());
3055   Register temp = temps.AcquireSameSizeAs(out);
3056 
3057   // temp = get_high(dividend * magic)
3058   __ Mov(temp, magic);
3059   if (type == DataType::Type::kInt64) {
3060     __ Smulh(temp, dividend, temp);
3061   } else {
3062     __ Smull(temp.X(), dividend, temp);
3063     __ Lsr(temp.X(), temp.X(), 32);
3064   }
3065 
3066   if (imm > 0 && magic < 0) {
3067     __ Add(temp, temp, dividend);
3068   } else if (imm < 0 && magic > 0) {
3069     __ Sub(temp, temp, dividend);
3070   }
3071 
3072   if (shift != 0) {
3073     __ Asr(temp, temp, shift);
3074   }
3075 
3076   if (instruction->IsDiv()) {
3077     __ Sub(out, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
3078   } else {
3079     __ Sub(temp, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31));
3080     // TODO: Strength reduction for msub.
3081     Register temp_imm = temps.AcquireSameSizeAs(out);
3082     __ Mov(temp_imm, imm);
3083     __ Msub(out, temp, temp_imm, dividend);
3084   }
3085 }
3086 
3087 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) {
3088   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
3089 
3090   if (imm == 0) {
3091     // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3092     return;
3093   }
3094 
3095   if (IsPowerOfTwo(AbsOrMin(imm))) {
3096     GenerateIntDivForPower2Denom(instruction);
3097   } else {
3098     // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
3099     DCHECK(imm < -2 || imm > 2) << imm;
3100     GenerateDivRemWithAnyConstant(instruction);
3101   }
3102 }
3103 
3104 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) {
3105   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
3106        << instruction->GetResultType();
3107 
3108   if (instruction->GetLocations()->InAt(1).IsConstant()) {
3109     GenerateIntDivForConstDenom(instruction);
3110   } else {
3111     Register out = OutputRegister(instruction);
3112     Register dividend = InputRegisterAt(instruction, 0);
3113     Register divisor = InputRegisterAt(instruction, 1);
3114     __ Sdiv(out, dividend, divisor);
3115   }
3116 }
3117 
3118 void LocationsBuilderARM64::VisitDiv(HDiv* div) {
3119   LocationSummary* locations =
3120       new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3121   switch (div->GetResultType()) {
3122     case DataType::Type::kInt32:
3123     case DataType::Type::kInt64:
3124       locations->SetInAt(0, Location::RequiresRegister());
3125       locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3126       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3127       break;
3128 
3129     case DataType::Type::kFloat32:
3130     case DataType::Type::kFloat64:
3131       locations->SetInAt(0, Location::RequiresFpuRegister());
3132       locations->SetInAt(1, Location::RequiresFpuRegister());
3133       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3134       break;
3135 
3136     default:
3137       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3138   }
3139 }
3140 
3141 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
3142   DataType::Type type = div->GetResultType();
3143   switch (type) {
3144     case DataType::Type::kInt32:
3145     case DataType::Type::kInt64:
3146       GenerateIntDiv(div);
3147       break;
3148 
3149     case DataType::Type::kFloat32:
3150     case DataType::Type::kFloat64:
3151       __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1));
3152       break;
3153 
3154     default:
3155       LOG(FATAL) << "Unexpected div type " << type;
3156   }
3157 }
3158 
3159 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3160   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3161   locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
3162 }
3163 
3164 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3165   SlowPathCodeARM64* slow_path =
3166       new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction);
3167   codegen_->AddSlowPath(slow_path);
3168   Location value = instruction->GetLocations()->InAt(0);
3169 
3170   DataType::Type type = instruction->GetType();
3171 
3172   if (!DataType::IsIntegralType(type)) {
3173     LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
3174     UNREACHABLE();
3175   }
3176 
3177   if (value.IsConstant()) {
3178     int64_t divisor = Int64FromLocation(value);
3179     if (divisor == 0) {
3180       __ B(slow_path->GetEntryLabel());
3181     } else {
3182       // A division by a non-null constant is valid. We don't need to perform
3183       // any check, so simply fall through.
3184     }
3185   } else {
3186     __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
3187   }
3188 }
3189 
3190 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) {
3191   LocationSummary* locations =
3192       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3193   locations->SetOut(Location::ConstantLocation(constant));
3194 }
3195 
3196 void InstructionCodeGeneratorARM64::VisitDoubleConstant(
3197     HDoubleConstant* constant ATTRIBUTE_UNUSED) {
3198   // Will be generated at use site.
3199 }
3200 
3201 void LocationsBuilderARM64::VisitExit(HExit* exit) {
3202   exit->SetLocations(nullptr);
3203 }
3204 
3205 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
3206 }
3207 
3208 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) {
3209   LocationSummary* locations =
3210       new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
3211   locations->SetOut(Location::ConstantLocation(constant));
3212 }
3213 
3214 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
3215   // Will be generated at use site.
3216 }
3217 
3218 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
3219   if (successor->IsExitBlock()) {
3220     DCHECK(got->GetPrevious()->AlwaysThrows());
3221     return;  // no code needed
3222   }
3223 
3224   HBasicBlock* block = got->GetBlock();
3225   HInstruction* previous = got->GetPrevious();
3226   HLoopInformation* info = block->GetLoopInformation();
3227 
3228   if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
3229     codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
3230     GenerateSuspendCheck(info->GetSuspendCheck(), successor);
3231     return;
3232   }
3233   if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
3234     GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
3235     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
3236   }
3237   if (!codegen_->GoesToNextBlock(block, successor)) {
3238     __ B(codegen_->GetLabelOf(successor));
3239   }
3240 }
3241 
3242 void LocationsBuilderARM64::VisitGoto(HGoto* got) {
3243   got->SetLocations(nullptr);
3244 }
3245 
3246 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) {
3247   HandleGoto(got, got->GetSuccessor());
3248 }
3249 
3250 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3251   try_boundary->SetLocations(nullptr);
3252 }
3253 
3254 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) {
3255   HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
3256   if (!successor->IsExitBlock()) {
3257     HandleGoto(try_boundary, successor);
3258   }
3259 }
3260 
3261 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
3262                                                           size_t condition_input_index,
3263                                                           vixl::aarch64::Label* true_target,
3264                                                           vixl::aarch64::Label* false_target) {
3265   HInstruction* cond = instruction->InputAt(condition_input_index);
3266 
3267   if (true_target == nullptr && false_target == nullptr) {
3268     // Nothing to do. The code always falls through.
3269     return;
3270   } else if (cond->IsIntConstant()) {
3271     // Constant condition, statically compared against "true" (integer value 1).
3272     if (cond->AsIntConstant()->IsTrue()) {
3273       if (true_target != nullptr) {
3274         __ B(true_target);
3275       }
3276     } else {
3277       DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
3278       if (false_target != nullptr) {
3279         __ B(false_target);
3280       }
3281     }
3282     return;
3283   }
3284 
3285   // The following code generates these patterns:
3286   //  (1) true_target == nullptr && false_target != nullptr
3287   //        - opposite condition true => branch to false_target
3288   //  (2) true_target != nullptr && false_target == nullptr
3289   //        - condition true => branch to true_target
3290   //  (3) true_target != nullptr && false_target != nullptr
3291   //        - condition true => branch to true_target
3292   //        - branch to false_target
3293   if (IsBooleanValueOrMaterializedCondition(cond)) {
3294     // The condition instruction has been materialized, compare the output to 0.
3295     Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
3296     DCHECK(cond_val.IsRegister());
3297       if (true_target == nullptr) {
3298       __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
3299     } else {
3300       __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
3301     }
3302   } else {
3303     // The condition instruction has not been materialized, use its inputs as
3304     // the comparison and its condition as the branch condition.
3305     HCondition* condition = cond->AsCondition();
3306 
3307     DataType::Type type = condition->InputAt(0)->GetType();
3308     if (DataType::IsFloatingPointType(type)) {
3309       GenerateFcmp(condition);
3310       if (true_target == nullptr) {
3311         IfCondition opposite_condition = condition->GetOppositeCondition();
3312         __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target);
3313       } else {
3314         __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target);
3315       }
3316     } else {
3317       // Integer cases.
3318       Register lhs = InputRegisterAt(condition, 0);
3319       Operand rhs = InputOperandAt(condition, 1);
3320 
3321       Condition arm64_cond;
3322       vixl::aarch64::Label* non_fallthrough_target;
3323       if (true_target == nullptr) {
3324         arm64_cond = ARM64Condition(condition->GetOppositeCondition());
3325         non_fallthrough_target = false_target;
3326       } else {
3327         arm64_cond = ARM64Condition(condition->GetCondition());
3328         non_fallthrough_target = true_target;
3329       }
3330 
3331       if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) &&
3332           rhs.IsImmediate() && (rhs.GetImmediate() == 0)) {
3333         switch (arm64_cond) {
3334           case eq:
3335             __ Cbz(lhs, non_fallthrough_target);
3336             break;
3337           case ne:
3338             __ Cbnz(lhs, non_fallthrough_target);
3339             break;
3340           case lt:
3341             // Test the sign bit and branch accordingly.
3342             __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3343             break;
3344           case ge:
3345             // Test the sign bit and branch accordingly.
3346             __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
3347             break;
3348           default:
3349             // Without the `static_cast` the compiler throws an error for
3350             // `-Werror=sign-promo`.
3351             LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond);
3352         }
3353       } else {
3354         __ Cmp(lhs, rhs);
3355         __ B(arm64_cond, non_fallthrough_target);
3356       }
3357     }
3358   }
3359 
3360   // If neither branch falls through (case 3), the conditional branch to `true_target`
3361   // was already emitted (case 2) and we need to emit a jump to `false_target`.
3362   if (true_target != nullptr && false_target != nullptr) {
3363     __ B(false_target);
3364   }
3365 }
3366 
3367 void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
3368   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
3369   if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
3370     locations->SetInAt(0, Location::RequiresRegister());
3371   }
3372 }
3373 
3374 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
3375   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
3376   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
3377   vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor);
3378   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) {
3379     true_target = nullptr;
3380   }
3381   vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor);
3382   if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
3383     false_target = nullptr;
3384   }
3385   GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
3386 }
3387 
3388 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3389   LocationSummary* locations = new (GetGraph()->GetAllocator())
3390       LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
3391   InvokeRuntimeCallingConvention calling_convention;
3392   RegisterSet caller_saves = RegisterSet::Empty();
3393   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
3394   locations->SetCustomSlowPathCallerSaves(caller_saves);
3395   if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
3396     locations->SetInAt(0, Location::RequiresRegister());
3397   }
3398 }
3399 
3400 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
3401   SlowPathCodeARM64* slow_path =
3402       deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
3403   GenerateTestAndBranch(deoptimize,
3404                         /* condition_input_index= */ 0,
3405                         slow_path->GetEntryLabel(),
3406                         /* false_target= */ nullptr);
3407 }
3408 
3409 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3410   LocationSummary* locations = new (GetGraph()->GetAllocator())
3411       LocationSummary(flag, LocationSummary::kNoCall);
3412   locations->SetOut(Location::RequiresRegister());
3413 }
3414 
3415 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
3416   __ Ldr(OutputRegister(flag),
3417          MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
3418 }
3419 
3420 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) {
3421   return condition->IsCondition() &&
3422          DataType::IsFloatingPointType(condition->InputAt(0)->GetType());
3423 }
3424 
3425 static inline Condition GetConditionForSelect(HCondition* condition) {
3426   IfCondition cond = condition->AsCondition()->GetCondition();
3427   return IsConditionOnFloatingPointValues(condition) ? ARM64FPCondition(cond, condition->IsGtBias())
3428                                                      : ARM64Condition(cond);
3429 }
3430 
3431 void LocationsBuilderARM64::VisitSelect(HSelect* select) {
3432   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
3433   if (DataType::IsFloatingPointType(select->GetType())) {
3434     locations->SetInAt(0, Location::RequiresFpuRegister());
3435     locations->SetInAt(1, Location::RequiresFpuRegister());
3436     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3437   } else {
3438     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
3439     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
3440     bool is_true_value_constant = cst_true_value != nullptr;
3441     bool is_false_value_constant = cst_false_value != nullptr;
3442     // Ask VIXL whether we should synthesize constants in registers.
3443     // We give an arbitrary register to VIXL when dealing with non-constant inputs.
3444     Operand true_op = is_true_value_constant ?
3445         Operand(Int64FromConstant(cst_true_value)) : Operand(x1);
3446     Operand false_op = is_false_value_constant ?
3447         Operand(Int64FromConstant(cst_false_value)) : Operand(x2);
3448     bool true_value_in_register = false;
3449     bool false_value_in_register = false;
3450     MacroAssembler::GetCselSynthesisInformation(
3451         x0, true_op, false_op, &true_value_in_register, &false_value_in_register);
3452     true_value_in_register |= !is_true_value_constant;
3453     false_value_in_register |= !is_false_value_constant;
3454 
3455     locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister()
3456                                                  : Location::ConstantLocation(cst_true_value));
3457     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
3458                                                   : Location::ConstantLocation(cst_false_value));
3459     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3460   }
3461 
3462   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
3463     locations->SetInAt(2, Location::RequiresRegister());
3464   }
3465 }
3466 
3467 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) {
3468   HInstruction* cond = select->GetCondition();
3469   Condition csel_cond;
3470 
3471   if (IsBooleanValueOrMaterializedCondition(cond)) {
3472     if (cond->IsCondition() && cond->GetNext() == select) {
3473       // Use the condition flags set by the previous instruction.
3474       csel_cond = GetConditionForSelect(cond->AsCondition());
3475     } else {
3476       __ Cmp(InputRegisterAt(select, 2), 0);
3477       csel_cond = ne;
3478     }
3479   } else if (IsConditionOnFloatingPointValues(cond)) {
3480     GenerateFcmp(cond);
3481     csel_cond = GetConditionForSelect(cond->AsCondition());
3482   } else {
3483     __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
3484     csel_cond = GetConditionForSelect(cond->AsCondition());
3485   }
3486 
3487   if (DataType::IsFloatingPointType(select->GetType())) {
3488     __ Fcsel(OutputFPRegister(select),
3489              InputFPRegisterAt(select, 1),
3490              InputFPRegisterAt(select, 0),
3491              csel_cond);
3492   } else {
3493     __ Csel(OutputRegister(select),
3494             InputOperandAt(select, 1),
3495             InputOperandAt(select, 0),
3496             csel_cond);
3497   }
3498 }
3499 
3500 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
3501   new (GetGraph()->GetAllocator()) LocationSummary(info);
3502 }
3503 
3504 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) {
3505   // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
3506 }
3507 
3508 void CodeGeneratorARM64::GenerateNop() {
3509   __ Nop();
3510 }
3511 
3512 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3513   HandleFieldGet(instruction, instruction->GetFieldInfo());
3514 }
3515 
3516 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
3517   HandleFieldGet(instruction, instruction->GetFieldInfo());
3518 }
3519 
3520 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3521   HandleFieldSet(instruction);
3522 }
3523 
3524 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
3525   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
3526 }
3527 
3528 // Temp is used for read barrier.
3529 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
3530   if (kEmitCompilerReadBarrier &&
3531       (kUseBakerReadBarrier ||
3532           type_check_kind == TypeCheckKind::kAbstractClassCheck ||
3533           type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
3534           type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
3535     return 1;
3536   }
3537   return 0;
3538 }
3539 
3540 // Interface case has 3 temps, one for holding the number of interfaces, one for the current
3541 // interface pointer, one for loading the current interface.
3542 // The other checks have one temp for loading the object's class.
3543 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
3544   if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
3545     return 3;
3546   }
3547   return 1 + NumberOfInstanceOfTemps(type_check_kind);
3548 }
3549 
3550 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
3551   LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
3552   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3553   bool baker_read_barrier_slow_path = false;
3554   switch (type_check_kind) {
3555     case TypeCheckKind::kExactCheck:
3556     case TypeCheckKind::kAbstractClassCheck:
3557     case TypeCheckKind::kClassHierarchyCheck:
3558     case TypeCheckKind::kArrayObjectCheck: {
3559       bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
3560       call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
3561       baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
3562       break;
3563     }
3564     case TypeCheckKind::kArrayCheck:
3565     case TypeCheckKind::kUnresolvedCheck:
3566     case TypeCheckKind::kInterfaceCheck:
3567       call_kind = LocationSummary::kCallOnSlowPath;
3568       break;
3569     case TypeCheckKind::kBitstringCheck:
3570       break;
3571   }
3572 
3573   LocationSummary* locations =
3574       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
3575   if (baker_read_barrier_slow_path) {
3576     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
3577   }
3578   locations->SetInAt(0, Location::RequiresRegister());
3579   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
3580     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
3581     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
3582     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
3583   } else {
3584     locations->SetInAt(1, Location::RequiresRegister());
3585   }
3586   // The "out" register is used as a temporary, so it overlaps with the inputs.
3587   // Note that TypeCheckSlowPathARM64 uses this register too.
3588   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
3589   // Add temps if necessary for read barriers.
3590   locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
3591 }
3592 
3593 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
3594   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3595   LocationSummary* locations = instruction->GetLocations();
3596   Location obj_loc = locations->InAt(0);
3597   Register obj = InputRegisterAt(instruction, 0);
3598   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
3599       ? Register()
3600       : InputRegisterAt(instruction, 1);
3601   Location out_loc = locations->Out();
3602   Register out = OutputRegister(instruction);
3603   const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
3604   DCHECK_LE(num_temps, 1u);
3605   Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
3606   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3607   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3608   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3609   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3610 
3611   vixl::aarch64::Label done, zero;
3612   SlowPathCodeARM64* slow_path = nullptr;
3613 
3614   // Return 0 if `obj` is null.
3615   // Avoid null check if we know `obj` is not null.
3616   if (instruction->MustDoNullCheck()) {
3617     __ Cbz(obj, &zero);
3618   }
3619 
3620   switch (type_check_kind) {
3621     case TypeCheckKind::kExactCheck: {
3622       ReadBarrierOption read_barrier_option =
3623           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3624       // /* HeapReference<Class> */ out = obj->klass_
3625       GenerateReferenceLoadTwoRegisters(instruction,
3626                                         out_loc,
3627                                         obj_loc,
3628                                         class_offset,
3629                                         maybe_temp_loc,
3630                                         read_barrier_option);
3631       __ Cmp(out, cls);
3632       __ Cset(out, eq);
3633       if (zero.IsLinked()) {
3634         __ B(&done);
3635       }
3636       break;
3637     }
3638 
3639     case TypeCheckKind::kAbstractClassCheck: {
3640       ReadBarrierOption read_barrier_option =
3641           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3642       // /* HeapReference<Class> */ out = obj->klass_
3643       GenerateReferenceLoadTwoRegisters(instruction,
3644                                         out_loc,
3645                                         obj_loc,
3646                                         class_offset,
3647                                         maybe_temp_loc,
3648                                         read_barrier_option);
3649       // If the class is abstract, we eagerly fetch the super class of the
3650       // object to avoid doing a comparison we know will fail.
3651       vixl::aarch64::Label loop, success;
3652       __ Bind(&loop);
3653       // /* HeapReference<Class> */ out = out->super_class_
3654       GenerateReferenceLoadOneRegister(instruction,
3655                                        out_loc,
3656                                        super_offset,
3657                                        maybe_temp_loc,
3658                                        read_barrier_option);
3659       // If `out` is null, we use it for the result, and jump to `done`.
3660       __ Cbz(out, &done);
3661       __ Cmp(out, cls);
3662       __ B(ne, &loop);
3663       __ Mov(out, 1);
3664       if (zero.IsLinked()) {
3665         __ B(&done);
3666       }
3667       break;
3668     }
3669 
3670     case TypeCheckKind::kClassHierarchyCheck: {
3671       ReadBarrierOption read_barrier_option =
3672           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3673       // /* HeapReference<Class> */ out = obj->klass_
3674       GenerateReferenceLoadTwoRegisters(instruction,
3675                                         out_loc,
3676                                         obj_loc,
3677                                         class_offset,
3678                                         maybe_temp_loc,
3679                                         read_barrier_option);
3680       // Walk over the class hierarchy to find a match.
3681       vixl::aarch64::Label loop, success;
3682       __ Bind(&loop);
3683       __ Cmp(out, cls);
3684       __ B(eq, &success);
3685       // /* HeapReference<Class> */ out = out->super_class_
3686       GenerateReferenceLoadOneRegister(instruction,
3687                                        out_loc,
3688                                        super_offset,
3689                                        maybe_temp_loc,
3690                                        read_barrier_option);
3691       __ Cbnz(out, &loop);
3692       // If `out` is null, we use it for the result, and jump to `done`.
3693       __ B(&done);
3694       __ Bind(&success);
3695       __ Mov(out, 1);
3696       if (zero.IsLinked()) {
3697         __ B(&done);
3698       }
3699       break;
3700     }
3701 
3702     case TypeCheckKind::kArrayObjectCheck: {
3703       ReadBarrierOption read_barrier_option =
3704           CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
3705       // /* HeapReference<Class> */ out = obj->klass_
3706       GenerateReferenceLoadTwoRegisters(instruction,
3707                                         out_loc,
3708                                         obj_loc,
3709                                         class_offset,
3710                                         maybe_temp_loc,
3711                                         read_barrier_option);
3712       // Do an exact check.
3713       vixl::aarch64::Label exact_check;
3714       __ Cmp(out, cls);
3715       __ B(eq, &exact_check);
3716       // Otherwise, we need to check that the object's class is a non-primitive array.
3717       // /* HeapReference<Class> */ out = out->component_type_
3718       GenerateReferenceLoadOneRegister(instruction,
3719                                        out_loc,
3720                                        component_offset,
3721                                        maybe_temp_loc,
3722                                        read_barrier_option);
3723       // If `out` is null, we use it for the result, and jump to `done`.
3724       __ Cbz(out, &done);
3725       __ Ldrh(out, HeapOperand(out, primitive_offset));
3726       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3727       __ Cbnz(out, &zero);
3728       __ Bind(&exact_check);
3729       __ Mov(out, 1);
3730       __ B(&done);
3731       break;
3732     }
3733 
3734     case TypeCheckKind::kArrayCheck: {
3735       // No read barrier since the slow path will retry upon failure.
3736       // /* HeapReference<Class> */ out = obj->klass_
3737       GenerateReferenceLoadTwoRegisters(instruction,
3738                                         out_loc,
3739                                         obj_loc,
3740                                         class_offset,
3741                                         maybe_temp_loc,
3742                                         kWithoutReadBarrier);
3743       __ Cmp(out, cls);
3744       DCHECK(locations->OnlyCallsOnSlowPath());
3745       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
3746           instruction, /* is_fatal= */ false);
3747       codegen_->AddSlowPath(slow_path);
3748       __ B(ne, slow_path->GetEntryLabel());
3749       __ Mov(out, 1);
3750       if (zero.IsLinked()) {
3751         __ B(&done);
3752       }
3753       break;
3754     }
3755 
3756     case TypeCheckKind::kUnresolvedCheck:
3757     case TypeCheckKind::kInterfaceCheck: {
3758       // Note that we indeed only call on slow path, but we always go
3759       // into the slow path for the unresolved and interface check
3760       // cases.
3761       //
3762       // We cannot directly call the InstanceofNonTrivial runtime
3763       // entry point without resorting to a type checking slow path
3764       // here (i.e. by calling InvokeRuntime directly), as it would
3765       // require to assign fixed registers for the inputs of this
3766       // HInstanceOf instruction (following the runtime calling
3767       // convention), which might be cluttered by the potential first
3768       // read barrier emission at the beginning of this method.
3769       //
3770       // TODO: Introduce a new runtime entry point taking the object
3771       // to test (instead of its class) as argument, and let it deal
3772       // with the read barrier issues. This will let us refactor this
3773       // case of the `switch` code as it was previously (with a direct
3774       // call to the runtime not using a type checking slow path).
3775       // This should also be beneficial for the other cases above.
3776       DCHECK(locations->OnlyCallsOnSlowPath());
3777       slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
3778           instruction, /* is_fatal= */ false);
3779       codegen_->AddSlowPath(slow_path);
3780       __ B(slow_path->GetEntryLabel());
3781       if (zero.IsLinked()) {
3782         __ B(&done);
3783       }
3784       break;
3785     }
3786 
3787     case TypeCheckKind::kBitstringCheck: {
3788       // /* HeapReference<Class> */ temp = obj->klass_
3789       GenerateReferenceLoadTwoRegisters(instruction,
3790                                         out_loc,
3791                                         obj_loc,
3792                                         class_offset,
3793                                         maybe_temp_loc,
3794                                         kWithoutReadBarrier);
3795 
3796       GenerateBitstringTypeCheckCompare(instruction, out);
3797       __ Cset(out, eq);
3798       if (zero.IsLinked()) {
3799         __ B(&done);
3800       }
3801       break;
3802     }
3803   }
3804 
3805   if (zero.IsLinked()) {
3806     __ Bind(&zero);
3807     __ Mov(out, 0);
3808   }
3809 
3810   if (done.IsLinked()) {
3811     __ Bind(&done);
3812   }
3813 
3814   if (slow_path != nullptr) {
3815     __ Bind(slow_path->GetExitLabel());
3816   }
3817 }
3818 
3819 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
3820   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3821   LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
3822   LocationSummary* locations =
3823       new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
3824   locations->SetInAt(0, Location::RequiresRegister());
3825   if (type_check_kind == TypeCheckKind::kBitstringCheck) {
3826     locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
3827     locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
3828     locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
3829   } else {
3830     locations->SetInAt(1, Location::RequiresRegister());
3831   }
3832   // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
3833   locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
3834 }
3835 
3836 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
3837   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
3838   LocationSummary* locations = instruction->GetLocations();
3839   Location obj_loc = locations->InAt(0);
3840   Register obj = InputRegisterAt(instruction, 0);
3841   Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
3842       ? Register()
3843       : InputRegisterAt(instruction, 1);
3844   const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
3845   DCHECK_GE(num_temps, 1u);
3846   DCHECK_LE(num_temps, 3u);
3847   Location temp_loc = locations->GetTemp(0);
3848   Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
3849   Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
3850   Register temp = WRegisterFrom(temp_loc);
3851   const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
3852   const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
3853   const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
3854   const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
3855   const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
3856   const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
3857   const uint32_t object_array_data_offset =
3858       mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3859 
3860   bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
3861   SlowPathCodeARM64* type_check_slow_path =
3862       new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
3863           instruction, is_type_check_slow_path_fatal);
3864   codegen_->AddSlowPath(type_check_slow_path);
3865 
3866   vixl::aarch64::Label done;
3867   // Avoid null check if we know obj is not null.
3868   if (instruction->MustDoNullCheck()) {
3869     __ Cbz(obj, &done);
3870   }
3871 
3872   switch (type_check_kind) {
3873     case TypeCheckKind::kExactCheck:
3874     case TypeCheckKind::kArrayCheck: {
3875       // /* HeapReference<Class> */ temp = obj->klass_
3876       GenerateReferenceLoadTwoRegisters(instruction,
3877                                         temp_loc,
3878                                         obj_loc,
3879                                         class_offset,
3880                                         maybe_temp2_loc,
3881                                         kWithoutReadBarrier);
3882 
3883       __ Cmp(temp, cls);
3884       // Jump to slow path for throwing the exception or doing a
3885       // more involved array check.
3886       __ B(ne, type_check_slow_path->GetEntryLabel());
3887       break;
3888     }
3889 
3890     case TypeCheckKind::kAbstractClassCheck: {
3891       // /* HeapReference<Class> */ temp = obj->klass_
3892       GenerateReferenceLoadTwoRegisters(instruction,
3893                                         temp_loc,
3894                                         obj_loc,
3895                                         class_offset,
3896                                         maybe_temp2_loc,
3897                                         kWithoutReadBarrier);
3898 
3899       // If the class is abstract, we eagerly fetch the super class of the
3900       // object to avoid doing a comparison we know will fail.
3901       vixl::aarch64::Label loop;
3902       __ Bind(&loop);
3903       // /* HeapReference<Class> */ temp = temp->super_class_
3904       GenerateReferenceLoadOneRegister(instruction,
3905                                        temp_loc,
3906                                        super_offset,
3907                                        maybe_temp2_loc,
3908                                        kWithoutReadBarrier);
3909 
3910       // If the class reference currently in `temp` is null, jump to the slow path to throw the
3911       // exception.
3912       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
3913       // Otherwise, compare classes.
3914       __ Cmp(temp, cls);
3915       __ B(ne, &loop);
3916       break;
3917     }
3918 
3919     case TypeCheckKind::kClassHierarchyCheck: {
3920       // /* HeapReference<Class> */ temp = obj->klass_
3921       GenerateReferenceLoadTwoRegisters(instruction,
3922                                         temp_loc,
3923                                         obj_loc,
3924                                         class_offset,
3925                                         maybe_temp2_loc,
3926                                         kWithoutReadBarrier);
3927 
3928       // Walk over the class hierarchy to find a match.
3929       vixl::aarch64::Label loop;
3930       __ Bind(&loop);
3931       __ Cmp(temp, cls);
3932       __ B(eq, &done);
3933 
3934       // /* HeapReference<Class> */ temp = temp->super_class_
3935       GenerateReferenceLoadOneRegister(instruction,
3936                                        temp_loc,
3937                                        super_offset,
3938                                        maybe_temp2_loc,
3939                                        kWithoutReadBarrier);
3940 
3941       // If the class reference currently in `temp` is not null, jump
3942       // back at the beginning of the loop.
3943       __ Cbnz(temp, &loop);
3944       // Otherwise, jump to the slow path to throw the exception.
3945       __ B(type_check_slow_path->GetEntryLabel());
3946       break;
3947     }
3948 
3949     case TypeCheckKind::kArrayObjectCheck: {
3950       // /* HeapReference<Class> */ temp = obj->klass_
3951       GenerateReferenceLoadTwoRegisters(instruction,
3952                                         temp_loc,
3953                                         obj_loc,
3954                                         class_offset,
3955                                         maybe_temp2_loc,
3956                                         kWithoutReadBarrier);
3957 
3958       // Do an exact check.
3959       __ Cmp(temp, cls);
3960       __ B(eq, &done);
3961 
3962       // Otherwise, we need to check that the object's class is a non-primitive array.
3963       // /* HeapReference<Class> */ temp = temp->component_type_
3964       GenerateReferenceLoadOneRegister(instruction,
3965                                        temp_loc,
3966                                        component_offset,
3967                                        maybe_temp2_loc,
3968                                        kWithoutReadBarrier);
3969 
3970       // If the component type is null, jump to the slow path to throw the exception.
3971       __ Cbz(temp, type_check_slow_path->GetEntryLabel());
3972       // Otherwise, the object is indeed an array. Further check that this component type is not a
3973       // primitive type.
3974       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
3975       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
3976       __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
3977       break;
3978     }
3979 
3980     case TypeCheckKind::kUnresolvedCheck:
3981       // We always go into the type check slow path for the unresolved check cases.
3982       //
3983       // We cannot directly call the CheckCast runtime entry point
3984       // without resorting to a type checking slow path here (i.e. by
3985       // calling InvokeRuntime directly), as it would require to
3986       // assign fixed registers for the inputs of this HInstanceOf
3987       // instruction (following the runtime calling convention), which
3988       // might be cluttered by the potential first read barrier
3989       // emission at the beginning of this method.
3990       __ B(type_check_slow_path->GetEntryLabel());
3991       break;
3992     case TypeCheckKind::kInterfaceCheck: {
3993       // /* HeapReference<Class> */ temp = obj->klass_
3994       GenerateReferenceLoadTwoRegisters(instruction,
3995                                         temp_loc,
3996                                         obj_loc,
3997                                         class_offset,
3998                                         maybe_temp2_loc,
3999                                         kWithoutReadBarrier);
4000 
4001       // /* HeapReference<Class> */ temp = temp->iftable_
4002       GenerateReferenceLoadTwoRegisters(instruction,
4003                                         temp_loc,
4004                                         temp_loc,
4005                                         iftable_offset,
4006                                         maybe_temp2_loc,
4007                                         kWithoutReadBarrier);
4008       // Iftable is never null.
4009       __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
4010       // Loop through the iftable and check if any class matches.
4011       vixl::aarch64::Label start_loop;
4012       __ Bind(&start_loop);
4013       __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
4014       __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
4015       GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
4016       // Go to next interface.
4017       __ Add(temp, temp, 2 * kHeapReferenceSize);
4018       __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
4019       // Compare the classes and continue the loop if they do not match.
4020       __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
4021       __ B(ne, &start_loop);
4022       break;
4023     }
4024 
4025     case TypeCheckKind::kBitstringCheck: {
4026       // /* HeapReference<Class> */ temp = obj->klass_
4027       GenerateReferenceLoadTwoRegisters(instruction,
4028                                         temp_loc,
4029                                         obj_loc,
4030                                         class_offset,
4031                                         maybe_temp2_loc,
4032                                         kWithoutReadBarrier);
4033 
4034       GenerateBitstringTypeCheckCompare(instruction, temp);
4035       __ B(ne, type_check_slow_path->GetEntryLabel());
4036       break;
4037     }
4038   }
4039   __ Bind(&done);
4040 
4041   __ Bind(type_check_slow_path->GetExitLabel());
4042 }
4043 
4044 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
4045   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4046   locations->SetOut(Location::ConstantLocation(constant));
4047 }
4048 
4049 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
4050   // Will be generated at use site.
4051 }
4052 
4053 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) {
4054   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
4055   locations->SetOut(Location::ConstantLocation(constant));
4056 }
4057 
4058 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
4059   // Will be generated at use site.
4060 }
4061 
4062 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4063   // The trampoline uses the same calling convention as dex calling conventions,
4064   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
4065   // the method_idx.
4066   HandleInvoke(invoke);
4067 }
4068 
4069 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
4070   codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
4071   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4072 }
4073 
4074 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
4075   InvokeDexCallingConventionVisitorARM64 calling_convention_visitor;
4076   CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
4077 }
4078 
4079 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4080   HandleInvoke(invoke);
4081 }
4082 
4083 void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
4084                                                        Register klass) {
4085   DCHECK_EQ(klass.GetCode(), 0u);
4086   // We know the destination of an intrinsic, so no need to record inline
4087   // caches.
4088   if (!instruction->GetLocations()->Intrinsified() &&
4089       GetGraph()->IsCompilingBaseline() &&
4090       !Runtime::Current()->IsAotCompiler()) {
4091     DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
4092     ScopedObjectAccess soa(Thread::Current());
4093     ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
4094     if (info != nullptr) {
4095       InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
4096       uint64_t address = reinterpret_cast64<uint64_t>(cache);
4097       vixl::aarch64::Label done;
4098       __ Mov(x8, address);
4099       __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
4100       // Fast path for a monomorphic cache.
4101       __ Cmp(klass, x9);
4102       __ B(eq, &done);
4103       InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
4104       __ Bind(&done);
4105     }
4106   }
4107 }
4108 
4109 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
4110   // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
4111   LocationSummary* locations = invoke->GetLocations();
4112   Register temp = XRegisterFrom(locations->GetTemp(0));
4113   Location receiver = locations->InAt(0);
4114   Offset class_offset = mirror::Object::ClassOffset();
4115   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4116 
4117   // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4118   if (receiver.IsStackSlot()) {
4119     __ Ldr(temp.W(), StackOperandFrom(receiver));
4120     {
4121       EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4122       // /* HeapReference<Class> */ temp = temp->klass_
4123       __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
4124       codegen_->MaybeRecordImplicitNullCheck(invoke);
4125     }
4126   } else {
4127     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4128     // /* HeapReference<Class> */ temp = receiver->klass_
4129     __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
4130     codegen_->MaybeRecordImplicitNullCheck(invoke);
4131   }
4132 
4133   // Instead of simply (possibly) unpoisoning `temp` here, we should
4134   // emit a read barrier for the previous class reference load.
4135   // However this is not required in practice, as this is an
4136   // intermediate/temporary reference and because the current
4137   // concurrent copying collector keeps the from-space memory
4138   // intact/accessible until the end of the marking phase (the
4139   // concurrent copying collector may not in the future).
4140   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4141 
4142   // If we're compiling baseline, update the inline cache.
4143   codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
4144 
4145   // The register ip1 is required to be used for the hidden argument in
4146   // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
4147   MacroAssembler* masm = GetVIXLAssembler();
4148   UseScratchRegisterScope scratch_scope(masm);
4149   scratch_scope.Exclude(ip1);
4150   __ Mov(ip1, invoke->GetDexMethodIndex());
4151 
4152   __ Ldr(temp,
4153       MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
4154   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4155       invoke->GetImtIndex(), kArm64PointerSize));
4156   // temp = temp->GetImtEntryAt(method_offset);
4157   __ Ldr(temp, MemOperand(temp, method_offset));
4158   // lr = temp->GetEntryPoint();
4159   __ Ldr(lr, MemOperand(temp, entry_point.Int32Value()));
4160 
4161   {
4162     // Ensure the pc position is recorded immediately after the `blr` instruction.
4163     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4164 
4165     // lr();
4166     __ blr(lr);
4167     DCHECK(!codegen_->IsLeafMethod());
4168     codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
4169   }
4170 
4171   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4172 }
4173 
4174 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4175   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4176   if (intrinsic.TryDispatch(invoke)) {
4177     return;
4178   }
4179 
4180   HandleInvoke(invoke);
4181 }
4182 
4183 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4184   // Explicit clinit checks triggered by static invokes must have been pruned by
4185   // art::PrepareForRegisterAllocation.
4186   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4187 
4188   IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_);
4189   if (intrinsic.TryDispatch(invoke)) {
4190     return;
4191   }
4192 
4193   HandleInvoke(invoke);
4194 }
4195 
4196 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) {
4197   if (invoke->GetLocations()->Intrinsified()) {
4198     IntrinsicCodeGeneratorARM64 intrinsic(codegen);
4199     intrinsic.Dispatch(invoke);
4200     return true;
4201   }
4202   return false;
4203 }
4204 
4205 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
4206       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
4207       ArtMethod* method ATTRIBUTE_UNUSED) {
4208   // On ARM64 we support all dispatch types.
4209   return desired_dispatch_info;
4210 }
4211 
4212 void CodeGeneratorARM64::GenerateStaticOrDirectCall(
4213     HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
4214   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
4215   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
4216   switch (invoke->GetMethodLoadKind()) {
4217     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
4218       uint32_t offset =
4219           GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
4220       // temp = thread->string_init_entrypoint
4221       __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset));
4222       break;
4223     }
4224     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
4225       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
4226       break;
4227     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
4228       DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
4229       // Add ADRP with its PC-relative method patch.
4230       vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod());
4231       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4232       // Add ADD with its PC-relative method patch.
4233       vixl::aarch64::Label* add_label =
4234           NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label);
4235       EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
4236       break;
4237     }
4238     case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
4239       // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
4240       uint32_t boot_image_offset = GetBootImageOffset(invoke);
4241       vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
4242       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4243       // Add LDR with its PC-relative .data.bimg.rel.ro patch.
4244       vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
4245       // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
4246       EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));
4247       break;
4248     }
4249     case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
4250       // Add ADRP with its PC-relative .bss entry patch.
4251       MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
4252       vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
4253       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
4254       // Add LDR with its PC-relative .bss entry patch.
4255       vixl::aarch64::Label* ldr_label =
4256           NewMethodBssEntryPatch(target_method, adrp_label);
4257       // All aligned loads are implicitly atomic consume operations on ARM64.
4258       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
4259       break;
4260     }
4261     case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
4262       // Load method address from literal pool.
4263       __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
4264       break;
4265     case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
4266       GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
4267       return;  // No code pointer retrieval; the runtime performs the call directly.
4268     }
4269   }
4270 
4271   switch (invoke->GetCodePtrLocation()) {
4272     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
4273       {
4274         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4275         ExactAssemblyScope eas(GetVIXLAssembler(),
4276                                kInstructionSize,
4277                                CodeBufferCheckScope::kExactSize);
4278         __ bl(&frame_entry_label_);
4279         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4280       }
4281       break;
4282     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
4283       // LR = callee_method->entry_point_from_quick_compiled_code_;
4284       __ Ldr(lr, MemOperand(
4285           XRegisterFrom(callee_method),
4286           ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
4287       {
4288         // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4289         ExactAssemblyScope eas(GetVIXLAssembler(),
4290                                kInstructionSize,
4291                                CodeBufferCheckScope::kExactSize);
4292         // lr()
4293         __ blr(lr);
4294         RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4295       }
4296       break;
4297   }
4298 
4299   DCHECK(!IsLeafMethod());
4300 }
4301 
4302 void CodeGeneratorARM64::GenerateVirtualCall(
4303     HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
4304   // Use the calling convention instead of the location of the receiver, as
4305   // intrinsics may have put the receiver in a different register. In the intrinsics
4306   // slow path, the arguments have been moved to the right place, so here we are
4307   // guaranteed that the receiver is the first register of the calling convention.
4308   InvokeDexCallingConvention calling_convention;
4309   Register receiver = calling_convention.GetRegisterAt(0);
4310   Register temp = XRegisterFrom(temp_in);
4311   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4312       invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
4313   Offset class_offset = mirror::Object::ClassOffset();
4314   Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
4315 
4316   DCHECK(receiver.IsRegister());
4317 
4318   {
4319     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
4320     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
4321     // /* HeapReference<Class> */ temp = receiver->klass_
4322     __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
4323     MaybeRecordImplicitNullCheck(invoke);
4324   }
4325   // Instead of simply (possibly) unpoisoning `temp` here, we should
4326   // emit a read barrier for the previous class reference load.
4327   // intermediate/temporary reference and because the current
4328   // concurrent copying collector keeps the from-space memory
4329   // intact/accessible until the end of the marking phase (the
4330   // concurrent copying collector may not in the future).
4331   GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
4332 
4333   // If we're compiling baseline, update the inline cache.
4334   MaybeGenerateInlineCacheCheck(invoke, temp);
4335 
4336   // temp = temp->GetMethodAt(method_offset);
4337   __ Ldr(temp, MemOperand(temp, method_offset));
4338   // lr = temp->GetEntryPoint();
4339   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
4340   {
4341     // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
4342     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
4343     // lr();
4344     __ blr(lr);
4345     RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
4346   }
4347 }
4348 
4349 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4350   HandleInvoke(invoke);
4351 }
4352 
4353 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
4354   codegen_->GenerateInvokePolymorphicCall(invoke);
4355   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4356 }
4357 
4358 void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
4359   HandleInvoke(invoke);
4360 }
4361 
4362 void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
4363   codegen_->GenerateInvokeCustomCall(invoke);
4364   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4365 }
4366 
4367 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch(
4368     uint32_t intrinsic_data,
4369     vixl::aarch64::Label* adrp_label) {
4370   return NewPcRelativePatch(
4371       /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_other_patches_);
4372 }
4373 
4374 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
4375     uint32_t boot_image_offset,
4376     vixl::aarch64::Label* adrp_label) {
4377   return NewPcRelativePatch(
4378       /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_other_patches_);
4379 }
4380 
4381 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
4382     MethodReference target_method,
4383     vixl::aarch64::Label* adrp_label) {
4384   return NewPcRelativePatch(
4385       target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_);
4386 }
4387 
4388 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
4389     MethodReference target_method,
4390     vixl::aarch64::Label* adrp_label) {
4391   return NewPcRelativePatch(
4392       target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_);
4393 }
4394 
4395 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch(
4396     const DexFile& dex_file,
4397     dex::TypeIndex type_index,
4398     vixl::aarch64::Label* adrp_label) {
4399   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_);
4400 }
4401 
4402 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch(
4403     const DexFile& dex_file,
4404     dex::TypeIndex type_index,
4405     vixl::aarch64::Label* adrp_label) {
4406   return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
4407 }
4408 
4409 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch(
4410     const DexFile& dex_file,
4411     dex::StringIndex string_index,
4412     vixl::aarch64::Label* adrp_label) {
4413   return NewPcRelativePatch(
4414       &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_);
4415 }
4416 
4417 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
4418     const DexFile& dex_file,
4419     dex::StringIndex string_index,
4420     vixl::aarch64::Label* adrp_label) {
4421   return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
4422 }
4423 
4424 void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) {
4425   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
4426   DCHECK(!Runtime::Current()->UseJitCompilation());
4427   call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value());
4428   vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label;
4429   __ bind(bl_label);
4430   __ bl(static_cast<int64_t>(0));  // Placeholder, patched at link-time.
4431 }
4432 
4433 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
4434   DCHECK(!__ AllowMacroInstructions());  // In ExactAssemblyScope.
4435   if (Runtime::Current()->UseJitCompilation()) {
4436     auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
4437     vixl::aarch64::Label* slow_path_entry = &it->second.label;
4438     __ cbnz(mr, slow_path_entry);
4439   } else {
4440     baker_read_barrier_patches_.emplace_back(custom_data);
4441     vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
4442     __ bind(cbnz_label);
4443     __ cbnz(mr, static_cast<int64_t>(0));  // Placeholder, patched at link-time.
4444   }
4445 }
4446 
4447 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
4448     const DexFile* dex_file,
4449     uint32_t offset_or_index,
4450     vixl::aarch64::Label* adrp_label,
4451     ArenaDeque<PcRelativePatchInfo>* patches) {
4452   // Add a patch entry and return the label.
4453   patches->emplace_back(dex_file, offset_or_index);
4454   PcRelativePatchInfo* info = &patches->back();
4455   vixl::aarch64::Label* label = &info->label;
4456   // If adrp_label is null, this is the ADRP patch and needs to point to its own label.
4457   info->pc_insn_label = (adrp_label != nullptr) ? adrp_label : label;
4458   return label;
4459 }
4460 
4461 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
4462     uint64_t address) {
4463   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
4464 }
4465 
4466 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
4467     const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) {
4468   ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
4469   return jit_string_patches_.GetOrCreate(
4470       StringReference(&dex_file, string_index),
4471       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
4472 }
4473 
4474 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
4475     const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) {
4476   ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
4477   return jit_class_patches_.GetOrCreate(
4478       TypeReference(&dex_file, type_index),
4479       [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
4480 }
4481 
4482 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
4483                                              vixl::aarch64::Register reg) {
4484   DCHECK(reg.IsX());
4485   SingleEmissionCheckScope guard(GetVIXLAssembler());
4486   __ Bind(fixup_label);
4487   __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0));
4488 }
4489 
4490 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label,
4491                                             vixl::aarch64::Register out,
4492                                             vixl::aarch64::Register base) {
4493   DCHECK(out.IsX());
4494   DCHECK(base.IsX());
4495   SingleEmissionCheckScope guard(GetVIXLAssembler());
4496   __ Bind(fixup_label);
4497   __ add(out, base, Operand(/* offset placeholder */ 0));
4498 }
4499 
4500 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label,
4501                                                   vixl::aarch64::Register out,
4502                                                   vixl::aarch64::Register base) {
4503   DCHECK(base.IsX());
4504   SingleEmissionCheckScope guard(GetVIXLAssembler());
4505   __ Bind(fixup_label);
4506   __ ldr(out, MemOperand(base, /* offset placeholder */ 0));
4507 }
4508 
4509 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
4510                                               uint32_t boot_image_reference) {
4511   if (GetCompilerOptions().IsBootImage()) {
4512     // Add ADRP with its PC-relative type patch.
4513     vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference);
4514     EmitAdrpPlaceholder(adrp_label, reg.X());
4515     // Add ADD with its PC-relative type patch.
4516     vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label);
4517     EmitAddPlaceholder(add_label, reg.X(), reg.X());
4518   } else if (GetCompilerOptions().GetCompilePic()) {
4519     // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
4520     vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference);
4521     EmitAdrpPlaceholder(adrp_label, reg.X());
4522     // Add LDR with its PC-relative .data.bimg.rel.ro patch.
4523     vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label);
4524     EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
4525   } else {
4526     DCHECK(Runtime::Current()->UseJitCompilation());
4527     gc::Heap* heap = Runtime::Current()->GetHeap();
4528     DCHECK(!heap->GetBootImageSpaces().empty());
4529     const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
4530     __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address)));
4531   }
4532 }
4533 
4534 void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
4535                                                       uint32_t boot_image_offset) {
4536   DCHECK(invoke->IsStatic());
4537   InvokeRuntimeCallingConvention calling_convention;
4538   Register argument = calling_convention.GetRegisterAt(0);
4539   if (GetCompilerOptions().IsBootImage()) {
4540     DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
4541     // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
4542     MethodReference target_method = invoke->GetTargetMethod();
4543     dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
4544     // Add ADRP with its PC-relative type patch.
4545     vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx);
4546     EmitAdrpPlaceholder(adrp_label, argument.X());
4547     // Add ADD with its PC-relative type patch.
4548     vixl::aarch64::Label* add_label =
4549         NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label);
4550     EmitAddPlaceholder(add_label, argument.X(), argument.X());
4551   } else {
4552     LoadBootImageAddress(argument, boot_image_offset);
4553   }
4554   InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
4555   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4556 }
4557 
4558 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
4559 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
4560     const ArenaDeque<PcRelativePatchInfo>& infos,
4561     ArenaVector<linker::LinkerPatch>* linker_patches) {
4562   for (const PcRelativePatchInfo& info : infos) {
4563     linker_patches->push_back(Factory(info.label.GetLocation(),
4564                                       info.target_dex_file,
4565                                       info.pc_insn_label->GetLocation(),
4566                                       info.offset_or_index));
4567   }
4568 }
4569 
4570 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
4571 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
4572                                      const DexFile* target_dex_file,
4573                                      uint32_t pc_insn_offset,
4574                                      uint32_t boot_image_offset) {
4575   DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
4576   return Factory(literal_offset, pc_insn_offset, boot_image_offset);
4577 }
4578 
4579 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
4580   DCHECK(linker_patches->empty());
4581   size_t size =
4582       boot_image_method_patches_.size() +
4583       method_bss_entry_patches_.size() +
4584       boot_image_type_patches_.size() +
4585       type_bss_entry_patches_.size() +
4586       boot_image_string_patches_.size() +
4587       string_bss_entry_patches_.size() +
4588       boot_image_other_patches_.size() +
4589       call_entrypoint_patches_.size() +
4590       baker_read_barrier_patches_.size();
4591   linker_patches->reserve(size);
4592   if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
4593     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
4594         boot_image_method_patches_, linker_patches);
4595     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
4596         boot_image_type_patches_, linker_patches);
4597     EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
4598         boot_image_string_patches_, linker_patches);
4599   } else {
4600     DCHECK(boot_image_method_patches_.empty());
4601     DCHECK(boot_image_type_patches_.empty());
4602     DCHECK(boot_image_string_patches_.empty());
4603   }
4604   if (GetCompilerOptions().IsBootImage()) {
4605     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
4606         boot_image_other_patches_, linker_patches);
4607   } else {
4608     EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
4609         boot_image_other_patches_, linker_patches);
4610   }
4611   EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
4612       method_bss_entry_patches_, linker_patches);
4613   EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
4614       type_bss_entry_patches_, linker_patches);
4615   EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
4616       string_bss_entry_patches_, linker_patches);
4617   for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) {
4618     DCHECK(info.target_dex_file == nullptr);
4619     linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch(
4620         info.label.GetLocation(), info.offset_or_index));
4621   }
4622   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
4623     linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
4624         info.label.GetLocation(), info.custom_data));
4625   }
4626   DCHECK_EQ(size, linker_patches->size());
4627 }
4628 
4629 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
4630   return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint ||
4631          patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
4632          patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
4633 }
4634 
4635 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
4636                                        /*out*/ ArenaVector<uint8_t>* code,
4637                                        /*out*/ std::string* debug_name) {
4638   Arm64Assembler assembler(GetGraph()->GetAllocator());
4639   switch (patch.GetType()) {
4640     case linker::LinkerPatch::Type::kCallRelative: {
4641       // The thunk just uses the entry point in the ArtMethod. This works even for calls
4642       // to the generic JNI and interpreter trampolines.
4643       Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
4644           kArm64PointerSize).Int32Value());
4645       assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
4646       if (GetCompilerOptions().GenerateAnyDebugInfo()) {
4647         *debug_name = "MethodCallThunk";
4648       }
4649       break;
4650     }
4651     case linker::LinkerPatch::Type::kCallEntrypoint: {
4652       Offset offset(patch.EntrypointOffset());
4653       assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0));
4654       if (GetCompilerOptions().GenerateAnyDebugInfo()) {
4655         *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value());
4656       }
4657       break;
4658     }
4659     case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
4660       DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
4661       CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
4662       break;
4663     }
4664     default:
4665       LOG(FATAL) << "Unexpected patch type " << patch.GetType();
4666       UNREACHABLE();
4667   }
4668 
4669   // Ensure we emit the literal pool if any.
4670   assembler.FinalizeCode();
4671   code->resize(assembler.CodeSize());
4672   MemoryRegion code_region(code->data(), code->size());
4673   assembler.FinalizeInstructions(code_region);
4674 }
4675 
4676 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
4677   return uint32_literals_.GetOrCreate(
4678       value,
4679       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
4680 }
4681 
4682 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) {
4683   return uint64_literals_.GetOrCreate(
4684       value,
4685       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); });
4686 }
4687 
4688 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
4689   // Explicit clinit checks triggered by static invokes must have been pruned by
4690   // art::PrepareForRegisterAllocation.
4691   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
4692 
4693   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4694     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4695     return;
4696   }
4697 
4698   {
4699     // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there
4700     // are no pools emitted.
4701     EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4702     LocationSummary* locations = invoke->GetLocations();
4703     codegen_->GenerateStaticOrDirectCall(
4704         invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
4705   }
4706 
4707   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4708 }
4709 
4710 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
4711   if (TryGenerateIntrinsicCode(invoke, codegen_)) {
4712     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4713     return;
4714   }
4715 
4716   {
4717     // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there
4718     // are no pools emitted.
4719     EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
4720     codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
4721     DCHECK(!codegen_->IsLeafMethod());
4722   }
4723 
4724   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4725 }
4726 
4727 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
4728     HLoadClass::LoadKind desired_class_load_kind) {
4729   switch (desired_class_load_kind) {
4730     case HLoadClass::LoadKind::kInvalid:
4731       LOG(FATAL) << "UNREACHABLE";
4732       UNREACHABLE();
4733     case HLoadClass::LoadKind::kReferrersClass:
4734       break;
4735     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
4736     case HLoadClass::LoadKind::kBootImageRelRo:
4737     case HLoadClass::LoadKind::kBssEntry:
4738       DCHECK(!Runtime::Current()->UseJitCompilation());
4739       break;
4740     case HLoadClass::LoadKind::kJitBootImageAddress:
4741     case HLoadClass::LoadKind::kJitTableAddress:
4742       DCHECK(Runtime::Current()->UseJitCompilation());
4743       break;
4744     case HLoadClass::LoadKind::kRuntimeCall:
4745       break;
4746   }
4747   return desired_class_load_kind;
4748 }
4749 
4750 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
4751   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4752   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
4753     InvokeRuntimeCallingConvention calling_convention;
4754     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
4755         cls,
4756         LocationFrom(calling_convention.GetRegisterAt(0)),
4757         LocationFrom(vixl::aarch64::x0));
4758     DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0));
4759     return;
4760   }
4761   DCHECK(!cls->NeedsAccessCheck());
4762 
4763   const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
4764   LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
4765       ? LocationSummary::kCallOnSlowPath
4766       : LocationSummary::kNoCall;
4767   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
4768   if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
4769     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
4770   }
4771 
4772   if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
4773     locations->SetInAt(0, Location::RequiresRegister());
4774   }
4775   locations->SetOut(Location::RequiresRegister());
4776   if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
4777     if (!kUseReadBarrier || kUseBakerReadBarrier) {
4778       // Rely on the type resolution or initialization and marking to save everything we need.
4779       locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
4780     } else {
4781       // For non-Baker read barrier we have a temp-clobbering call.
4782     }
4783   }
4784 }
4785 
4786 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
4787 // move.
4788 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
4789   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
4790   if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
4791     codegen_->GenerateLoadClassRuntimeCall(cls);
4792     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4793     return;
4794   }
4795   DCHECK(!cls->NeedsAccessCheck());
4796 
4797   Location out_loc = cls->GetLocations()->Out();
4798   Register out = OutputRegister(cls);
4799 
4800   const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
4801       ? kWithoutReadBarrier
4802       : kCompilerReadBarrierOption;
4803   bool generate_null_check = false;
4804   switch (load_kind) {
4805     case HLoadClass::LoadKind::kReferrersClass: {
4806       DCHECK(!cls->CanCallRuntime());
4807       DCHECK(!cls->MustGenerateClinitCheck());
4808       // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
4809       Register current_method = InputRegisterAt(cls, 0);
4810       codegen_->GenerateGcRootFieldLoad(cls,
4811                                         out_loc,
4812                                         current_method,
4813                                         ArtMethod::DeclaringClassOffset().Int32Value(),
4814                                         /* fixup_label= */ nullptr,
4815                                         read_barrier_option);
4816       break;
4817     }
4818     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
4819       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
4820              codegen_->GetCompilerOptions().IsBootImageExtension());
4821       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4822       // Add ADRP with its PC-relative type patch.
4823       const DexFile& dex_file = cls->GetDexFile();
4824       dex::TypeIndex type_index = cls->GetTypeIndex();
4825       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
4826       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
4827       // Add ADD with its PC-relative type patch.
4828       vixl::aarch64::Label* add_label =
4829           codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
4830       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
4831       break;
4832     }
4833     case HLoadClass::LoadKind::kBootImageRelRo: {
4834       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
4835       uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
4836       // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
4837       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
4838       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
4839       // Add LDR with its PC-relative .data.bimg.rel.ro patch.
4840       vixl::aarch64::Label* ldr_label =
4841           codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
4842       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
4843       break;
4844     }
4845     case HLoadClass::LoadKind::kBssEntry: {
4846       // Add ADRP with its PC-relative Class .bss entry patch.
4847       const DexFile& dex_file = cls->GetDexFile();
4848       dex::TypeIndex type_index = cls->GetTypeIndex();
4849       vixl::aarch64::Register temp = XRegisterFrom(out_loc);
4850       vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
4851       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
4852       // Add LDR with its PC-relative Class .bss entry patch.
4853       vixl::aarch64::Label* ldr_label =
4854           codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
4855       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
4856       // All aligned loads are implicitly atomic consume operations on ARM64.
4857       codegen_->GenerateGcRootFieldLoad(cls,
4858                                         out_loc,
4859                                         temp,
4860                                         /* offset placeholder */ 0u,
4861                                         ldr_label,
4862                                         read_barrier_option);
4863       generate_null_check = true;
4864       break;
4865     }
4866     case HLoadClass::LoadKind::kJitBootImageAddress: {
4867       DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
4868       uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
4869       DCHECK_NE(address, 0u);
4870       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
4871       break;
4872     }
4873     case HLoadClass::LoadKind::kJitTableAddress: {
4874       __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
4875                                                        cls->GetTypeIndex(),
4876                                                        cls->GetClass()));
4877       codegen_->GenerateGcRootFieldLoad(cls,
4878                                         out_loc,
4879                                         out.X(),
4880                                         /* offset= */ 0,
4881                                         /* fixup_label= */ nullptr,
4882                                         read_barrier_option);
4883       break;
4884     }
4885     case HLoadClass::LoadKind::kRuntimeCall:
4886     case HLoadClass::LoadKind::kInvalid:
4887       LOG(FATAL) << "UNREACHABLE";
4888       UNREACHABLE();
4889   }
4890 
4891   bool do_clinit = cls->MustGenerateClinitCheck();
4892   if (generate_null_check || do_clinit) {
4893     DCHECK(cls->CanCallRuntime());
4894     SlowPathCodeARM64* slow_path =
4895         new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls);
4896     codegen_->AddSlowPath(slow_path);
4897     if (generate_null_check) {
4898       __ Cbz(out, slow_path->GetEntryLabel());
4899     }
4900     if (cls->MustGenerateClinitCheck()) {
4901       GenerateClassInitializationCheck(slow_path, out);
4902     } else {
4903       __ Bind(slow_path->GetExitLabel());
4904     }
4905     codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
4906   }
4907 }
4908 
4909 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
4910   InvokeRuntimeCallingConvention calling_convention;
4911   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
4912   CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
4913 }
4914 
4915 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
4916   codegen_->GenerateLoadMethodHandleRuntimeCall(load);
4917 }
4918 
4919 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) {
4920   InvokeRuntimeCallingConvention calling_convention;
4921   Location location = LocationFrom(calling_convention.GetRegisterAt(0));
4922   CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
4923 }
4924 
4925 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) {
4926   codegen_->GenerateLoadMethodTypeRuntimeCall(load);
4927 }
4928 
4929 static MemOperand GetExceptionTlsAddress() {
4930   return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
4931 }
4932 
4933 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) {
4934   LocationSummary* locations =
4935       new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
4936   locations->SetOut(Location::RequiresRegister());
4937 }
4938 
4939 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) {
4940   __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress());
4941 }
4942 
4943 void LocationsBuilderARM64::VisitClearException(HClearException* clear) {
4944   new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
4945 }
4946 
4947 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
4948   __ Str(wzr, GetExceptionTlsAddress());
4949 }
4950 
4951 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
4952     HLoadString::LoadKind desired_string_load_kind) {
4953   switch (desired_string_load_kind) {
4954     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
4955     case HLoadString::LoadKind::kBootImageRelRo:
4956     case HLoadString::LoadKind::kBssEntry:
4957       DCHECK(!Runtime::Current()->UseJitCompilation());
4958       break;
4959     case HLoadString::LoadKind::kJitBootImageAddress:
4960     case HLoadString::LoadKind::kJitTableAddress:
4961       DCHECK(Runtime::Current()->UseJitCompilation());
4962       break;
4963     case HLoadString::LoadKind::kRuntimeCall:
4964       break;
4965   }
4966   return desired_string_load_kind;
4967 }
4968 
4969 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
4970   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
4971   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
4972   if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
4973     InvokeRuntimeCallingConvention calling_convention;
4974     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
4975   } else {
4976     locations->SetOut(Location::RequiresRegister());
4977     if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
4978       if (!kUseReadBarrier || kUseBakerReadBarrier) {
4979         // Rely on the pResolveString and marking to save everything we need.
4980         locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
4981       } else {
4982         // For non-Baker read barrier we have a temp-clobbering call.
4983       }
4984     }
4985   }
4986 }
4987 
4988 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
4989 // move.
4990 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
4991   Register out = OutputRegister(load);
4992   Location out_loc = load->GetLocations()->Out();
4993 
4994   switch (load->GetLoadKind()) {
4995     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
4996       DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
4997              codegen_->GetCompilerOptions().IsBootImageExtension());
4998       // Add ADRP with its PC-relative String patch.
4999       const DexFile& dex_file = load->GetDexFile();
5000       const dex::StringIndex string_index = load->GetStringIndex();
5001       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
5002       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5003       // Add ADD with its PC-relative String patch.
5004       vixl::aarch64::Label* add_label =
5005           codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
5006       codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
5007       return;
5008     }
5009     case HLoadString::LoadKind::kBootImageRelRo: {
5010       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5011       // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
5012       uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
5013       vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
5014       codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
5015       // Add LDR with its PC-relative .data.bimg.rel.ro patch.
5016       vixl::aarch64::Label* ldr_label =
5017           codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
5018       codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
5019       return;
5020     }
5021     case HLoadString::LoadKind::kBssEntry: {
5022       // Add ADRP with its PC-relative String .bss entry patch.
5023       const DexFile& dex_file = load->GetDexFile();
5024       const dex::StringIndex string_index = load->GetStringIndex();
5025       Register temp = XRegisterFrom(out_loc);
5026       vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
5027       codegen_->EmitAdrpPlaceholder(adrp_label, temp);
5028       // Add LDR with its PC-relative String .bss entry patch.
5029       vixl::aarch64::Label* ldr_label =
5030           codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
5031       // /* GcRoot<mirror::String> */ out = *(base_address + offset)  /* PC-relative */
5032       // All aligned loads are implicitly atomic consume operations on ARM64.
5033       codegen_->GenerateGcRootFieldLoad(load,
5034                                         out_loc,
5035                                         temp,
5036                                         /* offset placeholder */ 0u,
5037                                         ldr_label,
5038                                         kCompilerReadBarrierOption);
5039       SlowPathCodeARM64* slow_path =
5040           new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
5041       codegen_->AddSlowPath(slow_path);
5042       __ Cbz(out.X(), slow_path->GetEntryLabel());
5043       __ Bind(slow_path->GetExitLabel());
5044       codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5045       return;
5046     }
5047     case HLoadString::LoadKind::kJitBootImageAddress: {
5048       uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
5049       DCHECK_NE(address, 0u);
5050       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
5051       return;
5052     }
5053     case HLoadString::LoadKind::kJitTableAddress: {
5054       __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
5055                                                         load->GetStringIndex(),
5056                                                         load->GetString()));
5057       codegen_->GenerateGcRootFieldLoad(load,
5058                                         out_loc,
5059                                         out.X(),
5060                                         /* offset= */ 0,
5061                                         /* fixup_label= */ nullptr,
5062                                         kCompilerReadBarrierOption);
5063       return;
5064     }
5065     default:
5066       break;
5067   }
5068 
5069   // TODO: Re-add the compiler code to do string dex cache lookup again.
5070   InvokeRuntimeCallingConvention calling_convention;
5071   DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
5072   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
5073   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
5074   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5075   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5076 }
5077 
5078 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
5079   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant);
5080   locations->SetOut(Location::ConstantLocation(constant));
5081 }
5082 
5083 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
5084   // Will be generated at use site.
5085 }
5086 
5087 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5088   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5089       instruction, LocationSummary::kCallOnMainOnly);
5090   InvokeRuntimeCallingConvention calling_convention;
5091   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5092 }
5093 
5094 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) {
5095   codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
5096                           instruction,
5097                           instruction->GetDexPc());
5098   if (instruction->IsEnter()) {
5099     CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
5100   } else {
5101     CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
5102   }
5103   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5104 }
5105 
5106 void LocationsBuilderARM64::VisitMul(HMul* mul) {
5107   LocationSummary* locations =
5108       new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
5109   switch (mul->GetResultType()) {
5110     case DataType::Type::kInt32:
5111     case DataType::Type::kInt64:
5112       locations->SetInAt(0, Location::RequiresRegister());
5113       locations->SetInAt(1, Location::RequiresRegister());
5114       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5115       break;
5116 
5117     case DataType::Type::kFloat32:
5118     case DataType::Type::kFloat64:
5119       locations->SetInAt(0, Location::RequiresFpuRegister());
5120       locations->SetInAt(1, Location::RequiresFpuRegister());
5121       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5122       break;
5123 
5124     default:
5125       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5126   }
5127 }
5128 
5129 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) {
5130   switch (mul->GetResultType()) {
5131     case DataType::Type::kInt32:
5132     case DataType::Type::kInt64:
5133       __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1));
5134       break;
5135 
5136     case DataType::Type::kFloat32:
5137     case DataType::Type::kFloat64:
5138       __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1));
5139       break;
5140 
5141     default:
5142       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
5143   }
5144 }
5145 
5146 void LocationsBuilderARM64::VisitNeg(HNeg* neg) {
5147   LocationSummary* locations =
5148       new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
5149   switch (neg->GetResultType()) {
5150     case DataType::Type::kInt32:
5151     case DataType::Type::kInt64:
5152       locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg));
5153       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5154       break;
5155 
5156     case DataType::Type::kFloat32:
5157     case DataType::Type::kFloat64:
5158       locations->SetInAt(0, Location::RequiresFpuRegister());
5159       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5160       break;
5161 
5162     default:
5163       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5164   }
5165 }
5166 
5167 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) {
5168   switch (neg->GetResultType()) {
5169     case DataType::Type::kInt32:
5170     case DataType::Type::kInt64:
5171       __ Neg(OutputRegister(neg), InputOperandAt(neg, 0));
5172       break;
5173 
5174     case DataType::Type::kFloat32:
5175     case DataType::Type::kFloat64:
5176       __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0));
5177       break;
5178 
5179     default:
5180       LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
5181   }
5182 }
5183 
5184 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
5185   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5186       instruction, LocationSummary::kCallOnMainOnly);
5187   InvokeRuntimeCallingConvention calling_convention;
5188   locations->SetOut(LocationFrom(x0));
5189   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5190   locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
5191 }
5192 
5193 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
5194   // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5195   QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5196   codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
5197   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5198   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5199 }
5200 
5201 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
5202   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5203       instruction, LocationSummary::kCallOnMainOnly);
5204   InvokeRuntimeCallingConvention calling_convention;
5205   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5206   locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
5207 }
5208 
5209 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
5210   codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
5211   CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5212   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5213 }
5214 
5215 void LocationsBuilderARM64::VisitNot(HNot* instruction) {
5216   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5217   locations->SetInAt(0, Location::RequiresRegister());
5218   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5219 }
5220 
5221 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) {
5222   switch (instruction->GetResultType()) {
5223     case DataType::Type::kInt32:
5224     case DataType::Type::kInt64:
5225       __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0));
5226       break;
5227 
5228     default:
5229       LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType();
5230   }
5231 }
5232 
5233 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) {
5234   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5235   locations->SetInAt(0, Location::RequiresRegister());
5236   locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5237 }
5238 
5239 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) {
5240   __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1));
5241 }
5242 
5243 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) {
5244   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5245   locations->SetInAt(0, Location::RequiresRegister());
5246 }
5247 
5248 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5249   if (CanMoveNullCheckToUser(instruction)) {
5250     return;
5251   }
5252   {
5253     // Ensure that between load and RecordPcInfo there are no pools emitted.
5254     EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
5255     Location obj = instruction->GetLocations()->InAt(0);
5256     __ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
5257     RecordPcInfo(instruction, instruction->GetDexPc());
5258   }
5259 }
5260 
5261 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5262   SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction);
5263   AddSlowPath(slow_path);
5264 
5265   LocationSummary* locations = instruction->GetLocations();
5266   Location obj = locations->InAt(0);
5267 
5268   __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel());
5269 }
5270 
5271 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) {
5272   codegen_->GenerateNullCheck(instruction);
5273 }
5274 
5275 void LocationsBuilderARM64::VisitOr(HOr* instruction) {
5276   HandleBinaryOp(instruction);
5277 }
5278 
5279 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) {
5280   HandleBinaryOp(instruction);
5281 }
5282 
5283 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5284   LOG(FATAL) << "Unreachable";
5285 }
5286 
5287 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) {
5288   if (instruction->GetNext()->IsSuspendCheck() &&
5289       instruction->GetBlock()->GetLoopInformation() != nullptr) {
5290     HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5291     // The back edge will generate the suspend check.
5292     codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5293   }
5294 
5295   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5296 }
5297 
5298 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) {
5299   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5300   Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5301   if (location.IsStackSlot()) {
5302     location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5303   } else if (location.IsDoubleStackSlot()) {
5304     location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5305   }
5306   locations->SetOut(location);
5307 }
5308 
5309 void InstructionCodeGeneratorARM64::VisitParameterValue(
5310     HParameterValue* instruction ATTRIBUTE_UNUSED) {
5311   // Nothing to do, the parameter is already at its location.
5312 }
5313 
5314 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) {
5315   LocationSummary* locations =
5316       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5317   locations->SetOut(LocationFrom(kArtMethodRegister));
5318 }
5319 
5320 void InstructionCodeGeneratorARM64::VisitCurrentMethod(
5321     HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
5322   // Nothing to do, the method is already at its location.
5323 }
5324 
5325 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) {
5326   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5327   for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5328     locations->SetInAt(i, Location::Any());
5329   }
5330   locations->SetOut(Location::Any());
5331 }
5332 
5333 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
5334   LOG(FATAL) << "Unreachable";
5335 }
5336 
5337 void LocationsBuilderARM64::VisitRem(HRem* rem) {
5338   DataType::Type type = rem->GetResultType();
5339   LocationSummary::CallKind call_kind =
5340       DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly
5341                                            : LocationSummary::kNoCall;
5342   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind);
5343 
5344   switch (type) {
5345     case DataType::Type::kInt32:
5346     case DataType::Type::kInt64:
5347       locations->SetInAt(0, Location::RequiresRegister());
5348       locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
5349       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5350       break;
5351 
5352     case DataType::Type::kFloat32:
5353     case DataType::Type::kFloat64: {
5354       InvokeRuntimeCallingConvention calling_convention;
5355       locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
5356       locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
5357       locations->SetOut(calling_convention.GetReturnLocation(type));
5358 
5359       break;
5360     }
5361 
5362     default:
5363       LOG(FATAL) << "Unexpected rem type " << type;
5364   }
5365 }
5366 
5367 void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) {
5368   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
5369   uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
5370   DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
5371 
5372   Register out = OutputRegister(instruction);
5373   Register dividend = InputRegisterAt(instruction, 0);
5374 
5375   if (abs_imm == 2) {
5376     __ Cmp(dividend, 0);
5377     __ And(out, dividend, 1);
5378     __ Csneg(out, out, out, ge);
5379   } else {
5380     UseScratchRegisterScope temps(GetVIXLAssembler());
5381     Register temp = temps.AcquireSameSizeAs(out);
5382 
5383     __ Negs(temp, dividend);
5384     __ And(out, dividend, abs_imm - 1);
5385     __ And(temp, temp, abs_imm - 1);
5386     __ Csneg(out, out, temp, mi);
5387   }
5388 }
5389 
5390 void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) {
5391   int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
5392 
5393   if (imm == 0) {
5394     // Do not generate anything.
5395     // DivZeroCheck would prevent any code to be executed.
5396     return;
5397   }
5398 
5399   if (IsPowerOfTwo(AbsOrMin(imm))) {
5400     // Cases imm == -1 or imm == 1 are handled in constant folding by
5401     // InstructionWithAbsorbingInputSimplifier.
5402     // If the cases have survided till code generation they are handled in
5403     // GenerateIntRemForPower2Denom becauses -1 and 1 are the power of 2 (2^0).
5404     // The correct code is generated for them, just more instructions.
5405     GenerateIntRemForPower2Denom(instruction);
5406   } else {
5407     DCHECK(imm < -2 || imm > 2) << imm;
5408     GenerateDivRemWithAnyConstant(instruction);
5409   }
5410 }
5411 
5412 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) {
5413   DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
5414          << instruction->GetResultType();
5415 
5416   if (instruction->GetLocations()->InAt(1).IsConstant()) {
5417     GenerateIntRemForConstDenom(instruction);
5418   } else {
5419     Register out = OutputRegister(instruction);
5420     Register dividend = InputRegisterAt(instruction, 0);
5421     Register divisor = InputRegisterAt(instruction, 1);
5422     UseScratchRegisterScope temps(GetVIXLAssembler());
5423     Register temp = temps.AcquireSameSizeAs(out);
5424     __ Sdiv(temp, dividend, divisor);
5425     __ Msub(out, temp, divisor, dividend);
5426   }
5427 }
5428 
5429 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
5430   DataType::Type type = rem->GetResultType();
5431 
5432   switch (type) {
5433     case DataType::Type::kInt32:
5434     case DataType::Type::kInt64: {
5435       GenerateIntRem(rem);
5436       break;
5437     }
5438 
5439     case DataType::Type::kFloat32:
5440     case DataType::Type::kFloat64: {
5441       QuickEntrypointEnum entrypoint =
5442           (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod;
5443       codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc());
5444       if (type == DataType::Type::kFloat32) {
5445         CheckEntrypointTypes<kQuickFmodf, float, float, float>();
5446       } else {
5447         CheckEntrypointTypes<kQuickFmod, double, double, double>();
5448       }
5449       break;
5450     }
5451 
5452     default:
5453       LOG(FATAL) << "Unexpected rem type " << type;
5454       UNREACHABLE();
5455   }
5456 }
5457 
5458 void LocationsBuilderARM64::VisitMin(HMin* min) {
5459   HandleBinaryOp(min);
5460 }
5461 
5462 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
5463   HandleBinaryOp(min);
5464 }
5465 
5466 void LocationsBuilderARM64::VisitMax(HMax* max) {
5467   HandleBinaryOp(max);
5468 }
5469 
5470 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
5471   HandleBinaryOp(max);
5472 }
5473 
5474 void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
5475   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
5476   switch (abs->GetResultType()) {
5477     case DataType::Type::kInt32:
5478     case DataType::Type::kInt64:
5479       locations->SetInAt(0, Location::RequiresRegister());
5480       locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5481       break;
5482     case DataType::Type::kFloat32:
5483     case DataType::Type::kFloat64:
5484       locations->SetInAt(0, Location::RequiresFpuRegister());
5485       locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5486       break;
5487     default:
5488       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5489   }
5490 }
5491 
5492 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
5493   switch (abs->GetResultType()) {
5494     case DataType::Type::kInt32:
5495     case DataType::Type::kInt64: {
5496       Register in_reg = InputRegisterAt(abs, 0);
5497       Register out_reg = OutputRegister(abs);
5498       __ Cmp(in_reg, Operand(0));
5499       __ Cneg(out_reg, in_reg, lt);
5500       break;
5501     }
5502     case DataType::Type::kFloat32:
5503     case DataType::Type::kFloat64: {
5504       VRegister in_reg = InputFPRegisterAt(abs, 0);
5505       VRegister out_reg = OutputFPRegister(abs);
5506       __ Fabs(out_reg, in_reg);
5507       break;
5508     }
5509     default:
5510       LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
5511   }
5512 }
5513 
5514 void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
5515   constructor_fence->SetLocations(nullptr);
5516 }
5517 
5518 void InstructionCodeGeneratorARM64::VisitConstructorFence(
5519     HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
5520   codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
5521 }
5522 
5523 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5524   memory_barrier->SetLocations(nullptr);
5525 }
5526 
5527 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
5528   codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
5529 }
5530 
5531 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) {
5532   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
5533   DataType::Type return_type = instruction->InputAt(0)->GetType();
5534   locations->SetInAt(0, ARM64ReturnLocation(return_type));
5535 }
5536 
5537 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* ret) {
5538   if (GetGraph()->IsCompilingOsr()) {
5539     // To simplify callers of an OSR method, we put the return value in both
5540     // floating point and core register.
5541     switch (ret->InputAt(0)->GetType()) {
5542       case DataType::Type::kFloat32:
5543         __ Fmov(w0, s0);
5544         break;
5545       case DataType::Type::kFloat64:
5546         __ Fmov(x0, d0);
5547         break;
5548       default:
5549         break;
5550     }
5551   }
5552   codegen_->GenerateFrameExit();
5553 }
5554 
5555 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) {
5556   instruction->SetLocations(nullptr);
5557 }
5558 
5559 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) {
5560   codegen_->GenerateFrameExit();
5561 }
5562 
5563 void LocationsBuilderARM64::VisitRor(HRor* ror) {
5564   HandleBinaryOp(ror);
5565 }
5566 
5567 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) {
5568   HandleBinaryOp(ror);
5569 }
5570 
5571 void LocationsBuilderARM64::VisitShl(HShl* shl) {
5572   HandleShift(shl);
5573 }
5574 
5575 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) {
5576   HandleShift(shl);
5577 }
5578 
5579 void LocationsBuilderARM64::VisitShr(HShr* shr) {
5580   HandleShift(shr);
5581 }
5582 
5583 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) {
5584   HandleShift(shr);
5585 }
5586 
5587 void LocationsBuilderARM64::VisitSub(HSub* instruction) {
5588   HandleBinaryOp(instruction);
5589 }
5590 
5591 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) {
5592   HandleBinaryOp(instruction);
5593 }
5594 
5595 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5596   HandleFieldGet(instruction, instruction->GetFieldInfo());
5597 }
5598 
5599 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5600   HandleFieldGet(instruction, instruction->GetFieldInfo());
5601 }
5602 
5603 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5604   HandleFieldSet(instruction);
5605 }
5606 
5607 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5608   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5609 }
5610 
5611 void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5612   codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0));
5613 }
5614 
5615 void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5616   __ Mov(w0, instruction->GetFormat()->GetValue());
5617   codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5618 }
5619 
5620 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet(
5621     HUnresolvedInstanceFieldGet* instruction) {
5622   FieldAccessCallingConventionARM64 calling_convention;
5623   codegen_->CreateUnresolvedFieldLocationSummary(
5624       instruction, instruction->GetFieldType(), calling_convention);
5625 }
5626 
5627 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet(
5628     HUnresolvedInstanceFieldGet* instruction) {
5629   FieldAccessCallingConventionARM64 calling_convention;
5630   codegen_->GenerateUnresolvedFieldAccess(instruction,
5631                                           instruction->GetFieldType(),
5632                                           instruction->GetFieldIndex(),
5633                                           instruction->GetDexPc(),
5634                                           calling_convention);
5635 }
5636 
5637 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet(
5638     HUnresolvedInstanceFieldSet* instruction) {
5639   FieldAccessCallingConventionARM64 calling_convention;
5640   codegen_->CreateUnresolvedFieldLocationSummary(
5641       instruction, instruction->GetFieldType(), calling_convention);
5642 }
5643 
5644 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet(
5645     HUnresolvedInstanceFieldSet* instruction) {
5646   FieldAccessCallingConventionARM64 calling_convention;
5647   codegen_->GenerateUnresolvedFieldAccess(instruction,
5648                                           instruction->GetFieldType(),
5649                                           instruction->GetFieldIndex(),
5650                                           instruction->GetDexPc(),
5651                                           calling_convention);
5652 }
5653 
5654 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet(
5655     HUnresolvedStaticFieldGet* instruction) {
5656   FieldAccessCallingConventionARM64 calling_convention;
5657   codegen_->CreateUnresolvedFieldLocationSummary(
5658       instruction, instruction->GetFieldType(), calling_convention);
5659 }
5660 
5661 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet(
5662     HUnresolvedStaticFieldGet* instruction) {
5663   FieldAccessCallingConventionARM64 calling_convention;
5664   codegen_->GenerateUnresolvedFieldAccess(instruction,
5665                                           instruction->GetFieldType(),
5666                                           instruction->GetFieldIndex(),
5667                                           instruction->GetDexPc(),
5668                                           calling_convention);
5669 }
5670 
5671 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet(
5672     HUnresolvedStaticFieldSet* instruction) {
5673   FieldAccessCallingConventionARM64 calling_convention;
5674   codegen_->CreateUnresolvedFieldLocationSummary(
5675       instruction, instruction->GetFieldType(), calling_convention);
5676 }
5677 
5678 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet(
5679     HUnresolvedStaticFieldSet* instruction) {
5680   FieldAccessCallingConventionARM64 calling_convention;
5681   codegen_->GenerateUnresolvedFieldAccess(instruction,
5682                                           instruction->GetFieldType(),
5683                                           instruction->GetFieldIndex(),
5684                                           instruction->GetDexPc(),
5685                                           calling_convention);
5686 }
5687 
5688 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5689   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5690       instruction, LocationSummary::kCallOnSlowPath);
5691   // In suspend check slow path, usually there are no caller-save registers at all.
5692   // If SIMD instructions are present, however, we force spilling all live SIMD
5693   // registers in full width (since the runtime only saves/restores lower part).
5694   locations->SetCustomSlowPathCallerSaves(
5695       GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5696 }
5697 
5698 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
5699   HBasicBlock* block = instruction->GetBlock();
5700   if (block->GetLoopInformation() != nullptr) {
5701     DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5702     // The back edge will generate the suspend check.
5703     return;
5704   }
5705   if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5706     // The goto will generate the suspend check.
5707     return;
5708   }
5709   GenerateSuspendCheck(instruction, nullptr);
5710   codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
5711 }
5712 
5713 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
5714   LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5715       instruction, LocationSummary::kCallOnMainOnly);
5716   InvokeRuntimeCallingConvention calling_convention;
5717   locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
5718 }
5719 
5720 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) {
5721   codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5722   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5723 }
5724 
5725 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) {
5726   LocationSummary* locations =
5727       new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
5728   DataType::Type input_type = conversion->GetInputType();
5729   DataType::Type result_type = conversion->GetResultType();
5730   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
5731       << input_type << " -> " << result_type;
5732   if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) ||
5733       (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) {
5734     LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type;
5735   }
5736 
5737   if (DataType::IsFloatingPointType(input_type)) {
5738     locations->SetInAt(0, Location::RequiresFpuRegister());
5739   } else {
5740     locations->SetInAt(0, Location::RequiresRegister());
5741   }
5742 
5743   if (DataType::IsFloatingPointType(result_type)) {
5744     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5745   } else {
5746     locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5747   }
5748 }
5749 
5750 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) {
5751   DataType::Type result_type = conversion->GetResultType();
5752   DataType::Type input_type = conversion->GetInputType();
5753 
5754   DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
5755       << input_type << " -> " << result_type;
5756 
5757   if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) {
5758     int result_size = DataType::Size(result_type);
5759     int input_size = DataType::Size(input_type);
5760     int min_size = std::min(result_size, input_size);
5761     Register output = OutputRegister(conversion);
5762     Register source = InputRegisterAt(conversion, 0);
5763     if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) {
5764       // 'int' values are used directly as W registers, discarding the top
5765       // bits, so we don't need to sign-extend and can just perform a move.
5766       // We do not pass the `kDiscardForSameWReg` argument to force clearing the
5767       // top 32 bits of the target register. We theoretically could leave those
5768       // bits unchanged, but we would have to make sure that no code uses a
5769       // 32bit input value as a 64bit value assuming that the top 32 bits are
5770       // zero.
5771       __ Mov(output.W(), source.W());
5772     } else if (DataType::IsUnsignedType(result_type) ||
5773                (DataType::IsUnsignedType(input_type) && input_size < result_size)) {
5774       __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte);
5775     } else {
5776       __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
5777     }
5778   } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) {
5779     __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0));
5780   } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) {
5781     CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64);
5782     __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0));
5783   } else if (DataType::IsFloatingPointType(result_type) &&
5784              DataType::IsFloatingPointType(input_type)) {
5785     __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0));
5786   } else {
5787     LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type
5788                 << " to " << result_type;
5789   }
5790 }
5791 
5792 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) {
5793   HandleShift(ushr);
5794 }
5795 
5796 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) {
5797   HandleShift(ushr);
5798 }
5799 
5800 void LocationsBuilderARM64::VisitXor(HXor* instruction) {
5801   HandleBinaryOp(instruction);
5802 }
5803 
5804 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) {
5805   HandleBinaryOp(instruction);
5806 }
5807 
5808 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5809   // Nothing to do, this should be removed during prepare for register allocator.
5810   LOG(FATAL) << "Unreachable";
5811 }
5812 
5813 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
5814   // Nothing to do, this should be removed during prepare for register allocator.
5815   LOG(FATAL) << "Unreachable";
5816 }
5817 
5818 // Simple implementation of packed switch - generate cascaded compare/jumps.
5819 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5820   LocationSummary* locations =
5821       new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
5822   locations->SetInAt(0, Location::RequiresRegister());
5823 }
5824 
5825 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
5826   int32_t lower_bound = switch_instr->GetStartValue();
5827   uint32_t num_entries = switch_instr->GetNumEntries();
5828   Register value_reg = InputRegisterAt(switch_instr, 0);
5829   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
5830 
5831   // Roughly set 16 as max average assemblies generated per HIR in a graph.
5832   static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize;
5833   // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to
5834   // make sure we don't emit it if the target may run out of range.
5835   // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
5836   // ranges and emit the tables only as required.
5837   static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
5838 
5839   if (num_entries <= kPackedSwitchCompareJumpThreshold ||
5840       // Current instruction id is an upper bound of the number of HIRs in the graph.
5841       GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
5842     // Create a series of compare/jumps.
5843     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5844     Register temp = temps.AcquireW();
5845     __ Subs(temp, value_reg, Operand(lower_bound));
5846 
5847     const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
5848     // Jump to successors[0] if value == lower_bound.
5849     __ B(eq, codegen_->GetLabelOf(successors[0]));
5850     int32_t last_index = 0;
5851     for (; num_entries - last_index > 2; last_index += 2) {
5852       __ Subs(temp, temp, Operand(2));
5853       // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
5854       __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
5855       // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
5856       __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
5857     }
5858     if (num_entries - last_index == 2) {
5859       // The last missing case_value.
5860       __ Cmp(temp, Operand(1));
5861       __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
5862     }
5863 
5864     // And the default for any other value.
5865     if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
5866       __ B(codegen_->GetLabelOf(default_block));
5867     }
5868   } else {
5869     JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr);
5870 
5871     UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
5872 
5873     // Below instructions should use at most one blocked register. Since there are two blocked
5874     // registers, we are free to block one.
5875     Register temp_w = temps.AcquireW();
5876     Register index;
5877     // Remove the bias.
5878     if (lower_bound != 0) {
5879       index = temp_w;
5880       __ Sub(index, value_reg, Operand(lower_bound));
5881     } else {
5882       index = value_reg;
5883     }
5884 
5885     // Jump to default block if index is out of the range.
5886     __ Cmp(index, Operand(num_entries));
5887     __ B(hs, codegen_->GetLabelOf(default_block));
5888 
5889     // In current VIXL implementation, it won't require any blocked registers to encode the
5890     // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the
5891     // register pressure.
5892     Register table_base = temps.AcquireX();
5893     // Load jump offset from the table.
5894     __ Adr(table_base, jump_table->GetTableStartLabel());
5895     Register jump_offset = temp_w;
5896     __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
5897 
5898     // Jump to target block by branching to table_base(pc related) + offset.
5899     Register target_address = table_base;
5900     __ Add(target_address, table_base, Operand(jump_offset, SXTW));
5901     __ Br(target_address);
5902   }
5903 }
5904 
5905 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
5906     HInstruction* instruction,
5907     Location out,
5908     uint32_t offset,
5909     Location maybe_temp,
5910     ReadBarrierOption read_barrier_option) {
5911   DataType::Type type = DataType::Type::kReference;
5912   Register out_reg = RegisterFrom(out, type);
5913   if (read_barrier_option == kWithReadBarrier) {
5914     CHECK(kEmitCompilerReadBarrier);
5915     if (kUseBakerReadBarrier) {
5916       // Load with fast path based Baker's read barrier.
5917       // /* HeapReference<Object> */ out = *(out + offset)
5918       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5919                                                       out,
5920                                                       out_reg,
5921                                                       offset,
5922                                                       maybe_temp,
5923                                                       /* needs_null_check= */ false,
5924                                                       /* use_load_acquire= */ false);
5925     } else {
5926       // Load with slow path based read barrier.
5927       // Save the value of `out` into `maybe_temp` before overwriting it
5928       // in the following move operation, as we will need it for the
5929       // read barrier below.
5930       Register temp_reg = RegisterFrom(maybe_temp, type);
5931       __ Mov(temp_reg, out_reg);
5932       // /* HeapReference<Object> */ out = *(out + offset)
5933       __ Ldr(out_reg, HeapOperand(out_reg, offset));
5934       codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
5935     }
5936   } else {
5937     // Plain load with no read barrier.
5938     // /* HeapReference<Object> */ out = *(out + offset)
5939     __ Ldr(out_reg, HeapOperand(out_reg, offset));
5940     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5941   }
5942 }
5943 
5944 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
5945     HInstruction* instruction,
5946     Location out,
5947     Location obj,
5948     uint32_t offset,
5949     Location maybe_temp,
5950     ReadBarrierOption read_barrier_option) {
5951   DataType::Type type = DataType::Type::kReference;
5952   Register out_reg = RegisterFrom(out, type);
5953   Register obj_reg = RegisterFrom(obj, type);
5954   if (read_barrier_option == kWithReadBarrier) {
5955     CHECK(kEmitCompilerReadBarrier);
5956     if (kUseBakerReadBarrier) {
5957       // Load with fast path based Baker's read barrier.
5958       // /* HeapReference<Object> */ out = *(obj + offset)
5959       codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
5960                                                       out,
5961                                                       obj_reg,
5962                                                       offset,
5963                                                       maybe_temp,
5964                                                       /* needs_null_check= */ false,
5965                                                       /* use_load_acquire= */ false);
5966     } else {
5967       // Load with slow path based read barrier.
5968       // /* HeapReference<Object> */ out = *(obj + offset)
5969       __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5970       codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
5971     }
5972   } else {
5973     // Plain load with no read barrier.
5974     // /* HeapReference<Object> */ out = *(obj + offset)
5975     __ Ldr(out_reg, HeapOperand(obj_reg, offset));
5976     GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
5977   }
5978 }
5979 
5980 void CodeGeneratorARM64::GenerateGcRootFieldLoad(
5981     HInstruction* instruction,
5982     Location root,
5983     Register obj,
5984     uint32_t offset,
5985     vixl::aarch64::Label* fixup_label,
5986     ReadBarrierOption read_barrier_option) {
5987   DCHECK(fixup_label == nullptr || offset == 0u);
5988   Register root_reg = RegisterFrom(root, DataType::Type::kReference);
5989   if (read_barrier_option == kWithReadBarrier) {
5990     DCHECK(kEmitCompilerReadBarrier);
5991     if (kUseBakerReadBarrier) {
5992       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
5993       // Baker's read barrier are used.
5994 
5995       // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
5996       // the Marking Register) to decide whether we need to enter
5997       // the slow path to mark the GC root.
5998       //
5999       // We use shared thunks for the slow path; shared within the method
6000       // for JIT, across methods for AOT. That thunk checks the reference
6001       // and jumps to the entrypoint if needed.
6002       //
6003       //     lr = &return_address;
6004       //     GcRoot<mirror::Object> root = *(obj+offset);  // Original reference load.
6005       //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6006       //       goto gc_root_thunk<root_reg>(lr)
6007       //     }
6008       //   return_address:
6009 
6010       UseScratchRegisterScope temps(GetVIXLAssembler());
6011       DCHECK(temps.IsAvailable(ip0));
6012       DCHECK(temps.IsAvailable(ip1));
6013       temps.Exclude(ip0, ip1);
6014       uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
6015 
6016       ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6017       vixl::aarch64::Label return_address;
6018       __ adr(lr, &return_address);
6019       if (fixup_label != nullptr) {
6020         __ bind(fixup_label);
6021       }
6022       static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6023                     "GC root LDR must be 2 instructions (8B) before the return address label.");
6024       __ ldr(root_reg, MemOperand(obj.X(), offset));
6025       EmitBakerReadBarrierCbnz(custom_data);
6026       __ bind(&return_address);
6027     } else {
6028       // GC root loaded through a slow path for read barriers other
6029       // than Baker's.
6030       // /* GcRoot<mirror::Object>* */ root = obj + offset
6031       if (fixup_label == nullptr) {
6032         __ Add(root_reg.X(), obj.X(), offset);
6033       } else {
6034         EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
6035       }
6036       // /* mirror::Object* */ root = root->Read()
6037       GenerateReadBarrierForRootSlow(instruction, root, root);
6038     }
6039   } else {
6040     // Plain GC root load with no read barrier.
6041     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
6042     if (fixup_label == nullptr) {
6043       __ Ldr(root_reg, MemOperand(obj, offset));
6044     } else {
6045       EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
6046     }
6047     // Note that GC roots are not affected by heap poisoning, thus we
6048     // do not have to unpoison `root_reg` here.
6049   }
6050   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
6051 }
6052 
6053 void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier(
6054     vixl::aarch64::Register marked,
6055     vixl::aarch64::Register old_value) {
6056   DCHECK(kEmitCompilerReadBarrier);
6057   DCHECK(kUseBakerReadBarrier);
6058 
6059   // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
6060   uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode());
6061 
6062   ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
6063   vixl::aarch64::Label return_address;
6064   __ adr(lr, &return_address);
6065   static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
6066                 "GC root LDR must be 2 instructions (8B) before the return address label.");
6067   __ mov(marked, old_value);
6068   EmitBakerReadBarrierCbnz(custom_data);
6069   __ bind(&return_address);
6070 }
6071 
6072 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6073                                                                Location ref,
6074                                                                vixl::aarch64::Register obj,
6075                                                                const vixl::aarch64::MemOperand& src,
6076                                                                bool needs_null_check,
6077                                                                bool use_load_acquire) {
6078   DCHECK(kEmitCompilerReadBarrier);
6079   DCHECK(kUseBakerReadBarrier);
6080 
6081   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6082   // Marking Register) to decide whether we need to enter the slow
6083   // path to mark the reference. Then, in the slow path, check the
6084   // gray bit in the lock word of the reference's holder (`obj`) to
6085   // decide whether to mark `ref` or not.
6086   //
6087   // We use shared thunks for the slow path; shared within the method
6088   // for JIT, across methods for AOT. That thunk checks the holder
6089   // and jumps to the entrypoint if needed. If the holder is not gray,
6090   // it creates a fake dependency and returns to the LDR instruction.
6091   //
6092   //     lr = &gray_return_address;
6093   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6094   //       goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
6095   //     }
6096   //   not_gray_return_address:
6097   //     // Original reference load. If the offset is too large to fit
6098   //     // into LDR, we use an adjusted base register here.
6099   //     HeapReference<mirror::Object> reference = *(obj+offset);
6100   //   gray_return_address:
6101 
6102   DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
6103   DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));
6104 
6105   UseScratchRegisterScope temps(GetVIXLAssembler());
6106   DCHECK(temps.IsAvailable(ip0));
6107   DCHECK(temps.IsAvailable(ip1));
6108   temps.Exclude(ip0, ip1);
6109   uint32_t custom_data = use_load_acquire
6110       ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
6111       : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());
6112 
6113   {
6114     ExactAssemblyScope guard(GetVIXLAssembler(),
6115                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6116     vixl::aarch64::Label return_address;
6117     __ adr(lr, &return_address);
6118     EmitBakerReadBarrierCbnz(custom_data);
6119     static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6120                   "Field LDR must be 1 instruction (4B) before the return address label; "
6121                   " 2 instructions (8B) for heap poisoning.");
6122     Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6123     if (use_load_acquire) {
6124       DCHECK_EQ(src.GetOffset(), 0);
6125       __ ldar(ref_reg, src);
6126     } else {
6127       __ ldr(ref_reg, src);
6128     }
6129     if (needs_null_check) {
6130       MaybeRecordImplicitNullCheck(instruction);
6131     }
6132     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6133     // macro instructions disallowed in ExactAssemblyScope.
6134     if (kPoisonHeapReferences) {
6135       __ neg(ref_reg, Operand(ref_reg));
6136     }
6137     __ bind(&return_address);
6138   }
6139   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6140 }
6141 
6142 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6143                                                                Location ref,
6144                                                                Register obj,
6145                                                                uint32_t offset,
6146                                                                Location maybe_temp,
6147                                                                bool needs_null_check,
6148                                                                bool use_load_acquire) {
6149   DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
6150   Register base = obj;
6151   if (use_load_acquire) {
6152     DCHECK(maybe_temp.IsRegister());
6153     base = WRegisterFrom(maybe_temp);
6154     __ Add(base, obj, offset);
6155     offset = 0u;
6156   } else if (offset >= kReferenceLoadMinFarOffset) {
6157     DCHECK(maybe_temp.IsRegister());
6158     base = WRegisterFrom(maybe_temp);
6159     static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
6160     __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
6161     offset &= (kReferenceLoadMinFarOffset - 1u);
6162   }
6163   MemOperand src(base.X(), offset);
6164   GenerateFieldLoadWithBakerReadBarrier(
6165       instruction, ref, obj, src, needs_null_check, use_load_acquire);
6166 }
6167 
6168 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
6169                                                                Location ref,
6170                                                                Register obj,
6171                                                                uint32_t data_offset,
6172                                                                Location index,
6173                                                                bool needs_null_check) {
6174   DCHECK(kEmitCompilerReadBarrier);
6175   DCHECK(kUseBakerReadBarrier);
6176 
6177   static_assert(
6178       sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6179       "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6180   size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
6181 
6182   // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
6183   // Marking Register) to decide whether we need to enter the slow
6184   // path to mark the reference. Then, in the slow path, check the
6185   // gray bit in the lock word of the reference's holder (`obj`) to
6186   // decide whether to mark `ref` or not.
6187   //
6188   // We use shared thunks for the slow path; shared within the method
6189   // for JIT, across methods for AOT. That thunk checks the holder
6190   // and jumps to the entrypoint if needed. If the holder is not gray,
6191   // it creates a fake dependency and returns to the LDR instruction.
6192   //
6193   //     lr = &gray_return_address;
6194   //     if (mr) {  // Thread::Current()->GetIsGcMarking()
6195   //       goto array_thunk<base_reg>(lr)
6196   //     }
6197   //   not_gray_return_address:
6198   //     // Original reference load. If the offset is too large to fit
6199   //     // into LDR, we use an adjusted base register here.
6200   //     HeapReference<mirror::Object> reference = data[index];
6201   //   gray_return_address:
6202 
6203   DCHECK(index.IsValid());
6204   Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
6205   Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
6206 
6207   UseScratchRegisterScope temps(GetVIXLAssembler());
6208   DCHECK(temps.IsAvailable(ip0));
6209   DCHECK(temps.IsAvailable(ip1));
6210   temps.Exclude(ip0, ip1);
6211 
6212   Register temp;
6213   if (instruction->GetArray()->IsIntermediateAddress()) {
6214     // We do not need to compute the intermediate address from the array: the
6215     // input instruction has done it already. See the comment in
6216     // `TryExtractArrayAccessAddress()`.
6217     if (kIsDebugBuild) {
6218       HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
6219       DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
6220     }
6221     temp = obj;
6222   } else {
6223     temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
6224     __ Add(temp.X(), obj.X(), Operand(data_offset));
6225   }
6226 
6227   uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
6228 
6229   {
6230     ExactAssemblyScope guard(GetVIXLAssembler(),
6231                              (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
6232     vixl::aarch64::Label return_address;
6233     __ adr(lr, &return_address);
6234     EmitBakerReadBarrierCbnz(custom_data);
6235     static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6236                   "Array LDR must be 1 instruction (4B) before the return address label; "
6237                   " 2 instructions (8B) for heap poisoning.");
6238     __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
6239     DCHECK(!needs_null_check);  // The thunk cannot handle the null check.
6240     // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
6241     // macro instructions disallowed in ExactAssemblyScope.
6242     if (kPoisonHeapReferences) {
6243       __ neg(ref_reg, Operand(ref_reg));
6244     }
6245     __ bind(&return_address);
6246   }
6247   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
6248 }
6249 
6250 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
6251   // The following condition is a compile-time one, so it does not have a run-time cost.
6252   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
6253     // The following condition is a run-time one; it is executed after the
6254     // previous compile-time test, to avoid penalizing non-debug builds.
6255     if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) {
6256       UseScratchRegisterScope temps(GetVIXLAssembler());
6257       Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW();
6258       GetAssembler()->GenerateMarkingRegisterCheck(temp, code);
6259     }
6260   }
6261 }
6262 
6263 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction,
6264                                                  Location out,
6265                                                  Location ref,
6266                                                  Location obj,
6267                                                  uint32_t offset,
6268                                                  Location index) {
6269   DCHECK(kEmitCompilerReadBarrier);
6270 
6271   // Insert a slow path based read barrier *after* the reference load.
6272   //
6273   // If heap poisoning is enabled, the unpoisoning of the loaded
6274   // reference will be carried out by the runtime within the slow
6275   // path.
6276   //
6277   // Note that `ref` currently does not get unpoisoned (when heap
6278   // poisoning is enabled), which is alright as the `ref` argument is
6279   // not used by the artReadBarrierSlow entry point.
6280   //
6281   // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6282   SlowPathCodeARM64* slow_path = new (GetScopedAllocator())
6283       ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
6284   AddSlowPath(slow_path);
6285 
6286   __ B(slow_path->GetEntryLabel());
6287   __ Bind(slow_path->GetExitLabel());
6288 }
6289 
6290 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6291                                                       Location out,
6292                                                       Location ref,
6293                                                       Location obj,
6294                                                       uint32_t offset,
6295                                                       Location index) {
6296   if (kEmitCompilerReadBarrier) {
6297     // Baker's read barriers shall be handled by the fast path
6298     // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier).
6299     DCHECK(!kUseBakerReadBarrier);
6300     // If heap poisoning is enabled, unpoisoning will be taken care of
6301     // by the runtime within the slow path.
6302     GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6303   } else if (kPoisonHeapReferences) {
6304     GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
6305   }
6306 }
6307 
6308 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6309                                                         Location out,
6310                                                         Location root) {
6311   DCHECK(kEmitCompilerReadBarrier);
6312 
6313   // Insert a slow path based read barrier *after* the GC root load.
6314   //
6315   // Note that GC roots are not affected by heap poisoning, so we do
6316   // not need to do anything special for this here.
6317   SlowPathCodeARM64* slow_path =
6318       new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
6319   AddSlowPath(slow_path);
6320 
6321   __ B(slow_path->GetEntryLabel());
6322   __ Bind(slow_path->GetExitLabel());
6323 }
6324 
6325 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) {
6326   LocationSummary* locations =
6327       new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6328   locations->SetInAt(0, Location::RequiresRegister());
6329   locations->SetOut(Location::RequiresRegister());
6330 }
6331 
6332 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) {
6333   LocationSummary* locations = instruction->GetLocations();
6334   if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
6335     uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
6336         instruction->GetIndex(), kArm64PointerSize).SizeValue();
6337     __ Ldr(XRegisterFrom(locations->Out()),
6338            MemOperand(XRegisterFrom(locations->InAt(0)), method_offset));
6339   } else {
6340     uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
6341         instruction->GetIndex(), kArm64PointerSize));
6342     __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)),
6343         mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
6344     __ Ldr(XRegisterFrom(locations->Out()),
6345            MemOperand(XRegisterFrom(locations->Out()), method_offset));
6346   }
6347 }
6348 
6349 static void PatchJitRootUse(uint8_t* code,
6350                             const uint8_t* roots_data,
6351                             vixl::aarch64::Literal<uint32_t>* literal,
6352                             uint64_t index_in_table) {
6353   uint32_t literal_offset = literal->GetOffset();
6354   uintptr_t address =
6355       reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
6356   uint8_t* data = code + literal_offset;
6357   reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address);
6358 }
6359 
6360 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
6361   for (const auto& entry : jit_string_patches_) {
6362     const StringReference& string_reference = entry.first;
6363     vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
6364     uint64_t index_in_table = GetJitStringRootIndex(string_reference);
6365     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
6366   }
6367   for (const auto& entry : jit_class_patches_) {
6368     const TypeReference& type_reference = entry.first;
6369     vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second;
6370     uint64_t index_in_table = GetJitClassRootIndex(type_reference);
6371     PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table);
6372   }
6373 }
6374 
6375 #undef __
6376 #undef QUICK_ENTRY_POINT
6377 
6378 #define __ assembler.GetVIXLAssembler()->
6379 
6380 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
6381                                      vixl::aarch64::Register base_reg,
6382                                      vixl::aarch64::MemOperand& lock_word,
6383                                      vixl::aarch64::Label* slow_path,
6384                                      vixl::aarch64::Label* throw_npe = nullptr) {
6385   // Load the lock word containing the rb_state.
6386   __ Ldr(ip0.W(), lock_word);
6387   // Given the numeric representation, it's enough to check the low bit of the rb_state.
6388   static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
6389   static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
6390   __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
6391   static_assert(
6392       BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
6393       "Field and array LDR offsets must be the same to reuse the same code.");
6394   // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
6395   if (throw_npe != nullptr) {
6396     __ Bind(throw_npe);
6397   }
6398   // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
6399   static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
6400                 "Field LDR must be 1 instruction (4B) before the return address label; "
6401                 " 2 instructions (8B) for heap poisoning.");
6402   __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
6403   // Introduce a dependency on the lock_word including rb_state,
6404   // to prevent load-load reordering, and without using
6405   // a memory barrier (which would be more expensive).
6406   __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
6407   __ Br(lr);          // And return back to the function.
6408   // Note: The fake dependency is unnecessary for the slow path.
6409 }
6410 
6411 // Load the read barrier introspection entrypoint in register `entrypoint`.
6412 static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
6413                                                        vixl::aarch64::Register entrypoint) {
6414   // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
6415   DCHECK_EQ(ip0.GetCode(), 16u);
6416   const int32_t entry_point_offset =
6417       Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
6418   __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
6419 }
6420 
6421 void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
6422                                                       uint32_t encoded_data,
6423                                                       /*out*/ std::string* debug_name) {
6424   BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
6425   switch (kind) {
6426     case BakerReadBarrierKind::kField:
6427     case BakerReadBarrierKind::kAcquire: {
6428       auto base_reg =
6429           Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
6430       CheckValidReg(base_reg.GetCode());
6431       auto holder_reg =
6432           Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
6433       CheckValidReg(holder_reg.GetCode());
6434       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
6435       temps.Exclude(ip0, ip1);
6436       // In the case of a field load (with relaxed semantic), if `base_reg` differs from
6437       // `holder_reg`, the offset was too large and we must have emitted (during the construction
6438       // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved
6439       // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before
6440       // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do
6441       // not necessarily do that check before going to the thunk.
6442       //
6443       // In the case of a field load with load-acquire semantics (where `base_reg` always differs
6444       // from `holder_reg`), we also need an explicit null check when implicit null checks are
6445       // allowed, as we do not emit one before going to the thunk.
6446       vixl::aarch64::Label throw_npe_label;
6447       vixl::aarch64::Label* throw_npe = nullptr;
6448       if (GetCompilerOptions().GetImplicitNullChecks() &&
6449           (holder_reg.Is(base_reg) || (kind == BakerReadBarrierKind::kAcquire))) {
6450         throw_npe = &throw_npe_label;
6451         __ Cbz(holder_reg.W(), throw_npe);
6452       }
6453       // Check if the holder is gray and, if not, add fake dependency to the base register
6454       // and return to the LDR instruction to load the reference. Otherwise, use introspection
6455       // to load the reference and call the entrypoint that performs further checks on the
6456       // reference and marks it if needed.
6457       vixl::aarch64::Label slow_path;
6458       MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
6459       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
6460       __ Bind(&slow_path);
6461       if (kind == BakerReadBarrierKind::kField) {
6462         MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
6463         __ Ldr(ip0.W(), ldr_address);         // Load the LDR (immediate) unsigned offset.
6464         LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
6465         __ Ubfx(ip0.W(), ip0.W(), 10, 12);    // Extract the offset.
6466         __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2));   // Load the reference.
6467       } else {
6468         DCHECK(kind == BakerReadBarrierKind::kAcquire);
6469         DCHECK(!base_reg.Is(holder_reg));
6470         LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
6471         __ Ldar(ip0.W(), MemOperand(base_reg));
6472       }
6473       // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
6474       __ Br(ip1);                           // Jump to the entrypoint.
6475       break;
6476     }
6477     case BakerReadBarrierKind::kArray: {
6478       auto base_reg =
6479           Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
6480       CheckValidReg(base_reg.GetCode());
6481       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
6482                 BakerReadBarrierSecondRegField::Decode(encoded_data));
6483       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
6484       temps.Exclude(ip0, ip1);
6485       vixl::aarch64::Label slow_path;
6486       int32_t data_offset =
6487           mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
6488       MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
6489       DCHECK_LT(lock_word.GetOffset(), 0);
6490       EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
6491       __ Bind(&slow_path);
6492       MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
6493       __ Ldr(ip0.W(), ldr_address);         // Load the LDR (register) unsigned offset.
6494       LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
6495       __ Ubfx(ip0, ip0, 16, 6);             // Extract the index register, plus 32 (bit 21 is set).
6496       __ Bfi(ip1, ip0, 3, 6);               // Insert ip0 to the entrypoint address to create
6497                                             // a switch case target based on the index register.
6498       __ Mov(ip0, base_reg);                // Move the base register to ip0.
6499       __ Br(ip1);                           // Jump to the entrypoint's array switch case.
6500       break;
6501     }
6502     case BakerReadBarrierKind::kGcRoot: {
6503       // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
6504       // and it does not have a forwarding address), call the correct introspection entrypoint;
6505       // otherwise return the reference (or the extracted forwarding address).
6506       // There is no gray bit check for GC roots.
6507       auto root_reg =
6508           Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
6509       CheckValidReg(root_reg.GetCode());
6510       DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
6511                 BakerReadBarrierSecondRegField::Decode(encoded_data));
6512       UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
6513       temps.Exclude(ip0, ip1);
6514       vixl::aarch64::Label return_label, not_marked, forwarding_address;
6515       __ Cbz(root_reg, &return_label);
6516       MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
6517       __ Ldr(ip0.W(), lock_word);
6518       __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
6519       __ Bind(&return_label);
6520       __ Br(lr);
6521       __ Bind(&not_marked);
6522       __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
6523       __ B(&forwarding_address, mi);
6524       LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
6525       // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
6526       // art_quick_read_barrier_mark_introspection_gc_roots.
6527       __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
6528       __ Mov(ip0.W(), root_reg);
6529       __ Br(ip1);
6530       __ Bind(&forwarding_address);
6531       __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
6532       __ Br(lr);
6533       break;
6534     }
6535     default:
6536       LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
6537       UNREACHABLE();
6538   }
6539 
6540   // For JIT, the slow path is considered part of the compiled method,
6541   // so JIT should pass null as `debug_name`. Tests may not have a runtime.
6542   DCHECK(Runtime::Current() == nullptr ||
6543          !Runtime::Current()->UseJitCompilation() ||
6544          debug_name == nullptr);
6545   if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
6546     std::ostringstream oss;
6547     oss << "BakerReadBarrierThunk";
6548     switch (kind) {
6549       case BakerReadBarrierKind::kField:
6550         oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
6551             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
6552         break;
6553       case BakerReadBarrierKind::kAcquire:
6554         oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
6555             << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
6556         break;
6557       case BakerReadBarrierKind::kArray:
6558         oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
6559         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
6560                   BakerReadBarrierSecondRegField::Decode(encoded_data));
6561         break;
6562       case BakerReadBarrierKind::kGcRoot:
6563         oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
6564         DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
6565                   BakerReadBarrierSecondRegField::Decode(encoded_data));
6566         break;
6567     }
6568     *debug_name = oss.str();
6569   }
6570 }
6571 
6572 #undef __
6573 
6574 }  // namespace arm64
6575 }  // namespace art
6576