/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
using vixl::EmissionCheckScope;

#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif

namespace art {

template<class MirrorType>
class GcRoot;

namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;
using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the
// jump table version generates 7 instructions and num_entries literals. The compare/jump sequence
// therefore generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
// offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
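// (A 32-bit LDR with an unsigned, scaled 12-bit immediate can encode offsets up to 0x3FFC,
// i.e. just under 16KiB, hence the threshold below.)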
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;

inline Condition ARM64Condition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne;
    case kCondLT: return lt;
    case kCondLE: return le;
    case kCondGT: return gt;
    case kCondGE: return ge;
    case kCondB:  return lo;
    case kCondBE: return ls;
    case kCondA:  return hi;
    case kCondAE: return hs;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) {
  // The ARM64 condition codes can express all the necessary branches, see the
  // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual.
  // There is no dex instruction or HIR that would need the missing conditions
  // "equal or unordered" or "not equal".
  switch (cond) {
    case kCondEQ: return eq;
    case kCondNE: return ne /* unordered */;
    case kCondLT: return gt_bias ? cc : lt /* unordered */;
    case kCondLE: return gt_bias ? ls : le /* unordered */;
    case kCondGT: return gt_bias ? hi /* unordered */ : gt;
    case kCondGE: return gt_bias ? cs /* unordered */ : ge;
    default:
      LOG(FATAL) << "UNREACHABLE";
      UNREACHABLE();
  }
}

Location ARM64ReturnLocation(DataType::Type return_type) {
  // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the
  // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`,
  // but we use the exact registers for clarity.
  if (return_type == DataType::Type::kFloat32) {
    return LocationFrom(s0);
  } else if (return_type == DataType::Type::kFloat64) {
    return LocationFrom(d0);
  } else if (return_type == DataType::Type::kInt64) {
    return LocationFrom(x0);
  } else if (return_type == DataType::Type::kVoid) {
    return Location::NoLocation();
  } else {
    return LocationFrom(w0);
  }
}

Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) {
  return ARM64ReturnLocation(return_type);
}

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  InvokeRuntimeCallingConvention calling_convention;
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
  DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
            RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
                         DataType::Type::kReference).GetCode());
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()

// Calculate memory accessing operand for save/restore live registers.
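// Core registers are stored at `spill_offset` from the base, immediately followed by the FP
// registers; if the resulting offsets cannot be encoded in an STP/LDP immediate, a temporary
// base register is set up below.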
static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset,
                                           bool is_save) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
                                         codegen->GetNumberOfCoreRegisters(),
                                         fp_spills,
                                         codegen->GetNumberOfFloatingPointRegisters()));

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);

  MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);

  Register base = masm->StackPointer();
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_size = fp_list.GetTotalSizeInBytes();
  int64_t reg_size = kXRegSizeInBytes;
  int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size;
  uint32_t ls_access_size = WhichPowerOf2(reg_size);
  if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) &&
      !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) {
    // If the offset does not fit in the instruction's immediate field, use an alternate register
    // to compute the base address (the floating-point registers' spill base address).
    Register new_base = temps.AcquireSameSizeAs(base);
    __ Add(new_base, base, Operand(spill_offset + core_spill_size));
    base = new_base;
    spill_offset = -core_spill_size;
    int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size;
    DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size));
    DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size));
  }

  if (is_save) {
    __ StoreCPURegList(core_list, MemOperand(base, spill_offset));
    __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  } else {
    __ LoadCPURegList(core_list, MemOperand(base, spill_offset));
    __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size));
  }
}

void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  for (uint32_t i : LowToHighBits(core_spills)) {
    // If the register holds an object, update the stack mask.
    if (locations->RegisterContainsObject(i)) {
      locations->SetStackBit(stack_offset / kVRegSize);
    }
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_core_stack_offsets_[i] = stack_offset;
    stack_offset += kXRegSizeInBytes;
  }

  const size_t fp_reg_size = codegen->GetGraph()->HasSIMD() ?
      kQRegSizeInBytes : kDRegSizeInBytes;
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  for (uint32_t i : LowToHighBits(fp_spills)) {
    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
    saved_fpu_stack_offsets_[i] = stack_offset;
    stack_offset += fp_reg_size;
  }

  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true);
}

void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
  SaveRestoreLiveRegistersHelper(codegen,
                                 locations,
                                 codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false);
}

class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kInt32,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ?
        kQuickThrowStringBounds : kQuickThrowArrayBounds;
    arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
};

class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
};

class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
      : SlowPathCodeARM64(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
      arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
                                  source,
                                  cls_->GetType());
    }
    if (must_do_clinit) {
      arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
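    // `out` may be invalid, e.g. for an HClinitCheck whose result has no uses.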
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      DataType::Type type = instruction_->GetType();
      arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type);
    }
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
};

class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit LoadStringSlowPathARM64(HLoadString* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_);
    arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    DataType::Type type = instruction_->GetType();
    arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
};

class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    arm64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
};

class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCodeARM64(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
    arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
    if (successor_ == nullptr) {
      __ B(GetReturnLabel());
    } else {
      __ B(arm64_codegen->GetLabelOf(successor_));
    }
  }

  vixl::aarch64::Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }

 private:
  // If not null, the block to branch to after the suspend check.
  HBasicBlock* const successor_;

  // If `successor_` is null, the label to branch to after the suspend check.
  vixl::aarch64::Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64);
};

class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
      : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();

    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    uint32_t dex_pc = instruction_->GetDexPc();

    __ Bind(GetEntryLabel());

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               LocationFrom(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               LocationFrom(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
      DataType::Type ret_type = instruction_->GetType();
      Location ret_loc = calling_convention.GetReturnLocation(ret_type);
      arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
    } else {
      DCHECK(instruction_->IsCheckCast());
      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      RestoreLiveRegisters(codegen, locations);
      __ B(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64);
};

class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
      : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    __ Mov(calling_convention.GetRegisterAt(0),
           static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
};

class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        LocationFrom(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        LocationFrom(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        LocationFrom(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
};

void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
  uint32_t num_entries = switch_instr_->GetNumEntries();
  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);

  // We are about to use the assembler to place literals directly. Make sure we have enough
  // underlying code buffer and that we have generated the jump table with the right size.
  EmissionCheckScope scope(codegen->GetVIXLAssembler(),
                           num_entries * sizeof(int32_t),
                           CodeBufferCheckScope::kExactSize);

  __ Bind(&table_start_);
  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
  for (uint32_t i = 0; i < num_entries; i++) {
    vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]);
    DCHECK(target_label->IsBound());
    ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
    Literal<int32_t> literal(jump_offset);
    __ place(&literal);
  }
}

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
                                           Location out,
                                           Location ref,
                                           Location obj,
                                           uint32_t offset,
                                           Location index)
      : SlowPathCodeARM64(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial object
    // has been overwritten by (or after) the heap object reference load
    // to be instrumented, e.g.:
    //
    //   __ Ldr(out, HeapOperand(out, class_offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DataType::Type type = DataType::Type::kReference;
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();
    // The read barrier instrumentation of object ArrayGet
    // instructions does not support the HIntermediateAddress
    // instruction.
    DCHECK(!(instruction_->IsArrayGet() &&
             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));

    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute the actual memory offset and store it in `index`.
        Register index_reg = RegisterFrom(index_, DataType::Type::kInt32);
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
        if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
          // We are about to change the value of `index_reg` (see the
          // calls to vixl::MacroAssembler::Lsl and
          // vixl::MacroAssembler::Mov below), but it has
          // not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen);
          __ Mov(free_reg.W(), index_reg);
          index_reg = free_reg;
          index = LocationFrom(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the scale
        // factor (2) cannot overflow in practice, as the runtime is
        // unable to allocate object arrays with a size larger than
        // 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ Lsl(index_reg, index_reg, DataType::SizeShift(type));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ Add(index_reg, index_reg, Operand(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0u);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          LocationFrom(calling_convention.GetRegisterAt(0)),
                          type,
                          nullptr);
    parallel_move.AddMove(obj_,
                          LocationFrom(calling_convention.GetRegisterAt(1)),
                          type,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            LocationFrom(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
    }
    arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);

    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }

 private:
  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(XRegisterFrom(ref_).GetCode());
    size_t obj = static_cast<int>(XRegisterFrom(obj_).GetCode());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return Register(VIXLRegCodeFromART(i), kXRegSize);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on ARM64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
};

// Slow path generating a read barrier for a GC root.
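// Used for the GC roots loaded by HLoadClass and HLoadString (see the DCHECK in EmitNativeCode).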
class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
 public:
  ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
      : SlowPathCodeARM64(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DataType::Type type = DataType::Type::kReference;
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
    // The argument of the ReadBarrierForRootSlow is not a managed
    // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
    // thus we need a 64-bit move here, and we cannot use
    //
    //   arm64_codegen->MoveLocation(
    //       LocationFrom(calling_convention.GetRegisterAt(0)),
    //       root_,
    //       type);
    //
    // which would emit a 32-bit move, as `type` is a (32-bit wide)
    // reference type (`DataType::Type::kReference`).
    __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
    arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                                 instruction_,
                                 instruction_->GetDexPc(),
                                 this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);

    RestoreLiveRegisters(codegen, locations);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
};

#undef __

Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
  Location next_location;
  if (type == DataType::Type::kVoid) {
    LOG(FATAL) << "Unreachable type " << type;
  }

  if (DataType::IsFloatingPointType(type) &&
      (float_index_ < calling_convention.GetNumberOfFpuRegisters())) {
    next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++));
  } else if (!DataType::IsFloatingPointType(type) &&
             (gp_index_ < calling_convention.GetNumberOfRegisters())) {
    next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++));
  } else {
    size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_);
    next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset)
                                                : Location::StackSlot(stack_offset);
  }

  // Space on the stack is reserved for all arguments.
  stack_index_ += DataType::Is64BitType(type) ?
      2 : 1;
  return next_location;
}

Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
  return LocationFrom(kArtMethodRegister);
}

CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
                                       const CompilerOptions& compiler_options,
                                       OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfAllocatableRegisters,
                    kNumberOfAllocatableFPRegisters,
                    kNumberOfAllocatableRegisterPairs,
                    callee_saved_core_registers.GetList(),
                    callee_saved_fp_registers.GetList(),
                    compiler_options,
                    stats),
      block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator(),
                 compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      uint32_literals_(std::less<uint32_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      uint64_literals_(std::less<uint64_t>(),
                       graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(StringReferenceValueComparator(),
                          graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(TypeReferenceValueComparator(),
                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
                                         graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  // Save the link register (containing the return address) to mimic Quick.
  AddAllocatedRegister(LocationFrom(lr));
}

#define __ GetVIXLAssembler()->

void CodeGeneratorARM64::EmitJumpTables() {
  for (auto&& jump_table : jump_tables_) {
    jump_table->EmitTable(this);
  }
}

void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
  EmitJumpTables();

  // Emit JIT baker read barrier slow paths.
  DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
  for (auto& entry : jit_baker_read_barrier_slow_paths_) {
    uint32_t encoded_data = entry.first;
    vixl::aarch64::Label* slow_path_entry = &entry.second.label;
    __ Bind(slow_path_entry);
    CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
  }

  // Ensure we emit the literal pool.
  __ FinalizeCode();

  CodeGenerator::Finalize(allocator);

  // Verify Baker read barrier linker patches.
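  // Debug-build only: decode the instruction emitted next to each patch label (following it,
  // or preceding it for GC roots) and check that its encoding matches what the Baker read
  // barrier thunks expect.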
  if (kIsDebugBuild) {
    ArrayRef<const uint8_t> code = allocator->GetMemory();
    for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
      DCHECK(info.label.IsBound());
      uint32_t literal_offset = info.label.GetLocation();
      DCHECK_ALIGNED(literal_offset, 4u);

      auto GetInsn = [&code](uint32_t offset) {
        DCHECK_ALIGNED(offset, 4u);
        return
            (static_cast<uint32_t>(code[offset + 0]) << 0) +
            (static_cast<uint32_t>(code[offset + 1]) << 8) +
            (static_cast<uint32_t>(code[offset + 2]) << 16) +
            (static_cast<uint32_t>(code[offset + 3]) << 24);
      };

      const uint32_t encoded_data = info.custom_data;
      BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
      // Check that the next instruction matches the expected LDR.
      switch (kind) {
        case BakerReadBarrierKind::kField:
        case BakerReadBarrierKind::kAcquire: {
          DCHECK_GE(code.size() - literal_offset, 8u);
          uint32_t next_insn = GetInsn(literal_offset + 4u);
          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          if (kind == BakerReadBarrierKind::kField) {
            // LDR (immediate) with correct base_reg.
            CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
          } else {
            DCHECK(kind == BakerReadBarrierKind::kAcquire);
            // LDAR with correct base_reg.
            CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
          }
          break;
        }
        case BakerReadBarrierKind::kArray: {
          DCHECK_GE(code.size() - literal_offset, 8u);
          uint32_t next_insn = GetInsn(literal_offset + 4u);
          // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
          // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
          CheckValidReg(next_insn & 0x1fu);  // Check destination register.
          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
          CheckValidReg((next_insn >> 16) & 0x1f);  // Check index register.
          break;
        }
        case BakerReadBarrierKind::kGcRoot: {
          DCHECK_GE(literal_offset, 4u);
          uint32_t prev_insn = GetInsn(literal_offset - 4u);
          const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
          // Usually LDR (immediate) with correct root_reg but
          // we may have a "MOV marked, old_value" for UnsafeCASObject.
          if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) {    // MOV?
            CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg);  // LDR?
          }
          break;
        }
        default:
          LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
          UNREACHABLE();
      }
    }
  }
}

void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
  // Note: There are 6 kinds of moves:
  // 1. constant -> GPR/FPR (non-cycle)
  // 2. constant -> stack (non-cycle)
  // 3. GPR/FPR -> GPR/FPR
  // 4. GPR/FPR -> stack
  // 5. stack -> GPR/FPR
  // 6. stack -> stack (non-cycle)
  // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5
  // VIXL uses at most 1 GPR. VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting
  // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the
  // dependency.
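  // Open the VIXL scratch register scope for the duration of the parallel move emission.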
  vixl_temps_.Open(GetVIXLAssembler());
}

void ParallelMoveResolverARM64::FinishEmitNativeCode() {
  vixl_temps_.Close();
}

Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) {
  DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister
         || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot
         || kind == Location::kSIMDStackSlot);
  kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot)
      ? Location::kFpuRegister
      : Location::kRegister;
  Location scratch = GetScratchLocation(kind);
  if (!scratch.Equals(Location::NoLocation())) {
    return scratch;
  }
  // Allocate from VIXL temp registers.
  if (kind == Location::kRegister) {
    scratch = LocationFrom(vixl_temps_.AcquireX());
  } else {
    DCHECK_EQ(kind, Location::kFpuRegister);
    scratch = LocationFrom(codegen_->GetGraph()->HasSIMD()
        ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize)
        : vixl_temps_.AcquireD());
  }
  AddScratchLocation(scratch);
  return scratch;
}

void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) {
  if (loc.IsRegister()) {
    vixl_temps_.Release(XRegisterFrom(loc));
  } else {
    DCHECK(loc.IsFpuRegister());
    vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc));
  }
  RemoveScratchLocation(loc);
}

void ParallelMoveResolverARM64::EmitMove(size_t index) {
  MoveOperands* move = moves_[index];
  codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
}

void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
  MacroAssembler* masm = GetVIXLAssembler();
  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
    UseScratchRegisterScope temps(masm);
    Register counter = temps.AcquireX();
    Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
    if (!is_frame_entry) {
      __ Ldr(method, MemOperand(sp, 0));
    }
    __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
    __ Add(counter, counter, 1);
    // Subtract one if the counter would overflow.
    __ Sub(counter, counter, Operand(counter, LSR, 16));
    __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
  }

  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
    ScopedObjectAccess soa(Thread::Current());
    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
    if (info != nullptr) {
      uint64_t address = reinterpret_cast64<uint64_t>(info);
      vixl::aarch64::Label done;
      UseScratchRegisterScope temps(masm);
      Register temp = temps.AcquireX();
      Register counter = temps.AcquireW();
      __ Mov(temp, address);
      __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
      __ Add(counter, counter, 1);
      __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
      __ Tst(counter, 0xffff);
      __ B(ne, &done);
      if (is_frame_entry) {
        if (HasEmptyFrame()) {
          // The entrypoint expects the method at the bottom of the stack. We
          // claim stack space necessary for alignment.
          __ Claim(kStackAlignment);
          __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0));
        } else if (!RequiresCurrentMethod()) {
          __ Str(kArtMethodRegister, MemOperand(sp, 0));
        }
      } else {
        CHECK(RequiresCurrentMethod());
      }
      uint32_t entrypoint_offset =
          GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
      __ Ldr(lr, MemOperand(tr, entrypoint_offset));
      // Note: we don't record the call here (and therefore don't generate a stack
      // map), as the entrypoint should never be suspended.
      __ Blr(lr);
      if (HasEmptyFrame()) {
        CHECK(is_frame_entry);
        __ Ldr(lr, MemOperand(sp, 8));
        __ Drop(kStackAlignment);
      }
      __ Bind(&done);
    }
  }
}

void CodeGeneratorARM64::GenerateFrameEntry() {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Bind(&frame_entry_label_);

  bool do_overflow_check =
      FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
  if (do_overflow_check) {
    UseScratchRegisterScope temps(masm);
    Register temp = temps.AcquireX();
    DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
    __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64)));
    {
      // Ensure that between load and RecordPcInfo there are no pools emitted.
      ExactAssemblyScope eas(GetVIXLAssembler(),
                             kInstructionSize,
                             CodeBufferCheckScope::kExactSize);
      __ ldr(wzr, MemOperand(temp, 0));
      RecordPcInfo(nullptr, 0);
    }
  }

  if (!HasEmptyFrame()) {
    // Stack layout:
    //   sp[frame_size - 8]  : lr.
    //   ...                 : other preserved core registers.
    //   ...                 : other preserved fp registers.
    //   ...                 : reserved frame space.
    //   sp[0]               : current method.
    int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
    uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
    CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
    DCHECK(!preserved_core_registers.IsEmpty());
    uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
    CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();

    // Save the current method if we need it, or if using STP reduces code
    // size. Note that we do not do this in HCurrentMethod, as the
    // instruction might have been removed in the SSA graph.
    CPURegister lowest_spill;
    if (core_spills_offset == kXRegSizeInBytes) {
      // If there is no gap between the method and the lowest core spill, use
      // aligned STP pre-index to store both. Max difference is 512. We do
      // that to reduce code size even if we do not have to save the method.
      DCHECK_LE(frame_size, 512);  // 32 core registers are only 256 bytes.
      lowest_spill = preserved_core_registers.PopLowestIndex();
      __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex));
    } else if (RequiresCurrentMethod()) {
      __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
    } else {
      __ Claim(frame_size);
    }
    GetAssembler()->cfi().AdjustCFAOffset(frame_size);
    if (lowest_spill.IsValid()) {
      GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset);
      core_spills_offset += kXRegSizeInBytes;
    }
    GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset);
    GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset);

    if (GetGraph()->HasShouldDeoptimizeFlag()) {
      // Initialize should_deoptimize flag to 0.
      Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize);
      __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
    }
  }
  MaybeIncrementHotness(/* is_frame_entry= */ true);
  MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}

void CodeGeneratorARM64::GenerateFrameExit() {
  GetAssembler()->cfi().RememberState();
  if (!HasEmptyFrame()) {
    int32_t frame_size = dchecked_integral_cast<int32_t>(GetFrameSize());
    uint32_t core_spills_offset = frame_size - GetCoreSpillSize();
    CPURegList preserved_core_registers = GetFramePreservedCoreRegisters();
    DCHECK(!preserved_core_registers.IsEmpty());
    uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize();
    CPURegList preserved_fp_registers = GetFramePreservedFPRegisters();

    CPURegister lowest_spill;
    if (core_spills_offset == kXRegSizeInBytes) {
      // If there is no gap between the method and the lowest core spill, use
      // aligned LDP pre-index to pop both. Max difference is 504. We do
      // that to reduce code size even though the loaded method is unused.
      DCHECK_LE(frame_size, 504);  // 32 core registers are only 256 bytes.
      lowest_spill = preserved_core_registers.PopLowestIndex();
      core_spills_offset += kXRegSizeInBytes;
    }
    GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset);
    GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset);
    if (lowest_spill.IsValid()) {
      __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex));
      GetAssembler()->cfi().Restore(DWARFReg(lowest_spill));
    } else {
      __ Drop(frame_size);
    }
    GetAssembler()->cfi().AdjustCFAOffset(-frame_size);
  }
  __ Ret();
  GetAssembler()->cfi().RestoreState();
  GetAssembler()->cfi().DefCFAOffset(GetFrameSize());
}

CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const {
  DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0));
  return CPURegList(CPURegister::kRegister, kXRegSize,
                    core_spill_mask_);
}

CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const {
  DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_,
                                         GetNumberOfFloatingPointRegisters()));
  return CPURegList(CPURegister::kVRegister, kDRegSize,
                    fpu_spill_mask_);
}

void CodeGeneratorARM64::Bind(HBasicBlock* block) {
  __ Bind(GetLabelOf(block));
}

void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) {
  DCHECK(location.IsRegister());
  __ Mov(RegisterFrom(location, DataType::Type::kInt32), value);
}

void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) {
  if (location.IsRegister()) {
    locations->AddTemp(location);
  } else {
    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
  }
}

void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) {
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register card = temps.AcquireX();
  Register temp = temps.AcquireW();  // Index within the CardTable - 32bit.
  vixl::aarch64::Label done;
  if (value_can_be_null) {
    __ Cbz(value, &done);
  }
  // Load the address of the card table into `card`.
  __ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
  // Calculate the offset (in the card table) of the card corresponding to
  // `object`.
  __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
  // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
  // `object`'s card.
  //
  // Register `card` contains the address of the card table. Note that the card
  // table's base is biased during its creation so that it always starts at an
  // address whose least-significant byte is equal to `kCardDirty` (see
  // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
  // below writes the `kCardDirty` (byte) value into the `object`'s card
  // (located at `card + object >> kCardShift`).
  //
  // This dual use of the value in register `card` (1. to calculate the location
  // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
  // (no need to explicitly load `kCardDirty` as an immediate value).
  __ Strb(card, MemOperand(card, temp.X()));
  if (value_can_be_null) {
    __ Bind(&done);
  }
}

void CodeGeneratorARM64::SetupBlockedRegisters() const {
  // Blocked core registers:
  //   lr   : Runtime reserved.
  //   tr   : Runtime reserved.
  //   mr   : Runtime reserved.
  //   ip1  : VIXL core temp.
  //   ip0  : VIXL core temp.
  //   x18  : Platform register.
  //
  // Blocked fp registers:
  //   d31  : VIXL fp temp.
  CPURegList reserved_core_registers = vixl_reserved_core_registers;
  reserved_core_registers.Combine(runtime_reserved_core_registers);
  while (!reserved_core_registers.IsEmpty()) {
    blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
  }
  blocked_core_registers_[X18] = true;

  CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
  while (!reserved_fp_registers.IsEmpty()) {
    blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true;
  }

  if (GetGraph()->IsDebuggable()) {
    // Stubs do not save callee-save floating point registers. If the graph
    // is debuggable, we need to deal with these registers differently. For
    // now, just block them.
    CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers;
    while (!reserved_fp_registers_debuggable.IsEmpty()) {
      blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true;
    }
  }
}

size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
  __ Str(reg, MemOperand(sp, stack_index));
  return kArm64WordSize;
}

size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize);
  __ Ldr(reg, MemOperand(sp, stack_index));
  return kArm64WordSize;
}

size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                                     uint32_t reg_id ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
             << "use SaveRestoreLiveRegistersHelper";
  UNREACHABLE();
}

size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
                                                        uint32_t reg_id ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "FP registers shouldn't be saved/restored individually, "
             << "use SaveRestoreLiveRegistersHelper";
  UNREACHABLE();
}

void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << XRegister(reg);
}

void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << DRegister(reg);
}

const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
  return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
}

void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
  if (constant->IsIntConstant()) {
    __ Mov(Register(destination), constant->AsIntConstant()->GetValue());
  } else if (constant->IsLongConstant()) {
    __ Mov(Register(destination), constant->AsLongConstant()->GetValue());
  } else if (constant->IsNullConstant()) {
    __ Mov(Register(destination), 0);
  } else if (constant->IsFloatConstant()) {
    __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue());
  } else {
    DCHECK(constant->IsDoubleConstant());
    __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue());
  }
}


static bool CoherentConstantAndType(Location
constant, DataType::Type type) {
1377   DCHECK(constant.IsConstant());
1378   HConstant* cst = constant.GetConstant();
1379   return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
1380          // Null is mapped to a core W register, which we associate with kPrimInt.
1381          (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
1382          (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
1383          (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
1384          (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
1385 }
1386
1387 // Allocate a scratch register from the VIXL pool, querying first
1388 // the floating-point register pool, and then the core register
1389 // pool. This is essentially a reimplementation of
1390 // vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
1391 // using a different allocation strategy.
1392 static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
1393                                                     vixl::aarch64::UseScratchRegisterScope* temps,
1394                                                     int size_in_bits) {
1395   return masm->GetScratchVRegisterList()->IsEmpty()
1396       ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
1397       : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
1398 }
1399
1400 void CodeGeneratorARM64::MoveLocation(Location destination,
1401                                       Location source,
1402                                       DataType::Type dst_type) {
1403   if (source.Equals(destination)) {
1404     return;
1405   }
1406
1407   // A valid move can always be inferred from the destination and source
1408   // locations. When moving from and to a register, the argument type can be
1409   // used to generate 32bit instead of 64bit moves. In debug mode we also
1410   // check the coherency of the locations and the type.
1411   bool unspecified_type = (dst_type == DataType::Type::kVoid);
1412
1413   if (destination.IsRegister() || destination.IsFpuRegister()) {
1414     if (unspecified_type) {
1415       HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
1416       if (source.IsStackSlot() ||
1417           (src_cst != nullptr && (src_cst->IsIntConstant()
1418                                   || src_cst->IsFloatConstant()
1419                                   || src_cst->IsNullConstant()))) {
1420         // For stack slots and 32bit constants, a 32bit type is appropriate.
1421         dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
1422       } else {
1423         // If the source is a double stack slot or a 64bit constant, a 64bit
1424         // type is appropriate. Else the source is a register, and since the
1425         // type has not been specified, we choose a 64bit type to force a 64bit
1426         // move.
1427         dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
1428       }
1429     }
1430     DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
1431            (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
1432     CPURegister dst = CPURegisterFrom(destination, dst_type);
1433     if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
1434       DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
1435       __ Ldr(dst, StackOperandFrom(source));
1436     } else if (source.IsSIMDStackSlot()) {
1437       __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
1438     } else if (source.IsConstant()) {
1439       DCHECK(CoherentConstantAndType(source, dst_type));
1440       MoveConstant(dst, source.GetConstant());
1441     } else if (source.IsRegister()) {
1442       if (destination.IsRegister()) {
1443         __ Mov(Register(dst), RegisterFrom(source, dst_type));
1444       } else {
1445         DCHECK(destination.IsFpuRegister());
1446         DataType::Type source_type = DataType::Is64BitType(dst_type)
1447             ?
DataType::Type::kInt64 1448 : DataType::Type::kInt32; 1449 __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type)); 1450 } 1451 } else { 1452 DCHECK(source.IsFpuRegister()); 1453 if (destination.IsRegister()) { 1454 DataType::Type source_type = DataType::Is64BitType(dst_type) 1455 ? DataType::Type::kFloat64 1456 : DataType::Type::kFloat32; 1457 __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type)); 1458 } else { 1459 DCHECK(destination.IsFpuRegister()); 1460 if (GetGraph()->HasSIMD()) { 1461 __ Mov(QRegisterFrom(destination), QRegisterFrom(source)); 1462 } else { 1463 __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type)); 1464 } 1465 } 1466 } 1467 } else if (destination.IsSIMDStackSlot()) { 1468 if (source.IsFpuRegister()) { 1469 __ Str(QRegisterFrom(source), StackOperandFrom(destination)); 1470 } else { 1471 DCHECK(source.IsSIMDStackSlot()); 1472 UseScratchRegisterScope temps(GetVIXLAssembler()); 1473 if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) { 1474 Register temp = temps.AcquireX(); 1475 __ Ldr(temp, MemOperand(sp, source.GetStackIndex())); 1476 __ Str(temp, MemOperand(sp, destination.GetStackIndex())); 1477 __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize)); 1478 __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize)); 1479 } else { 1480 VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize); 1481 __ Ldr(temp, StackOperandFrom(source)); 1482 __ Str(temp, StackOperandFrom(destination)); 1483 } 1484 } 1485 } else { // The destination is not a register. It must be a stack slot. 1486 DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot()); 1487 if (source.IsRegister() || source.IsFpuRegister()) { 1488 if (unspecified_type) { 1489 if (source.IsRegister()) { 1490 dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64; 1491 } else { 1492 dst_type = 1493 destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64; 1494 } 1495 } 1496 DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) && 1497 (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type))); 1498 __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination)); 1499 } else if (source.IsConstant()) { 1500 DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type)) 1501 << source << " " << dst_type; 1502 UseScratchRegisterScope temps(GetVIXLAssembler()); 1503 HConstant* src_cst = source.GetConstant(); 1504 CPURegister temp; 1505 if (src_cst->IsZeroBitPattern()) { 1506 temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) 1507 ? Register(xzr) 1508 : Register(wzr); 1509 } else { 1510 if (src_cst->IsIntConstant()) { 1511 temp = temps.AcquireW(); 1512 } else if (src_cst->IsLongConstant()) { 1513 temp = temps.AcquireX(); 1514 } else if (src_cst->IsFloatConstant()) { 1515 temp = temps.AcquireS(); 1516 } else { 1517 DCHECK(src_cst->IsDoubleConstant()); 1518 temp = temps.AcquireD(); 1519 } 1520 MoveConstant(temp, src_cst); 1521 } 1522 __ Str(temp, StackOperandFrom(destination)); 1523 } else { 1524 DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot()); 1525 DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot()); 1526 UseScratchRegisterScope temps(GetVIXLAssembler()); 1527 // Use any scratch register (a core or a floating-point one) 1528 // from VIXL scratch register pools as a temporary. 
1529       //
1530       // We used to only use the FP scratch register pool, but in some
1531       // rare cases the only register from this pool (D31) would
1532       // already be used (e.g. within a ParallelMove instruction, when
1533       // a move is blocked by another move requiring a scratch FP
1534       // register, which would reserve D31). To prevent this issue, we
1535       // ask for a scratch register of any type (core or FP).
1536       //
1537       // Also, we start by asking for an FP scratch register, as the
1538       // demand for scratch core registers is higher. This is why we
1539       // use AcquireFPOrCoreCPURegisterOfSize instead of
1540       // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
1541       // allocates core scratch registers first.
1542       CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
1543           GetVIXLAssembler(),
1544           &temps,
1545           (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
1546       __ Ldr(temp, StackOperandFrom(source));
1547       __ Str(temp, StackOperandFrom(destination));
1548     }
1549   }
1550 }
1551
1552 void CodeGeneratorARM64::Load(DataType::Type type,
1553                               CPURegister dst,
1554                               const MemOperand& src) {
1555   switch (type) {
1556     case DataType::Type::kBool:
1557     case DataType::Type::kUint8:
1558       __ Ldrb(Register(dst), src);
1559       break;
1560     case DataType::Type::kInt8:
1561       __ Ldrsb(Register(dst), src);
1562       break;
1563     case DataType::Type::kUint16:
1564       __ Ldrh(Register(dst), src);
1565       break;
1566     case DataType::Type::kInt16:
1567       __ Ldrsh(Register(dst), src);
1568       break;
1569     case DataType::Type::kInt32:
1570     case DataType::Type::kReference:
1571     case DataType::Type::kInt64:
1572     case DataType::Type::kFloat32:
1573     case DataType::Type::kFloat64:
1574       DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
1575       __ Ldr(dst, src);
1576       break;
1577     case DataType::Type::kUint32:
1578     case DataType::Type::kUint64:
1579     case DataType::Type::kVoid:
1580       LOG(FATAL) << "Unreachable type " << type;
1581   }
1582 }
1583
1584 void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
1585                                      CPURegister dst,
1586                                      const MemOperand& src,
1587                                      bool needs_null_check) {
1588   MacroAssembler* masm = GetVIXLAssembler();
1589   UseScratchRegisterScope temps(masm);
1590   Register temp_base = temps.AcquireX();
1591   DataType::Type type = instruction->GetType();
1592
1593   DCHECK(!src.IsPreIndex());
1594   DCHECK(!src.IsPostIndex());
1595
1596   // TODO(vixl): Let the MacroAssembler handle MemOperand.
1597   __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
1598   {
1599     // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
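    // For example (registers below are illustrative only), the intent is to keep
    // this pair contiguous:
    //
    //     ldar w0, [x16]      ; may fault if the base is null
    //     <stack map recorded for exactly this PC by MaybeRecordImplicitNullCheck>
    //
    // If the macro-assembler emitted a literal pool between the two, the recorded
    // PC would no longer point at the faulting load and the implicit null check
    // would be attributed to the wrong instruction; the ExactAssemblyScope blocks
    // below rule that out.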
1600 MemOperand base = MemOperand(temp_base); 1601 switch (type) { 1602 case DataType::Type::kBool: 1603 case DataType::Type::kUint8: 1604 case DataType::Type::kInt8: 1605 { 1606 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1607 __ ldarb(Register(dst), base); 1608 if (needs_null_check) { 1609 MaybeRecordImplicitNullCheck(instruction); 1610 } 1611 } 1612 if (type == DataType::Type::kInt8) { 1613 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte); 1614 } 1615 break; 1616 case DataType::Type::kUint16: 1617 case DataType::Type::kInt16: 1618 { 1619 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1620 __ ldarh(Register(dst), base); 1621 if (needs_null_check) { 1622 MaybeRecordImplicitNullCheck(instruction); 1623 } 1624 } 1625 if (type == DataType::Type::kInt16) { 1626 __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte); 1627 } 1628 break; 1629 case DataType::Type::kInt32: 1630 case DataType::Type::kReference: 1631 case DataType::Type::kInt64: 1632 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); 1633 { 1634 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1635 __ ldar(Register(dst), base); 1636 if (needs_null_check) { 1637 MaybeRecordImplicitNullCheck(instruction); 1638 } 1639 } 1640 break; 1641 case DataType::Type::kFloat32: 1642 case DataType::Type::kFloat64: { 1643 DCHECK(dst.IsFPRegister()); 1644 DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); 1645 1646 Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); 1647 { 1648 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1649 __ ldar(temp, base); 1650 if (needs_null_check) { 1651 MaybeRecordImplicitNullCheck(instruction); 1652 } 1653 } 1654 __ Fmov(VRegister(dst), temp); 1655 break; 1656 } 1657 case DataType::Type::kUint32: 1658 case DataType::Type::kUint64: 1659 case DataType::Type::kVoid: 1660 LOG(FATAL) << "Unreachable type " << type; 1661 } 1662 } 1663 } 1664 1665 void CodeGeneratorARM64::Store(DataType::Type type, 1666 CPURegister src, 1667 const MemOperand& dst) { 1668 switch (type) { 1669 case DataType::Type::kBool: 1670 case DataType::Type::kUint8: 1671 case DataType::Type::kInt8: 1672 __ Strb(Register(src), dst); 1673 break; 1674 case DataType::Type::kUint16: 1675 case DataType::Type::kInt16: 1676 __ Strh(Register(src), dst); 1677 break; 1678 case DataType::Type::kInt32: 1679 case DataType::Type::kReference: 1680 case DataType::Type::kInt64: 1681 case DataType::Type::kFloat32: 1682 case DataType::Type::kFloat64: 1683 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); 1684 __ Str(src, dst); 1685 break; 1686 case DataType::Type::kUint32: 1687 case DataType::Type::kUint64: 1688 case DataType::Type::kVoid: 1689 LOG(FATAL) << "Unreachable type " << type; 1690 } 1691 } 1692 1693 void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, 1694 DataType::Type type, 1695 CPURegister src, 1696 const MemOperand& dst, 1697 bool needs_null_check) { 1698 MacroAssembler* masm = GetVIXLAssembler(); 1699 UseScratchRegisterScope temps(GetVIXLAssembler()); 1700 Register temp_base = temps.AcquireX(); 1701 1702 DCHECK(!dst.IsPreIndex()); 1703 DCHECK(!dst.IsPostIndex()); 1704 1705 // TODO(vixl): Let the MacroAssembler handle this. 
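  // The release stores below use STLR-class instructions, which, unlike plain STR,
  // only accept a bare base register with no offset. The next two statements
  // therefore fold the offset into a temporary base first; the shape of the emitted
  // code is roughly (offset and registers are illustrative):
  //
  //     add  x16, x1, #12       ; temp_base = base + field offset
  //     stlr w0, [x16]          ; store-release at the computed address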
1706 Operand op = OperandFromMemOperand(dst); 1707 __ Add(temp_base, dst.GetBaseRegister(), op); 1708 MemOperand base = MemOperand(temp_base); 1709 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 1710 switch (type) { 1711 case DataType::Type::kBool: 1712 case DataType::Type::kUint8: 1713 case DataType::Type::kInt8: 1714 { 1715 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1716 __ stlrb(Register(src), base); 1717 if (needs_null_check) { 1718 MaybeRecordImplicitNullCheck(instruction); 1719 } 1720 } 1721 break; 1722 case DataType::Type::kUint16: 1723 case DataType::Type::kInt16: 1724 { 1725 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1726 __ stlrh(Register(src), base); 1727 if (needs_null_check) { 1728 MaybeRecordImplicitNullCheck(instruction); 1729 } 1730 } 1731 break; 1732 case DataType::Type::kInt32: 1733 case DataType::Type::kReference: 1734 case DataType::Type::kInt64: 1735 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); 1736 { 1737 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1738 __ stlr(Register(src), base); 1739 if (needs_null_check) { 1740 MaybeRecordImplicitNullCheck(instruction); 1741 } 1742 } 1743 break; 1744 case DataType::Type::kFloat32: 1745 case DataType::Type::kFloat64: { 1746 DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); 1747 Register temp_src; 1748 if (src.IsZero()) { 1749 // The zero register is used to avoid synthesizing zero constants. 1750 temp_src = Register(src); 1751 } else { 1752 DCHECK(src.IsFPRegister()); 1753 temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); 1754 __ Fmov(temp_src, VRegister(src)); 1755 } 1756 { 1757 ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 1758 __ stlr(temp_src, base); 1759 if (needs_null_check) { 1760 MaybeRecordImplicitNullCheck(instruction); 1761 } 1762 } 1763 break; 1764 } 1765 case DataType::Type::kUint32: 1766 case DataType::Type::kUint64: 1767 case DataType::Type::kVoid: 1768 LOG(FATAL) << "Unreachable type " << type; 1769 } 1770 } 1771 1772 void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint, 1773 HInstruction* instruction, 1774 uint32_t dex_pc, 1775 SlowPathCode* slow_path) { 1776 ValidateInvokeRuntime(entrypoint, instruction, slow_path); 1777 1778 ThreadOffset64 entrypoint_offset = GetThreadOffset<kArm64PointerSize>(entrypoint); 1779 // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the 1780 // entire oat file. This adds an extra branch and we do not want to slow down the main path. 1781 // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative. 1782 if (slow_path == nullptr || Runtime::Current()->UseJitCompilation()) { 1783 __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value())); 1784 // Ensure the pc position is recorded immediately after the `blr` instruction. 1785 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 1786 __ blr(lr); 1787 if (EntrypointRequiresStackMap(entrypoint)) { 1788 RecordPcInfo(instruction, dex_pc, slow_path); 1789 } 1790 } else { 1791 // Ensure the pc position is recorded immediately after the `bl` instruction. 
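    // In this AOT branch the call site is a single `bl` to a per-entrypoint thunk
    // shared across the oat file, instead of the per-call-site
    // `ldr lr, [tr, #entrypoint_offset]` + `blr lr` pair used in the branch above.
    // A rough sketch of the intended shape (the actual thunk is produced by the
    // linker patch machinery, so the details here are illustrative only):
    //
    //     bl <shared_entrypoint_thunk>   ; one instruction per call site
    //
    // where the thunk is expected to load the entrypoint from the Thread register
    // and branch to it.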
1792 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 1793 EmitEntrypointThunkCall(entrypoint_offset); 1794 if (EntrypointRequiresStackMap(entrypoint)) { 1795 RecordPcInfo(instruction, dex_pc, slow_path); 1796 } 1797 } 1798 } 1799 1800 void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, 1801 HInstruction* instruction, 1802 SlowPathCode* slow_path) { 1803 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); 1804 __ Ldr(lr, MemOperand(tr, entry_point_offset)); 1805 __ Blr(lr); 1806 } 1807 1808 void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, 1809 Register class_reg) { 1810 UseScratchRegisterScope temps(GetVIXLAssembler()); 1811 Register temp = temps.AcquireW(); 1812 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 1813 const size_t status_byte_offset = 1814 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); 1815 constexpr uint32_t shifted_visibly_initialized_value = 1816 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); 1817 1818 // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize 1819 // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code 1820 // size, load only the high byte of the field and compare with 0xf0. 1821 // Note: The same code size could be achieved with LDR+MNV(asr #24)+CBNZ but benchmarks 1822 // show that this pattern is slower (tested on little cores). 1823 __ Ldrb(temp, HeapOperand(class_reg, status_byte_offset)); 1824 __ Cmp(temp, shifted_visibly_initialized_value); 1825 __ B(lo, slow_path->GetEntryLabel()); 1826 __ Bind(slow_path->GetExitLabel()); 1827 } 1828 1829 void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( 1830 HTypeCheckInstruction* check, vixl::aarch64::Register temp) { 1831 uint32_t path_to_root = check->GetBitstringPathToRoot(); 1832 uint32_t mask = check->GetBitstringMask(); 1833 DCHECK(IsPowerOfTwo(mask + 1)); 1834 size_t mask_bits = WhichPowerOf2(mask + 1); 1835 1836 if (mask_bits == 16u) { 1837 // Load only the bitstring part of the status word. 1838 __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); 1839 } else { 1840 // /* uint32_t */ temp = temp->status_ 1841 __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); 1842 // Extract the bitstring bits. 1843 __ Ubfx(temp, temp, 0, mask_bits); 1844 } 1845 // Compare the bitstring bits to `path_to_root`. 
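  // As a rough example (constants and registers are hypothetical): with a bitstring
  // mask of 0xffff, `mask_bits` is 16 and the branch above loads only the 16-bit
  // bitstring part of the status word, so the whole check boils down to roughly
  //
  //     ldrh w2, [x2, #status_offset]
  //     cmp  w2, #path_to_root
  //
  // and the caller then branches on (or materializes) the resulting eq/ne flags to
  // accept or reject the type.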
1846 __ Cmp(temp, path_to_root); 1847 } 1848 1849 void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { 1850 BarrierType type = BarrierAll; 1851 1852 switch (kind) { 1853 case MemBarrierKind::kAnyAny: 1854 case MemBarrierKind::kAnyStore: { 1855 type = BarrierAll; 1856 break; 1857 } 1858 case MemBarrierKind::kLoadAny: { 1859 type = BarrierReads; 1860 break; 1861 } 1862 case MemBarrierKind::kStoreStore: { 1863 type = BarrierWrites; 1864 break; 1865 } 1866 default: 1867 LOG(FATAL) << "Unexpected memory barrier " << kind; 1868 } 1869 __ Dmb(InnerShareable, type); 1870 } 1871 1872 void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, 1873 HBasicBlock* successor) { 1874 SuspendCheckSlowPathARM64* slow_path = 1875 down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); 1876 if (slow_path == nullptr) { 1877 slow_path = 1878 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor); 1879 instruction->SetSlowPath(slow_path); 1880 codegen_->AddSlowPath(slow_path); 1881 if (successor != nullptr) { 1882 DCHECK(successor->IsLoopHeader()); 1883 } 1884 } else { 1885 DCHECK_EQ(slow_path->GetSuccessor(), successor); 1886 } 1887 1888 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); 1889 Register temp = temps.AcquireW(); 1890 1891 __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset<kArm64PointerSize>().SizeValue())); 1892 if (successor == nullptr) { 1893 __ Cbnz(temp, slow_path->GetEntryLabel()); 1894 __ Bind(slow_path->GetReturnLabel()); 1895 } else { 1896 __ Cbz(temp, codegen_->GetLabelOf(successor)); 1897 __ B(slow_path->GetEntryLabel()); 1898 // slow_path will return to GetLabelOf(successor). 1899 } 1900 } 1901 1902 InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, 1903 CodeGeneratorARM64* codegen) 1904 : InstructionCodeGenerator(graph, codegen), 1905 assembler_(codegen->GetAssembler()), 1906 codegen_(codegen) {} 1907 1908 void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { 1909 DCHECK_EQ(instr->InputCount(), 2U); 1910 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); 1911 DataType::Type type = instr->GetResultType(); 1912 switch (type) { 1913 case DataType::Type::kInt32: 1914 case DataType::Type::kInt64: 1915 locations->SetInAt(0, Location::RequiresRegister()); 1916 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr)); 1917 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 1918 break; 1919 1920 case DataType::Type::kFloat32: 1921 case DataType::Type::kFloat64: 1922 locations->SetInAt(0, Location::RequiresFpuRegister()); 1923 locations->SetInAt(1, Location::RequiresFpuRegister()); 1924 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 1925 break; 1926 1927 default: 1928 LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type; 1929 } 1930 } 1931 1932 void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, 1933 const FieldInfo& field_info) { 1934 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 1935 1936 bool object_field_get_with_read_barrier = 1937 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 1938 LocationSummary* locations = 1939 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 1940 object_field_get_with_read_barrier 1941 ? 
LocationSummary::kCallOnSlowPath 1942 : LocationSummary::kNoCall); 1943 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 1944 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 1945 // We need a temporary register for the read barrier load in 1946 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier() 1947 // only if the field is volatile or the offset is too big. 1948 if (field_info.IsVolatile() || 1949 field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { 1950 locations->AddTemp(FixedTempLocation()); 1951 } 1952 } 1953 locations->SetInAt(0, Location::RequiresRegister()); 1954 if (DataType::IsFloatingPointType(instruction->GetType())) { 1955 locations->SetOut(Location::RequiresFpuRegister()); 1956 } else { 1957 // The output overlaps for an object field get when read barriers 1958 // are enabled: we do not want the load to overwrite the object's 1959 // location, as we need it to emit the read barrier. 1960 locations->SetOut( 1961 Location::RequiresRegister(), 1962 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); 1963 } 1964 } 1965 1966 void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, 1967 const FieldInfo& field_info) { 1968 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 1969 LocationSummary* locations = instruction->GetLocations(); 1970 Location base_loc = locations->InAt(0); 1971 Location out = locations->Out(); 1972 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 1973 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); 1974 DataType::Type load_type = instruction->GetType(); 1975 MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); 1976 1977 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && 1978 load_type == DataType::Type::kReference) { 1979 // Object FieldGet with Baker's read barrier case. 1980 // /* HeapReference<Object> */ out = *(base + offset) 1981 Register base = RegisterFrom(base_loc, DataType::Type::kReference); 1982 Location maybe_temp = 1983 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); 1984 // Note that potential implicit null checks are handled in this 1985 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call. 1986 codegen_->GenerateFieldLoadWithBakerReadBarrier( 1987 instruction, 1988 out, 1989 base, 1990 offset, 1991 maybe_temp, 1992 /* needs_null_check= */ true, 1993 field_info.IsVolatile()); 1994 } else { 1995 // General case. 1996 if (field_info.IsVolatile()) { 1997 // Note that a potential implicit null check is handled in this 1998 // CodeGeneratorARM64::LoadAcquire call. 1999 // NB: LoadAcquire will record the pc info if needed. 2000 codegen_->LoadAcquire( 2001 instruction, OutputCPURegister(instruction), field, /* needs_null_check= */ true); 2002 } else { 2003 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2004 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2005 codegen_->Load(load_type, OutputCPURegister(instruction), field); 2006 codegen_->MaybeRecordImplicitNullCheck(instruction); 2007 } 2008 if (load_type == DataType::Type::kReference) { 2009 // If read barriers are enabled, emit read barriers other than 2010 // Baker's using a slow path (and also unpoison the loaded 2011 // reference, if heap poisoning is enabled). 
2012 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); 2013 } 2014 } 2015 } 2016 2017 void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { 2018 LocationSummary* locations = 2019 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 2020 locations->SetInAt(0, Location::RequiresRegister()); 2021 if (IsConstantZeroBitPattern(instruction->InputAt(1))) { 2022 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 2023 } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { 2024 locations->SetInAt(1, Location::RequiresFpuRegister()); 2025 } else { 2026 locations->SetInAt(1, Location::RequiresRegister()); 2027 } 2028 } 2029 2030 void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, 2031 const FieldInfo& field_info, 2032 bool value_can_be_null) { 2033 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 2034 2035 Register obj = InputRegisterAt(instruction, 0); 2036 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1); 2037 CPURegister source = value; 2038 Offset offset = field_info.GetFieldOffset(); 2039 DataType::Type field_type = field_info.GetFieldType(); 2040 2041 { 2042 // We use a block to end the scratch scope before the write barrier, thus 2043 // freeing the temporary registers so they can be used in `MarkGCCard`. 2044 UseScratchRegisterScope temps(GetVIXLAssembler()); 2045 2046 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { 2047 DCHECK(value.IsW()); 2048 Register temp = temps.AcquireW(); 2049 __ Mov(temp, value.W()); 2050 GetAssembler()->PoisonHeapReference(temp.W()); 2051 source = temp; 2052 } 2053 2054 if (field_info.IsVolatile()) { 2055 codegen_->StoreRelease( 2056 instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true); 2057 } else { 2058 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 2059 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2060 codegen_->Store(field_type, source, HeapOperand(obj, offset)); 2061 codegen_->MaybeRecordImplicitNullCheck(instruction); 2062 } 2063 } 2064 2065 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { 2066 codegen_->MarkGCCard(obj, Register(value), value_can_be_null); 2067 } 2068 } 2069 2070 void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { 2071 DataType::Type type = instr->GetType(); 2072 2073 switch (type) { 2074 case DataType::Type::kInt32: 2075 case DataType::Type::kInt64: { 2076 Register dst = OutputRegister(instr); 2077 Register lhs = InputRegisterAt(instr, 0); 2078 Operand rhs = InputOperandAt(instr, 1); 2079 if (instr->IsAdd()) { 2080 __ Add(dst, lhs, rhs); 2081 } else if (instr->IsAnd()) { 2082 __ And(dst, lhs, rhs); 2083 } else if (instr->IsOr()) { 2084 __ Orr(dst, lhs, rhs); 2085 } else if (instr->IsSub()) { 2086 __ Sub(dst, lhs, rhs); 2087 } else if (instr->IsRor()) { 2088 if (rhs.IsImmediate()) { 2089 uint32_t shift = rhs.GetImmediate() & (lhs.GetSizeInBits() - 1); 2090 __ Ror(dst, lhs, shift); 2091 } else { 2092 // Ensure shift distance is in the same size register as the result. If 2093 // we are rotating a long and the shift comes in a w register originally, 2094 // we don't need to sxtw for use as an x since the shift distances are 2095 // all & reg_bits - 1. 
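        // For instance, rotating a 64-bit value by a register holding 70 behaves
        // exactly like rotating by 6, because RORV only consumes the shift amount
        // modulo the register size (70 & 63 == 6). That is why no explicit
        // sign-extension of a 32-bit shift input is needed before reading it as an
        // X register here.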
2096 __ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type)); 2097 } 2098 } else if (instr->IsMin() || instr->IsMax()) { 2099 __ Cmp(lhs, rhs); 2100 __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt); 2101 } else { 2102 DCHECK(instr->IsXor()); 2103 __ Eor(dst, lhs, rhs); 2104 } 2105 break; 2106 } 2107 case DataType::Type::kFloat32: 2108 case DataType::Type::kFloat64: { 2109 VRegister dst = OutputFPRegister(instr); 2110 VRegister lhs = InputFPRegisterAt(instr, 0); 2111 VRegister rhs = InputFPRegisterAt(instr, 1); 2112 if (instr->IsAdd()) { 2113 __ Fadd(dst, lhs, rhs); 2114 } else if (instr->IsSub()) { 2115 __ Fsub(dst, lhs, rhs); 2116 } else if (instr->IsMin()) { 2117 __ Fmin(dst, lhs, rhs); 2118 } else if (instr->IsMax()) { 2119 __ Fmax(dst, lhs, rhs); 2120 } else { 2121 LOG(FATAL) << "Unexpected floating-point binary operation"; 2122 } 2123 break; 2124 } 2125 default: 2126 LOG(FATAL) << "Unexpected binary operation type " << type; 2127 } 2128 } 2129 2130 void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) { 2131 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); 2132 2133 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); 2134 DataType::Type type = instr->GetResultType(); 2135 switch (type) { 2136 case DataType::Type::kInt32: 2137 case DataType::Type::kInt64: { 2138 locations->SetInAt(0, Location::RequiresRegister()); 2139 locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); 2140 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2141 break; 2142 } 2143 default: 2144 LOG(FATAL) << "Unexpected shift type " << type; 2145 } 2146 } 2147 2148 void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) { 2149 DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); 2150 2151 DataType::Type type = instr->GetType(); 2152 switch (type) { 2153 case DataType::Type::kInt32: 2154 case DataType::Type::kInt64: { 2155 Register dst = OutputRegister(instr); 2156 Register lhs = InputRegisterAt(instr, 0); 2157 Operand rhs = InputOperandAt(instr, 1); 2158 if (rhs.IsImmediate()) { 2159 uint32_t shift_value = rhs.GetImmediate() & 2160 (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance); 2161 if (instr->IsShl()) { 2162 __ Lsl(dst, lhs, shift_value); 2163 } else if (instr->IsShr()) { 2164 __ Asr(dst, lhs, shift_value); 2165 } else { 2166 __ Lsr(dst, lhs, shift_value); 2167 } 2168 } else { 2169 Register rhs_reg = dst.IsX() ? 
rhs.GetRegister().X() : rhs.GetRegister().W(); 2170 2171 if (instr->IsShl()) { 2172 __ Lsl(dst, lhs, rhs_reg); 2173 } else if (instr->IsShr()) { 2174 __ Asr(dst, lhs, rhs_reg); 2175 } else { 2176 __ Lsr(dst, lhs, rhs_reg); 2177 } 2178 } 2179 break; 2180 } 2181 default: 2182 LOG(FATAL) << "Unexpected shift operation type " << type; 2183 } 2184 } 2185 2186 void LocationsBuilderARM64::VisitAdd(HAdd* instruction) { 2187 HandleBinaryOp(instruction); 2188 } 2189 2190 void InstructionCodeGeneratorARM64::VisitAdd(HAdd* instruction) { 2191 HandleBinaryOp(instruction); 2192 } 2193 2194 void LocationsBuilderARM64::VisitAnd(HAnd* instruction) { 2195 HandleBinaryOp(instruction); 2196 } 2197 2198 void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { 2199 HandleBinaryOp(instruction); 2200 } 2201 2202 void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) { 2203 DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType(); 2204 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); 2205 locations->SetInAt(0, Location::RequiresRegister()); 2206 // There is no immediate variant of negated bitwise instructions in AArch64. 2207 locations->SetInAt(1, Location::RequiresRegister()); 2208 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2209 } 2210 2211 void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) { 2212 Register dst = OutputRegister(instr); 2213 Register lhs = InputRegisterAt(instr, 0); 2214 Register rhs = InputRegisterAt(instr, 1); 2215 2216 switch (instr->GetOpKind()) { 2217 case HInstruction::kAnd: 2218 __ Bic(dst, lhs, rhs); 2219 break; 2220 case HInstruction::kOr: 2221 __ Orn(dst, lhs, rhs); 2222 break; 2223 case HInstruction::kXor: 2224 __ Eon(dst, lhs, rhs); 2225 break; 2226 default: 2227 LOG(FATAL) << "Unreachable"; 2228 } 2229 } 2230 2231 void LocationsBuilderARM64::VisitDataProcWithShifterOp( 2232 HDataProcWithShifterOp* instruction) { 2233 DCHECK(instruction->GetType() == DataType::Type::kInt32 || 2234 instruction->GetType() == DataType::Type::kInt64); 2235 LocationSummary* locations = 2236 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 2237 if (instruction->GetInstrKind() == HInstruction::kNeg) { 2238 locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); 2239 } else { 2240 locations->SetInAt(0, Location::RequiresRegister()); 2241 } 2242 locations->SetInAt(1, Location::RequiresRegister()); 2243 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2244 } 2245 2246 void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp( 2247 HDataProcWithShifterOp* instruction) { 2248 DataType::Type type = instruction->GetType(); 2249 HInstruction::InstructionKind kind = instruction->GetInstrKind(); 2250 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 2251 Register out = OutputRegister(instruction); 2252 Register left; 2253 if (kind != HInstruction::kNeg) { 2254 left = InputRegisterAt(instruction, 0); 2255 } 2256 // If this `HDataProcWithShifterOp` was created by merging a type conversion as the 2257 // shifter operand operation, the IR generating `right_reg` (input to the type 2258 // conversion) can have a different type from the current instruction's type, 2259 // so we manually indicate the type. 
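  // As an illustrative example (register assignments are arbitrary): an HIR pattern
  // such as `a + (b << 5)` on Int64 is emitted as a single instruction with a
  // shifted operand,
  //
  //     add x0, x1, x2, lsl #5
  //
  // and a merged widening conversion is similarly folded into an extended operand,
  // e.g. `add x0, x1, w2, sxtw`.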
2260 Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type); 2261 Operand right_operand(0); 2262 2263 HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); 2264 if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { 2265 right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind)); 2266 } else { 2267 right_operand = Operand(right_reg, 2268 helpers::ShiftFromOpKind(op_kind), 2269 instruction->GetShiftAmount()); 2270 } 2271 2272 // Logical binary operations do not support extension operations in the 2273 // operand. Note that VIXL would still manage if it was passed by generating 2274 // the extension as a separate instruction. 2275 // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`. 2276 DCHECK(!right_operand.IsExtendedRegister() || 2277 (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor && 2278 kind != HInstruction::kNeg)); 2279 switch (kind) { 2280 case HInstruction::kAdd: 2281 __ Add(out, left, right_operand); 2282 break; 2283 case HInstruction::kAnd: 2284 __ And(out, left, right_operand); 2285 break; 2286 case HInstruction::kNeg: 2287 DCHECK(instruction->InputAt(0)->AsConstant()->IsArithmeticZero()); 2288 __ Neg(out, right_operand); 2289 break; 2290 case HInstruction::kOr: 2291 __ Orr(out, left, right_operand); 2292 break; 2293 case HInstruction::kSub: 2294 __ Sub(out, left, right_operand); 2295 break; 2296 case HInstruction::kXor: 2297 __ Eor(out, left, right_operand); 2298 break; 2299 default: 2300 LOG(FATAL) << "Unexpected operation kind: " << kind; 2301 UNREACHABLE(); 2302 } 2303 } 2304 2305 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { 2306 LocationSummary* locations = 2307 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 2308 locations->SetInAt(0, Location::RequiresRegister()); 2309 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction)); 2310 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2311 } 2312 2313 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { 2314 __ Add(OutputRegister(instruction), 2315 InputRegisterAt(instruction, 0), 2316 Operand(InputOperandAt(instruction, 1))); 2317 } 2318 2319 void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) { 2320 LocationSummary* locations = 2321 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 2322 2323 HIntConstant* shift = instruction->GetShift()->AsIntConstant(); 2324 2325 locations->SetInAt(0, Location::RequiresRegister()); 2326 // For byte case we don't need to shift the index variable so we can encode the data offset into 2327 // ADD instruction. For other cases we prefer the data_offset to be in register; that will hoist 2328 // data offset constant generation out of the loop and reduce the critical path length in the 2329 // loop. 2330 locations->SetInAt(1, shift->GetValue() == 0 2331 ? 
Location::ConstantLocation(instruction->GetOffset()->AsIntConstant()) 2332 : Location::RequiresRegister()); 2333 locations->SetInAt(2, Location::ConstantLocation(shift)); 2334 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2335 } 2336 2337 void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( 2338 HIntermediateAddressIndex* instruction) { 2339 Register index_reg = InputRegisterAt(instruction, 0); 2340 uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2)); 2341 uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue(); 2342 2343 if (shift == 0) { 2344 __ Add(OutputRegister(instruction), index_reg, offset); 2345 } else { 2346 Register offset_reg = InputRegisterAt(instruction, 1); 2347 __ Add(OutputRegister(instruction), offset_reg, Operand(index_reg, LSL, shift)); 2348 } 2349 } 2350 2351 void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { 2352 LocationSummary* locations = 2353 new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall); 2354 HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); 2355 if (instr->GetOpKind() == HInstruction::kSub && 2356 accumulator->IsConstant() && 2357 accumulator->AsConstant()->IsArithmeticZero()) { 2358 // Don't allocate register for Mneg instruction. 2359 } else { 2360 locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, 2361 Location::RequiresRegister()); 2362 } 2363 locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); 2364 locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister()); 2365 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2366 } 2367 2368 void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { 2369 Register res = OutputRegister(instr); 2370 Register mul_left = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex); 2371 Register mul_right = InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex); 2372 2373 // Avoid emitting code that could trigger Cortex A53's erratum 835769. 2374 // This fixup should be carried out for all multiply-accumulate instructions: 2375 // madd, msub, smaddl, smsubl, umaddl and umsubl. 2376 if (instr->GetType() == DataType::Type::kInt64 && 2377 codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { 2378 MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); 2379 vixl::aarch64::Instruction* prev = 2380 masm->GetCursorAddress<vixl::aarch64::Instruction*>() - kInstructionSize; 2381 if (prev->IsLoadOrStore()) { 2382 // Make sure we emit only exactly one nop. 
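      // Erratum 835769 concerns a 64-bit multiply-accumulate that directly follows
      // a memory load or store. For example (registers are illustrative), instead of
      //
      //     ldr  x3, [sp, #16]
      //     madd x0, x1, x2, x3
      //
      // the fixup below emits
      //
      //     ldr  x3, [sp, #16]
      //     nop
      //     madd x0, x1, x2, x3
      //
      // so the two instructions are never adjacent; the ExactAssemblyScope
      // guarantees that exactly one nop (and nothing else) is inserted.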
2383 ExactAssemblyScope scope(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); 2384 __ nop(); 2385 } 2386 } 2387 2388 if (instr->GetOpKind() == HInstruction::kAdd) { 2389 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); 2390 __ Madd(res, mul_left, mul_right, accumulator); 2391 } else { 2392 DCHECK(instr->GetOpKind() == HInstruction::kSub); 2393 HInstruction* accum_instr = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); 2394 if (accum_instr->IsConstant() && accum_instr->AsConstant()->IsArithmeticZero()) { 2395 __ Mneg(res, mul_left, mul_right); 2396 } else { 2397 Register accumulator = InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex); 2398 __ Msub(res, mul_left, mul_right, accumulator); 2399 } 2400 } 2401 } 2402 2403 void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { 2404 bool object_array_get_with_read_barrier = 2405 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 2406 LocationSummary* locations = 2407 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 2408 object_array_get_with_read_barrier 2409 ? LocationSummary::kCallOnSlowPath 2410 : LocationSummary::kNoCall); 2411 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 2412 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 2413 if (instruction->GetIndex()->IsConstant()) { 2414 // Array loads with constant index are treated as field loads. 2415 // We need a temporary register for the read barrier load in 2416 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier() 2417 // only if the offset is too big. 2418 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); 2419 uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); 2420 offset += index << DataType::SizeShift(DataType::Type::kReference); 2421 if (offset >= kReferenceLoadMinFarOffset) { 2422 locations->AddTemp(FixedTempLocation()); 2423 } 2424 } else if (!instruction->GetArray()->IsIntermediateAddress()) { 2425 // We need a non-scratch temporary for the array data pointer in 2426 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no 2427 // intermediate address. 2428 locations->AddTemp(Location::RequiresRegister()); 2429 } 2430 } 2431 locations->SetInAt(0, Location::RequiresRegister()); 2432 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 2433 if (DataType::IsFloatingPointType(instruction->GetType())) { 2434 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2435 } else { 2436 // The output overlaps in the case of an object array get with 2437 // read barriers enabled: we do not want the move to overwrite the 2438 // array's location, as we need it to emit the read barrier. 2439 locations->SetOut( 2440 Location::RequiresRegister(), 2441 object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 2442 } 2443 } 2444 2445 void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { 2446 DataType::Type type = instruction->GetType(); 2447 Register obj = InputRegisterAt(instruction, 0); 2448 LocationSummary* locations = instruction->GetLocations(); 2449 Location index = locations->InAt(1); 2450 Location out = locations->Out(); 2451 uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); 2452 const bool maybe_compressed_char_at = mirror::kUseStringCompression && 2453 instruction->IsStringCharAt(); 2454 MacroAssembler* masm = GetVIXLAssembler(); 2455 UseScratchRegisterScope temps(masm); 2456 2457 // The non-Baker read barrier instrumentation of object ArrayGet instructions 2458 // does not support the HIntermediateAddress instruction. 2459 DCHECK(!((type == DataType::Type::kReference) && 2460 instruction->GetArray()->IsIntermediateAddress() && 2461 kEmitCompilerReadBarrier && 2462 !kUseBakerReadBarrier)); 2463 2464 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2465 // Object ArrayGet with Baker's read barrier case. 2466 // Note that a potential implicit null check is handled in the 2467 // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. 2468 DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); 2469 if (index.IsConstant()) { 2470 DCHECK(!instruction->GetArray()->IsIntermediateAddress()); 2471 // Array load with a constant index can be treated as a field load. 2472 offset += Int64FromLocation(index) << DataType::SizeShift(type); 2473 Location maybe_temp = 2474 (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); 2475 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, 2476 out, 2477 obj.W(), 2478 offset, 2479 maybe_temp, 2480 /* needs_null_check= */ false, 2481 /* use_load_acquire= */ false); 2482 } else { 2483 codegen_->GenerateArrayLoadWithBakerReadBarrier( 2484 instruction, out, obj.W(), offset, index, /* needs_null_check= */ false); 2485 } 2486 } else { 2487 // General case. 2488 MemOperand source = HeapOperand(obj); 2489 Register length; 2490 if (maybe_compressed_char_at) { 2491 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 2492 length = temps.AcquireW(); 2493 { 2494 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2495 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2496 2497 if (instruction->GetArray()->IsIntermediateAddress()) { 2498 DCHECK_LT(count_offset, offset); 2499 int64_t adjusted_offset = 2500 static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset); 2501 // Note that `adjusted_offset` is negative, so this will be a LDUR. 
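          // For instance, with purely hypothetical offsets of 8 for the count field
          // and 16 for the array data, `adjusted_offset` would be 8 - 16 = -8 and
          // the macro-assembler would materialize something like
          //
          //     ldur w2, [x0, #-8]
          //
          // since the ordinary LDR immediate form only encodes non-negative,
          // scaled offsets.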
2502 __ Ldr(length, MemOperand(obj.X(), adjusted_offset)); 2503 } else { 2504 __ Ldr(length, HeapOperand(obj, count_offset)); 2505 } 2506 codegen_->MaybeRecordImplicitNullCheck(instruction); 2507 } 2508 } 2509 if (index.IsConstant()) { 2510 if (maybe_compressed_char_at) { 2511 vixl::aarch64::Label uncompressed_load, done; 2512 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 2513 "Expecting 0=compressed, 1=uncompressed"); 2514 __ Tbnz(length.W(), 0, &uncompressed_load); 2515 __ Ldrb(Register(OutputCPURegister(instruction)), 2516 HeapOperand(obj, offset + Int64FromLocation(index))); 2517 __ B(&done); 2518 __ Bind(&uncompressed_load); 2519 __ Ldrh(Register(OutputCPURegister(instruction)), 2520 HeapOperand(obj, offset + (Int64FromLocation(index) << 1))); 2521 __ Bind(&done); 2522 } else { 2523 offset += Int64FromLocation(index) << DataType::SizeShift(type); 2524 source = HeapOperand(obj, offset); 2525 } 2526 } else { 2527 Register temp = temps.AcquireSameSizeAs(obj); 2528 if (instruction->GetArray()->IsIntermediateAddress()) { 2529 // We do not need to compute the intermediate address from the array: the 2530 // input instruction has done it already. See the comment in 2531 // `TryExtractArrayAccessAddress()`. 2532 if (kIsDebugBuild) { 2533 HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); 2534 DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset); 2535 } 2536 temp = obj; 2537 } else { 2538 __ Add(temp, obj, offset); 2539 } 2540 if (maybe_compressed_char_at) { 2541 vixl::aarch64::Label uncompressed_load, done; 2542 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 2543 "Expecting 0=compressed, 1=uncompressed"); 2544 __ Tbnz(length.W(), 0, &uncompressed_load); 2545 __ Ldrb(Register(OutputCPURegister(instruction)), 2546 HeapOperand(temp, XRegisterFrom(index), LSL, 0)); 2547 __ B(&done); 2548 __ Bind(&uncompressed_load); 2549 __ Ldrh(Register(OutputCPURegister(instruction)), 2550 HeapOperand(temp, XRegisterFrom(index), LSL, 1)); 2551 __ Bind(&done); 2552 } else { 2553 source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type)); 2554 } 2555 } 2556 if (!maybe_compressed_char_at) { 2557 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
2558 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2559 codegen_->Load(type, OutputCPURegister(instruction), source); 2560 codegen_->MaybeRecordImplicitNullCheck(instruction); 2561 } 2562 2563 if (type == DataType::Type::kReference) { 2564 static_assert( 2565 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 2566 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 2567 Location obj_loc = locations->InAt(0); 2568 if (index.IsConstant()) { 2569 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset); 2570 } else { 2571 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, obj_loc, offset, index); 2572 } 2573 } 2574 } 2575 } 2576 2577 void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { 2578 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 2579 locations->SetInAt(0, Location::RequiresRegister()); 2580 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2581 } 2582 2583 void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) { 2584 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction); 2585 vixl::aarch64::Register out = OutputRegister(instruction); 2586 { 2587 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2588 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2589 __ Ldr(out, HeapOperand(InputRegisterAt(instruction, 0), offset)); 2590 codegen_->MaybeRecordImplicitNullCheck(instruction); 2591 } 2592 // Mask out compression flag from String's array length. 2593 if (mirror::kUseStringCompression && instruction->IsStringLength()) { 2594 __ Lsr(out.W(), out.W(), 1u); 2595 } 2596 } 2597 2598 void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { 2599 DataType::Type value_type = instruction->GetComponentType(); 2600 2601 bool needs_type_check = instruction->NeedsTypeCheck(); 2602 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 2603 instruction, 2604 needs_type_check ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); 2605 locations->SetInAt(0, Location::RequiresRegister()); 2606 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 2607 if (IsConstantZeroBitPattern(instruction->InputAt(2))) { 2608 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 2609 } else if (DataType::IsFloatingPointType(value_type)) { 2610 locations->SetInAt(2, Location::RequiresFpuRegister()); 2611 } else { 2612 locations->SetInAt(2, Location::RequiresRegister()); 2613 } 2614 } 2615 2616 void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { 2617 DataType::Type value_type = instruction->GetComponentType(); 2618 LocationSummary* locations = instruction->GetLocations(); 2619 bool needs_type_check = instruction->NeedsTypeCheck(); 2620 bool needs_write_barrier = 2621 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 2622 2623 Register array = InputRegisterAt(instruction, 0); 2624 CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2); 2625 CPURegister source = value; 2626 Location index = locations->InAt(1); 2627 size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value(); 2628 MemOperand destination = HeapOperand(array); 2629 MacroAssembler* masm = GetVIXLAssembler(); 2630 2631 if (!needs_write_barrier) { 2632 DCHECK(!needs_type_check); 2633 if (index.IsConstant()) { 2634 offset += Int64FromLocation(index) << DataType::SizeShift(value_type); 2635 destination = HeapOperand(array, offset); 2636 } else { 2637 UseScratchRegisterScope temps(masm); 2638 Register temp = temps.AcquireSameSizeAs(array); 2639 if (instruction->GetArray()->IsIntermediateAddress()) { 2640 // We do not need to compute the intermediate address from the array: the 2641 // input instruction has done it already. See the comment in 2642 // `TryExtractArrayAccessAddress()`. 2643 if (kIsDebugBuild) { 2644 HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); 2645 DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset); 2646 } 2647 temp = array; 2648 } else { 2649 __ Add(temp, array, offset); 2650 } 2651 destination = HeapOperand(temp, 2652 XRegisterFrom(index), 2653 LSL, 2654 DataType::SizeShift(value_type)); 2655 } 2656 { 2657 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 
2658 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2659 codegen_->Store(value_type, value, destination); 2660 codegen_->MaybeRecordImplicitNullCheck(instruction); 2661 } 2662 } else { 2663 DCHECK(!instruction->GetArray()->IsIntermediateAddress()); 2664 2665 bool can_value_be_null = instruction->GetValueCanBeNull(); 2666 vixl::aarch64::Label do_store; 2667 if (can_value_be_null) { 2668 __ Cbz(Register(value), &do_store); 2669 } 2670 2671 SlowPathCodeARM64* slow_path = nullptr; 2672 if (needs_type_check) { 2673 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction); 2674 codegen_->AddSlowPath(slow_path); 2675 2676 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2677 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 2678 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 2679 2680 UseScratchRegisterScope temps(masm); 2681 Register temp = temps.AcquireSameSizeAs(array); 2682 Register temp2 = temps.AcquireSameSizeAs(array); 2683 2684 // Note that when Baker read barriers are enabled, the type 2685 // checks are performed without read barriers. This is fine, 2686 // even in the case where a class object is in the from-space 2687 // after the flip, as a comparison involving such a type would 2688 // not produce a false positive; it may of course produce a 2689 // false negative, in which case we would take the ArraySet 2690 // slow path. 2691 2692 // /* HeapReference<Class> */ temp = array->klass_ 2693 { 2694 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 2695 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2696 __ Ldr(temp, HeapOperand(array, class_offset)); 2697 codegen_->MaybeRecordImplicitNullCheck(instruction); 2698 } 2699 GetAssembler()->MaybeUnpoisonHeapReference(temp); 2700 2701 // /* HeapReference<Class> */ temp = temp->component_type_ 2702 __ Ldr(temp, HeapOperand(temp, component_offset)); 2703 // /* HeapReference<Class> */ temp2 = value->klass_ 2704 __ Ldr(temp2, HeapOperand(Register(value), class_offset)); 2705 // If heap poisoning is enabled, no need to unpoison `temp` 2706 // nor `temp2`, as we are comparing two poisoned references. 2707 __ Cmp(temp, temp2); 2708 2709 if (instruction->StaticTypeOfArrayIsObjectArray()) { 2710 vixl::aarch64::Label do_put; 2711 __ B(eq, &do_put); 2712 // If heap poisoning is enabled, the `temp` reference has 2713 // not been unpoisoned yet; unpoison it now. 2714 GetAssembler()->MaybeUnpoisonHeapReference(temp); 2715 2716 // /* HeapReference<Class> */ temp = temp->super_class_ 2717 __ Ldr(temp, HeapOperand(temp, super_offset)); 2718 // If heap poisoning is enabled, no need to unpoison 2719 // `temp`, as we are comparing against null below. 
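        // Concretely (an illustration of the fast path only): for code like
        //
        //     Object[] array = ...;
        //     array[i] = someString;
        //
        // the class of `someString` differs from the array's component type, but
        // the component type is exactly java.lang.Object, whose super class is
        // null, so the CBNZ below falls through and the store proceeds without
        // taking the slow path.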
2720 __ Cbnz(temp, slow_path->GetEntryLabel()); 2721 __ Bind(&do_put); 2722 } else { 2723 __ B(ne, slow_path->GetEntryLabel()); 2724 } 2725 } 2726 2727 codegen_->MarkGCCard(array, value.W(), /* value_can_be_null= */ false); 2728 2729 if (can_value_be_null) { 2730 DCHECK(do_store.IsLinked()); 2731 __ Bind(&do_store); 2732 } 2733 2734 UseScratchRegisterScope temps(masm); 2735 if (kPoisonHeapReferences) { 2736 Register temp_source = temps.AcquireSameSizeAs(array); 2737 DCHECK(value.IsW()); 2738 __ Mov(temp_source, value.W()); 2739 GetAssembler()->PoisonHeapReference(temp_source); 2740 source = temp_source; 2741 } 2742 2743 if (index.IsConstant()) { 2744 offset += Int64FromLocation(index) << DataType::SizeShift(value_type); 2745 destination = HeapOperand(array, offset); 2746 } else { 2747 Register temp_base = temps.AcquireSameSizeAs(array); 2748 __ Add(temp_base, array, offset); 2749 destination = HeapOperand(temp_base, 2750 XRegisterFrom(index), 2751 LSL, 2752 DataType::SizeShift(value_type)); 2753 } 2754 2755 { 2756 // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 2757 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 2758 __ Str(source, destination); 2759 2760 if (can_value_be_null || !needs_type_check) { 2761 codegen_->MaybeRecordImplicitNullCheck(instruction); 2762 } 2763 } 2764 2765 if (slow_path != nullptr) { 2766 __ Bind(slow_path->GetExitLabel()); 2767 } 2768 } 2769 } 2770 2771 void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { 2772 RegisterSet caller_saves = RegisterSet::Empty(); 2773 InvokeRuntimeCallingConvention calling_convention; 2774 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 2775 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1).GetCode())); 2776 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); 2777 2778 // If both index and length are constant, we can check the bounds statically and 2779 // generate code accordingly. We want to make sure we generate constant locations 2780 // in that case, regardless of whether they are encodable in the comparison or not. 2781 HInstruction* index = instruction->InputAt(0); 2782 HInstruction* length = instruction->InputAt(1); 2783 bool both_const = index->IsConstant() && length->IsConstant(); 2784 locations->SetInAt(0, both_const 2785 ? Location::ConstantLocation(index->AsConstant()) 2786 : ARM64EncodableConstantOrRegister(index, instruction)); 2787 locations->SetInAt(1, both_const 2788 ? 
Location::ConstantLocation(length->AsConstant()) 2789 : ARM64EncodableConstantOrRegister(length, instruction)); 2790 } 2791 2792 void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) { 2793 LocationSummary* locations = instruction->GetLocations(); 2794 Location index_loc = locations->InAt(0); 2795 Location length_loc = locations->InAt(1); 2796 2797 int cmp_first_input = 0; 2798 int cmp_second_input = 1; 2799 Condition cond = hs; 2800 2801 if (index_loc.IsConstant()) { 2802 int64_t index = Int64FromLocation(index_loc); 2803 if (length_loc.IsConstant()) { 2804 int64_t length = Int64FromLocation(length_loc); 2805 if (index < 0 || index >= length) { 2806 BoundsCheckSlowPathARM64* slow_path = 2807 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction); 2808 codegen_->AddSlowPath(slow_path); 2809 __ B(slow_path->GetEntryLabel()); 2810 } else { 2811 // BCE will remove the bounds check if we are guaranteed to pass. 2812 // However, some optimization after BCE may have generated this, and we should not 2813 // generate a bounds check if it is a valid range. 2814 } 2815 return; 2816 } 2817 // Only the index is constant: change the order of the operands and commute the condition 2818 // so we can use an immediate constant for the index (only the second input to a cmp 2819 // instruction can be an immediate). 2820 cmp_first_input = 1; 2821 cmp_second_input = 0; 2822 cond = ls; 2823 } 2824 BoundsCheckSlowPathARM64* slow_path = 2825 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction); 2826 __ Cmp(InputRegisterAt(instruction, cmp_first_input), 2827 InputOperandAt(instruction, cmp_second_input)); 2828 codegen_->AddSlowPath(slow_path); 2829 __ B(slow_path->GetEntryLabel(), cond); 2830 } 2831 2832 void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) { 2833 LocationSummary* locations = 2834 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); 2835 locations->SetInAt(0, Location::RequiresRegister()); 2836 if (check->HasUses()) { 2837 locations->SetOut(Location::SameAsFirstInput()); 2838 } 2839 // Rely on the type initialization to save everything we need. 2840 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 2841 } 2842 2843 void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) { 2844 // We assume the class is not null. 2845 SlowPathCodeARM64* slow_path = 2846 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check); 2847 codegen_->AddSlowPath(slow_path); 2848 GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); 2849 } 2850 2851 static bool IsFloatingPointZeroConstant(HInstruction* inst) { 2852 return (inst->IsFloatConstant() && (inst->AsFloatConstant()->IsArithmeticZero())) 2853 || (inst->IsDoubleConstant() && (inst->AsDoubleConstant()->IsArithmeticZero())); 2854 } 2855 2856 void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) { 2857 VRegister lhs_reg = InputFPRegisterAt(instruction, 0); 2858 Location rhs_loc = instruction->GetLocations()->InAt(1); 2859 if (rhs_loc.IsConstant()) { 2860 // 0.0 is the only immediate that can be encoded directly in 2861 // an FCMP instruction. 2862 // 2863 // Both the JLS (section 15.20.1) and the JVMS (section 6.5) 2864 // specify that in a floating-point comparison, positive zero 2865 // and negative zero are considered equal, so we can use the 2866 // literal 0.0 for both cases here. 
2867 // 2868 // Note however that some methods (Float.equal, Float.compare, 2869 // Float.compareTo, Double.equal, Double.compare, 2870 // Double.compareTo, Math.max, Math.min, StrictMath.max, 2871 // StrictMath.min) consider 0.0 to be (strictly) greater than 2872 // -0.0. So if we ever translate calls to these methods into a 2873 // HCompare instruction, we must handle the -0.0 case with 2874 // care here. 2875 DCHECK(IsFloatingPointZeroConstant(rhs_loc.GetConstant())); 2876 __ Fcmp(lhs_reg, 0.0); 2877 } else { 2878 __ Fcmp(lhs_reg, InputFPRegisterAt(instruction, 1)); 2879 } 2880 } 2881 2882 void LocationsBuilderARM64::VisitCompare(HCompare* compare) { 2883 LocationSummary* locations = 2884 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); 2885 DataType::Type in_type = compare->InputAt(0)->GetType(); 2886 switch (in_type) { 2887 case DataType::Type::kBool: 2888 case DataType::Type::kUint8: 2889 case DataType::Type::kInt8: 2890 case DataType::Type::kUint16: 2891 case DataType::Type::kInt16: 2892 case DataType::Type::kInt32: 2893 case DataType::Type::kInt64: { 2894 locations->SetInAt(0, Location::RequiresRegister()); 2895 locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare)); 2896 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2897 break; 2898 } 2899 case DataType::Type::kFloat32: 2900 case DataType::Type::kFloat64: { 2901 locations->SetInAt(0, Location::RequiresFpuRegister()); 2902 locations->SetInAt(1, 2903 IsFloatingPointZeroConstant(compare->InputAt(1)) 2904 ? Location::ConstantLocation(compare->InputAt(1)->AsConstant()) 2905 : Location::RequiresFpuRegister()); 2906 locations->SetOut(Location::RequiresRegister()); 2907 break; 2908 } 2909 default: 2910 LOG(FATAL) << "Unexpected type for compare operation " << in_type; 2911 } 2912 } 2913 2914 void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { 2915 DataType::Type in_type = compare->InputAt(0)->GetType(); 2916 2917 // 0 if: left == right 2918 // 1 if: left > right 2919 // -1 if: left < right 2920 switch (in_type) { 2921 case DataType::Type::kBool: 2922 case DataType::Type::kUint8: 2923 case DataType::Type::kInt8: 2924 case DataType::Type::kUint16: 2925 case DataType::Type::kInt16: 2926 case DataType::Type::kInt32: 2927 case DataType::Type::kInt64: { 2928 Register result = OutputRegister(compare); 2929 Register left = InputRegisterAt(compare, 0); 2930 Operand right = InputOperandAt(compare, 1); 2931 __ Cmp(left, right); 2932 __ Cset(result, ne); // result == +1 if NE or 0 otherwise 2933 __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise 2934 break; 2935 } 2936 case DataType::Type::kFloat32: 2937 case DataType::Type::kFloat64: { 2938 Register result = OutputRegister(compare); 2939 GenerateFcmp(compare); 2940 __ Cset(result, ne); 2941 __ Cneg(result, result, ARM64FPCondition(kCondLT, compare->IsGtBias())); 2942 break; 2943 } 2944 default: 2945 LOG(FATAL) << "Unimplemented compare type " << in_type; 2946 } 2947 } 2948 2949 void LocationsBuilderARM64::HandleCondition(HCondition* instruction) { 2950 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 2951 2952 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { 2953 locations->SetInAt(0, Location::RequiresFpuRegister()); 2954 locations->SetInAt(1, 2955 IsFloatingPointZeroConstant(instruction->InputAt(1)) 2956 ? 
Location::ConstantLocation(instruction->InputAt(1)->AsConstant()) 2957 : Location::RequiresFpuRegister()); 2958 } else { 2959 // Integer cases. 2960 locations->SetInAt(0, Location::RequiresRegister()); 2961 locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->InputAt(1), instruction)); 2962 } 2963 2964 if (!instruction->IsEmittedAtUseSite()) { 2965 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2966 } 2967 } 2968 2969 void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) { 2970 if (instruction->IsEmittedAtUseSite()) { 2971 return; 2972 } 2973 2974 LocationSummary* locations = instruction->GetLocations(); 2975 Register res = RegisterFrom(locations->Out(), instruction->GetType()); 2976 IfCondition if_cond = instruction->GetCondition(); 2977 2978 if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { 2979 GenerateFcmp(instruction); 2980 __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias())); 2981 } else { 2982 // Integer cases. 2983 Register lhs = InputRegisterAt(instruction, 0); 2984 Operand rhs = InputOperandAt(instruction, 1); 2985 __ Cmp(lhs, rhs); 2986 __ Cset(res, ARM64Condition(if_cond)); 2987 } 2988 } 2989 2990 #define FOR_EACH_CONDITION_INSTRUCTION(M) \ 2991 M(Equal) \ 2992 M(NotEqual) \ 2993 M(LessThan) \ 2994 M(LessThanOrEqual) \ 2995 M(GreaterThan) \ 2996 M(GreaterThanOrEqual) \ 2997 M(Below) \ 2998 M(BelowOrEqual) \ 2999 M(Above) \ 3000 M(AboveOrEqual) 3001 #define DEFINE_CONDITION_VISITORS(Name) \ 3002 void LocationsBuilderARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } \ 3003 void InstructionCodeGeneratorARM64::Visit##Name(H##Name* comp) { HandleCondition(comp); } 3004 FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS) 3005 #undef DEFINE_CONDITION_VISITORS 3006 #undef FOR_EACH_CONDITION_INSTRUCTION 3007 3008 void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) { 3009 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); 3010 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); 3011 DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm; 3012 3013 Register out = OutputRegister(instruction); 3014 Register dividend = InputRegisterAt(instruction, 0); 3015 3016 if (abs_imm == 2) { 3017 int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte; 3018 __ Add(out, dividend, Operand(dividend, LSR, bits - 1)); 3019 } else { 3020 UseScratchRegisterScope temps(GetVIXLAssembler()); 3021 Register temp = temps.AcquireSameSizeAs(out); 3022 __ Add(temp, dividend, abs_imm - 1); 3023 __ Cmp(dividend, 0); 3024 __ Csel(out, temp, dividend, lt); 3025 } 3026 3027 int ctz_imm = CTZ(abs_imm); 3028 if (imm > 0) { 3029 __ Asr(out, out, ctz_imm); 3030 } else { 3031 __ Neg(out, Operand(out, ASR, ctz_imm)); 3032 } 3033 } 3034 3035 void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { 3036 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3037 3038 LocationSummary* locations = instruction->GetLocations(); 3039 Location second = locations->InAt(1); 3040 DCHECK(second.IsConstant()); 3041 3042 Register out = OutputRegister(instruction); 3043 Register dividend = InputRegisterAt(instruction, 0); 3044 int64_t imm = Int64FromConstant(second.GetConstant()); 3045 3046 DataType::Type type = instruction->GetResultType(); 3047 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 3048 3049 int64_t magic; 3050 int shift; 3051 CalculateMagicAndShiftForDivRem( 3052 imm, /* is_long= */ type == 
DataType::Type::kInt64, &magic, &shift); 3053 3054 UseScratchRegisterScope temps(GetVIXLAssembler()); 3055 Register temp = temps.AcquireSameSizeAs(out); 3056 3057 // temp = get_high(dividend * magic) 3058 __ Mov(temp, magic); 3059 if (type == DataType::Type::kInt64) { 3060 __ Smulh(temp, dividend, temp); 3061 } else { 3062 __ Smull(temp.X(), dividend, temp); 3063 __ Lsr(temp.X(), temp.X(), 32); 3064 } 3065 3066 if (imm > 0 && magic < 0) { 3067 __ Add(temp, temp, dividend); 3068 } else if (imm < 0 && magic > 0) { 3069 __ Sub(temp, temp, dividend); 3070 } 3071 3072 if (shift != 0) { 3073 __ Asr(temp, temp, shift); 3074 } 3075 3076 if (instruction->IsDiv()) { 3077 __ Sub(out, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31)); 3078 } else { 3079 __ Sub(temp, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31)); 3080 // TODO: Strength reduction for msub. 3081 Register temp_imm = temps.AcquireSameSizeAs(out); 3082 __ Mov(temp_imm, imm); 3083 __ Msub(out, temp, temp_imm, dividend); 3084 } 3085 } 3086 3087 void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv *instruction) { 3088 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); 3089 3090 if (imm == 0) { 3091 // Do not generate anything. DivZeroCheck would prevent any code to be executed. 3092 return; 3093 } 3094 3095 if (IsPowerOfTwo(AbsOrMin(imm))) { 3096 GenerateIntDivForPower2Denom(instruction); 3097 } else { 3098 // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier. 3099 DCHECK(imm < -2 || imm > 2) << imm; 3100 GenerateDivRemWithAnyConstant(instruction); 3101 } 3102 } 3103 3104 void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv *instruction) { 3105 DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) 3106 << instruction->GetResultType(); 3107 3108 if (instruction->GetLocations()->InAt(1).IsConstant()) { 3109 GenerateIntDivForConstDenom(instruction); 3110 } else { 3111 Register out = OutputRegister(instruction); 3112 Register dividend = InputRegisterAt(instruction, 0); 3113 Register divisor = InputRegisterAt(instruction, 1); 3114 __ Sdiv(out, dividend, divisor); 3115 } 3116 } 3117 3118 void LocationsBuilderARM64::VisitDiv(HDiv* div) { 3119 LocationSummary* locations = 3120 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall); 3121 switch (div->GetResultType()) { 3122 case DataType::Type::kInt32: 3123 case DataType::Type::kInt64: 3124 locations->SetInAt(0, Location::RequiresRegister()); 3125 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); 3126 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3127 break; 3128 3129 case DataType::Type::kFloat32: 3130 case DataType::Type::kFloat64: 3131 locations->SetInAt(0, Location::RequiresFpuRegister()); 3132 locations->SetInAt(1, Location::RequiresFpuRegister()); 3133 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3134 break; 3135 3136 default: 3137 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3138 } 3139 } 3140 3141 void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) { 3142 DataType::Type type = div->GetResultType(); 3143 switch (type) { 3144 case DataType::Type::kInt32: 3145 case DataType::Type::kInt64: 3146 GenerateIntDiv(div); 3147 break; 3148 3149 case DataType::Type::kFloat32: 3150 case DataType::Type::kFloat64: 3151 __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1)); 3152 break; 3153 3154 default: 3155 LOG(FATAL) << "Unexpected div type " << 
type; 3156 } 3157 } 3158 3159 void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 3160 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 3161 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); 3162 } 3163 3164 void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 3165 SlowPathCodeARM64* slow_path = 3166 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction); 3167 codegen_->AddSlowPath(slow_path); 3168 Location value = instruction->GetLocations()->InAt(0); 3169 3170 DataType::Type type = instruction->GetType(); 3171 3172 if (!DataType::IsIntegralType(type)) { 3173 LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; 3174 UNREACHABLE(); 3175 } 3176 3177 if (value.IsConstant()) { 3178 int64_t divisor = Int64FromLocation(value); 3179 if (divisor == 0) { 3180 __ B(slow_path->GetEntryLabel()); 3181 } else { 3182 // A division by a non-zero constant is valid. We don't need to perform 3183 // any check, so simply fall through. 3184 } 3185 } else { 3186 __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); 3187 } 3188 } 3189 3190 void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) { 3191 LocationSummary* locations = 3192 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3193 locations->SetOut(Location::ConstantLocation(constant)); 3194 } 3195 3196 void InstructionCodeGeneratorARM64::VisitDoubleConstant( 3197 HDoubleConstant* constant ATTRIBUTE_UNUSED) { 3198 // Will be generated at use site. 3199 } 3200 3201 void LocationsBuilderARM64::VisitExit(HExit* exit) { 3202 exit->SetLocations(nullptr); 3203 } 3204 3205 void InstructionCodeGeneratorARM64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 3206 } 3207 3208 void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { 3209 LocationSummary* locations = 3210 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 3211 locations->SetOut(Location::ConstantLocation(constant)); 3212 } 3213 3214 void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { 3215 // Will be generated at use site.
3216 } 3217 3218 void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) { 3219 if (successor->IsExitBlock()) { 3220 DCHECK(got->GetPrevious()->AlwaysThrows()); 3221 return; // no code needed 3222 } 3223 3224 HBasicBlock* block = got->GetBlock(); 3225 HInstruction* previous = got->GetPrevious(); 3226 HLoopInformation* info = block->GetLoopInformation(); 3227 3228 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 3229 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); 3230 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 3231 return; 3232 } 3233 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 3234 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 3235 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 3236 } 3237 if (!codegen_->GoesToNextBlock(block, successor)) { 3238 __ B(codegen_->GetLabelOf(successor)); 3239 } 3240 } 3241 3242 void LocationsBuilderARM64::VisitGoto(HGoto* got) { 3243 got->SetLocations(nullptr); 3244 } 3245 3246 void InstructionCodeGeneratorARM64::VisitGoto(HGoto* got) { 3247 HandleGoto(got, got->GetSuccessor()); 3248 } 3249 3250 void LocationsBuilderARM64::VisitTryBoundary(HTryBoundary* try_boundary) { 3251 try_boundary->SetLocations(nullptr); 3252 } 3253 3254 void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary) { 3255 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); 3256 if (!successor->IsExitBlock()) { 3257 HandleGoto(try_boundary, successor); 3258 } 3259 } 3260 3261 void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction, 3262 size_t condition_input_index, 3263 vixl::aarch64::Label* true_target, 3264 vixl::aarch64::Label* false_target) { 3265 HInstruction* cond = instruction->InputAt(condition_input_index); 3266 3267 if (true_target == nullptr && false_target == nullptr) { 3268 // Nothing to do. The code always falls through. 3269 return; 3270 } else if (cond->IsIntConstant()) { 3271 // Constant condition, statically compared against "true" (integer value 1). 3272 if (cond->AsIntConstant()->IsTrue()) { 3273 if (true_target != nullptr) { 3274 __ B(true_target); 3275 } 3276 } else { 3277 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); 3278 if (false_target != nullptr) { 3279 __ B(false_target); 3280 } 3281 } 3282 return; 3283 } 3284 3285 // The following code generates these patterns: 3286 // (1) true_target == nullptr && false_target != nullptr 3287 // - opposite condition true => branch to false_target 3288 // (2) true_target != nullptr && false_target == nullptr 3289 // - condition true => branch to true_target 3290 // (3) true_target != nullptr && false_target != nullptr 3291 // - condition true => branch to true_target 3292 // - branch to false_target 3293 if (IsBooleanValueOrMaterializedCondition(cond)) { 3294 // The condition instruction has been materialized, compare the output to 0. 3295 Location cond_val = instruction->GetLocations()->InAt(condition_input_index); 3296 DCHECK(cond_val.IsRegister()); 3297 if (true_target == nullptr) { 3298 __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target); 3299 } else { 3300 __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target); 3301 } 3302 } else { 3303 // The condition instruction has not been materialized, use its inputs as 3304 // the comparison and its condition as the branch condition. 
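// Integer comparisons against zero are strength-reduced below to CBZ/CBNZ or,
// for signed lt/ge, to a TBNZ/TBZ test of the sign bit.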
3305 HCondition* condition = cond->AsCondition(); 3306 3307 DataType::Type type = condition->InputAt(0)->GetType(); 3308 if (DataType::IsFloatingPointType(type)) { 3309 GenerateFcmp(condition); 3310 if (true_target == nullptr) { 3311 IfCondition opposite_condition = condition->GetOppositeCondition(); 3312 __ B(ARM64FPCondition(opposite_condition, condition->IsGtBias()), false_target); 3313 } else { 3314 __ B(ARM64FPCondition(condition->GetCondition(), condition->IsGtBias()), true_target); 3315 } 3316 } else { 3317 // Integer cases. 3318 Register lhs = InputRegisterAt(condition, 0); 3319 Operand rhs = InputOperandAt(condition, 1); 3320 3321 Condition arm64_cond; 3322 vixl::aarch64::Label* non_fallthrough_target; 3323 if (true_target == nullptr) { 3324 arm64_cond = ARM64Condition(condition->GetOppositeCondition()); 3325 non_fallthrough_target = false_target; 3326 } else { 3327 arm64_cond = ARM64Condition(condition->GetCondition()); 3328 non_fallthrough_target = true_target; 3329 } 3330 3331 if ((arm64_cond == eq || arm64_cond == ne || arm64_cond == lt || arm64_cond == ge) && 3332 rhs.IsImmediate() && (rhs.GetImmediate() == 0)) { 3333 switch (arm64_cond) { 3334 case eq: 3335 __ Cbz(lhs, non_fallthrough_target); 3336 break; 3337 case ne: 3338 __ Cbnz(lhs, non_fallthrough_target); 3339 break; 3340 case lt: 3341 // Test the sign bit and branch accordingly. 3342 __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); 3343 break; 3344 case ge: 3345 // Test the sign bit and branch accordingly. 3346 __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target); 3347 break; 3348 default: 3349 // Without the `static_cast` the compiler throws an error for 3350 // `-Werror=sign-promo`. 3351 LOG(FATAL) << "Unexpected condition: " << static_cast<int>(arm64_cond); 3352 } 3353 } else { 3354 __ Cmp(lhs, rhs); 3355 __ B(arm64_cond, non_fallthrough_target); 3356 } 3357 } 3358 } 3359 3360 // If neither branch falls through (case 3), the conditional branch to `true_target` 3361 // was already emitted (case 2) and we need to emit a jump to `false_target`. 
3362 if (true_target != nullptr && false_target != nullptr) { 3363 __ B(false_target); 3364 } 3365 } 3366 3367 void LocationsBuilderARM64::VisitIf(HIf* if_instr) { 3368 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); 3369 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 3370 locations->SetInAt(0, Location::RequiresRegister()); 3371 } 3372 } 3373 3374 void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { 3375 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 3376 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 3377 vixl::aarch64::Label* true_target = codegen_->GetLabelOf(true_successor); 3378 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor)) { 3379 true_target = nullptr; 3380 } 3381 vixl::aarch64::Label* false_target = codegen_->GetLabelOf(false_successor); 3382 if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) { 3383 false_target = nullptr; 3384 } 3385 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); 3386 } 3387 3388 void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 3389 LocationSummary* locations = new (GetGraph()->GetAllocator()) 3390 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 3391 InvokeRuntimeCallingConvention calling_convention; 3392 RegisterSet caller_saves = RegisterSet::Empty(); 3393 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); 3394 locations->SetCustomSlowPathCallerSaves(caller_saves); 3395 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 3396 locations->SetInAt(0, Location::RequiresRegister()); 3397 } 3398 } 3399 3400 void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { 3401 SlowPathCodeARM64* slow_path = 3402 deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize); 3403 GenerateTestAndBranch(deoptimize, 3404 /* condition_input_index= */ 0, 3405 slow_path->GetEntryLabel(), 3406 /* false_target= */ nullptr); 3407 } 3408 3409 void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3410 LocationSummary* locations = new (GetGraph()->GetAllocator()) 3411 LocationSummary(flag, LocationSummary::kNoCall); 3412 locations->SetOut(Location::RequiresRegister()); 3413 } 3414 3415 void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 3416 __ Ldr(OutputRegister(flag), 3417 MemOperand(sp, codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); 3418 } 3419 3420 static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) { 3421 return condition->IsCondition() && 3422 DataType::IsFloatingPointType(condition->InputAt(0)->GetType()); 3423 } 3424 3425 static inline Condition GetConditionForSelect(HCondition* condition) { 3426 IfCondition cond = condition->AsCondition()->GetCondition(); 3427 return IsConditionOnFloatingPointValues(condition) ? 
ARM64FPCondition(cond, condition->IsGtBias()) 3428 : ARM64Condition(cond); 3429 } 3430 3431 void LocationsBuilderARM64::VisitSelect(HSelect* select) { 3432 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); 3433 if (DataType::IsFloatingPointType(select->GetType())) { 3434 locations->SetInAt(0, Location::RequiresFpuRegister()); 3435 locations->SetInAt(1, Location::RequiresFpuRegister()); 3436 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 3437 } else { 3438 HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); 3439 HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); 3440 bool is_true_value_constant = cst_true_value != nullptr; 3441 bool is_false_value_constant = cst_false_value != nullptr; 3442 // Ask VIXL whether we should synthesize constants in registers. 3443 // We give an arbitrary register to VIXL when dealing with non-constant inputs. 3444 Operand true_op = is_true_value_constant ? 3445 Operand(Int64FromConstant(cst_true_value)) : Operand(x1); 3446 Operand false_op = is_false_value_constant ? 3447 Operand(Int64FromConstant(cst_false_value)) : Operand(x2); 3448 bool true_value_in_register = false; 3449 bool false_value_in_register = false; 3450 MacroAssembler::GetCselSynthesisInformation( 3451 x0, true_op, false_op, &true_value_in_register, &false_value_in_register); 3452 true_value_in_register |= !is_true_value_constant; 3453 false_value_in_register |= !is_false_value_constant; 3454 3455 locations->SetInAt(1, true_value_in_register ? Location::RequiresRegister() 3456 : Location::ConstantLocation(cst_true_value)); 3457 locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister() 3458 : Location::ConstantLocation(cst_false_value)); 3459 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3460 } 3461 3462 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 3463 locations->SetInAt(2, Location::RequiresRegister()); 3464 } 3465 } 3466 3467 void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) { 3468 HInstruction* cond = select->GetCondition(); 3469 Condition csel_cond; 3470 3471 if (IsBooleanValueOrMaterializedCondition(cond)) { 3472 if (cond->IsCondition() && cond->GetNext() == select) { 3473 // Use the condition flags set by the previous instruction. 3474 csel_cond = GetConditionForSelect(cond->AsCondition()); 3475 } else { 3476 __ Cmp(InputRegisterAt(select, 2), 0); 3477 csel_cond = ne; 3478 } 3479 } else if (IsConditionOnFloatingPointValues(cond)) { 3480 GenerateFcmp(cond); 3481 csel_cond = GetConditionForSelect(cond->AsCondition()); 3482 } else { 3483 __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1)); 3484 csel_cond = GetConditionForSelect(cond->AsCondition()); 3485 } 3486 3487 if (DataType::IsFloatingPointType(select->GetType())) { 3488 __ Fcsel(OutputFPRegister(select), 3489 InputFPRegisterAt(select, 1), 3490 InputFPRegisterAt(select, 0), 3491 csel_cond); 3492 } else { 3493 __ Csel(OutputRegister(select), 3494 InputOperandAt(select, 1), 3495 InputOperandAt(select, 0), 3496 csel_cond); 3497 } 3498 } 3499 3500 void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 3501 new (GetGraph()->GetAllocator()) LocationSummary(info); 3502 } 3503 3504 void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) { 3505 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 
3506 } 3507 3508 void CodeGeneratorARM64::GenerateNop() { 3509 __ Nop(); 3510 } 3511 3512 void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3513 HandleFieldGet(instruction, instruction->GetFieldInfo()); 3514 } 3515 3516 void InstructionCodeGeneratorARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 3517 HandleFieldGet(instruction, instruction->GetFieldInfo()); 3518 } 3519 3520 void LocationsBuilderARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3521 HandleFieldSet(instruction); 3522 } 3523 3524 void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 3525 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 3526 } 3527 3528 // Temp is used for read barrier. 3529 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { 3530 if (kEmitCompilerReadBarrier && 3531 (kUseBakerReadBarrier || 3532 type_check_kind == TypeCheckKind::kAbstractClassCheck || 3533 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 3534 type_check_kind == TypeCheckKind::kArrayObjectCheck)) { 3535 return 1; 3536 } 3537 return 0; 3538 } 3539 3540 // Interface case has 3 temps, one for holding the number of interfaces, one for the current 3541 // interface pointer, one for loading the current interface. 3542 // The other checks have one temp for loading the object's class. 3543 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { 3544 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 3545 return 3; 3546 } 3547 return 1 + NumberOfInstanceOfTemps(type_check_kind); 3548 } 3549 3550 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { 3551 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 3552 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3553 bool baker_read_barrier_slow_path = false; 3554 switch (type_check_kind) { 3555 case TypeCheckKind::kExactCheck: 3556 case TypeCheckKind::kAbstractClassCheck: 3557 case TypeCheckKind::kClassHierarchyCheck: 3558 case TypeCheckKind::kArrayObjectCheck: { 3559 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); 3560 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; 3561 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; 3562 break; 3563 } 3564 case TypeCheckKind::kArrayCheck: 3565 case TypeCheckKind::kUnresolvedCheck: 3566 case TypeCheckKind::kInterfaceCheck: 3567 call_kind = LocationSummary::kCallOnSlowPath; 3568 break; 3569 case TypeCheckKind::kBitstringCheck: 3570 break; 3571 } 3572 3573 LocationSummary* locations = 3574 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 3575 if (baker_read_barrier_slow_path) { 3576 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 3577 } 3578 locations->SetInAt(0, Location::RequiresRegister()); 3579 if (type_check_kind == TypeCheckKind::kBitstringCheck) { 3580 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 3581 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 3582 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 3583 } else { 3584 locations->SetInAt(1, Location::RequiresRegister()); 3585 } 3586 // The "out" register is used as a temporary, so it overlaps with the inputs. 3587 // Note that TypeCheckSlowPathARM64 uses this register too. 
3588 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 3589 // Add temps if necessary for read barriers. 3590 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); 3591 } 3592 3593 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { 3594 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3595 LocationSummary* locations = instruction->GetLocations(); 3596 Location obj_loc = locations->InAt(0); 3597 Register obj = InputRegisterAt(instruction, 0); 3598 Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) 3599 ? Register() 3600 : InputRegisterAt(instruction, 1); 3601 Location out_loc = locations->Out(); 3602 Register out = OutputRegister(instruction); 3603 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); 3604 DCHECK_LE(num_temps, 1u); 3605 Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); 3606 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3607 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 3608 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 3609 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 3610 3611 vixl::aarch64::Label done, zero; 3612 SlowPathCodeARM64* slow_path = nullptr; 3613 3614 // Return 0 if `obj` is null. 3615 // Avoid null check if we know `obj` is not null. 3616 if (instruction->MustDoNullCheck()) { 3617 __ Cbz(obj, &zero); 3618 } 3619 3620 switch (type_check_kind) { 3621 case TypeCheckKind::kExactCheck: { 3622 ReadBarrierOption read_barrier_option = 3623 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3624 // /* HeapReference<Class> */ out = obj->klass_ 3625 GenerateReferenceLoadTwoRegisters(instruction, 3626 out_loc, 3627 obj_loc, 3628 class_offset, 3629 maybe_temp_loc, 3630 read_barrier_option); 3631 __ Cmp(out, cls); 3632 __ Cset(out, eq); 3633 if (zero.IsLinked()) { 3634 __ B(&done); 3635 } 3636 break; 3637 } 3638 3639 case TypeCheckKind::kAbstractClassCheck: { 3640 ReadBarrierOption read_barrier_option = 3641 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3642 // /* HeapReference<Class> */ out = obj->klass_ 3643 GenerateReferenceLoadTwoRegisters(instruction, 3644 out_loc, 3645 obj_loc, 3646 class_offset, 3647 maybe_temp_loc, 3648 read_barrier_option); 3649 // If the class is abstract, we eagerly fetch the super class of the 3650 // object to avoid doing a comparison we know will fail. 3651 vixl::aarch64::Label loop, success; 3652 __ Bind(&loop); 3653 // /* HeapReference<Class> */ out = out->super_class_ 3654 GenerateReferenceLoadOneRegister(instruction, 3655 out_loc, 3656 super_offset, 3657 maybe_temp_loc, 3658 read_barrier_option); 3659 // If `out` is null, we use it for the result, and jump to `done`. 3660 __ Cbz(out, &done); 3661 __ Cmp(out, cls); 3662 __ B(ne, &loop); 3663 __ Mov(out, 1); 3664 if (zero.IsLinked()) { 3665 __ B(&done); 3666 } 3667 break; 3668 } 3669 3670 case TypeCheckKind::kClassHierarchyCheck: { 3671 ReadBarrierOption read_barrier_option = 3672 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3673 // /* HeapReference<Class> */ out = obj->klass_ 3674 GenerateReferenceLoadTwoRegisters(instruction, 3675 out_loc, 3676 obj_loc, 3677 class_offset, 3678 maybe_temp_loc, 3679 read_barrier_option); 3680 // Walk over the class hierarchy to find a match. 
3681 vixl::aarch64::Label loop, success; 3682 __ Bind(&loop); 3683 __ Cmp(out, cls); 3684 __ B(eq, &success); 3685 // /* HeapReference<Class> */ out = out->super_class_ 3686 GenerateReferenceLoadOneRegister(instruction, 3687 out_loc, 3688 super_offset, 3689 maybe_temp_loc, 3690 read_barrier_option); 3691 __ Cbnz(out, &loop); 3692 // If `out` is null, we use it for the result, and jump to `done`. 3693 __ B(&done); 3694 __ Bind(&success); 3695 __ Mov(out, 1); 3696 if (zero.IsLinked()) { 3697 __ B(&done); 3698 } 3699 break; 3700 } 3701 3702 case TypeCheckKind::kArrayObjectCheck: { 3703 ReadBarrierOption read_barrier_option = 3704 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 3705 // /* HeapReference<Class> */ out = obj->klass_ 3706 GenerateReferenceLoadTwoRegisters(instruction, 3707 out_loc, 3708 obj_loc, 3709 class_offset, 3710 maybe_temp_loc, 3711 read_barrier_option); 3712 // Do an exact check. 3713 vixl::aarch64::Label exact_check; 3714 __ Cmp(out, cls); 3715 __ B(eq, &exact_check); 3716 // Otherwise, we need to check that the object's class is a non-primitive array. 3717 // /* HeapReference<Class> */ out = out->component_type_ 3718 GenerateReferenceLoadOneRegister(instruction, 3719 out_loc, 3720 component_offset, 3721 maybe_temp_loc, 3722 read_barrier_option); 3723 // If `out` is null, we use it for the result, and jump to `done`. 3724 __ Cbz(out, &done); 3725 __ Ldrh(out, HeapOperand(out, primitive_offset)); 3726 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 3727 __ Cbnz(out, &zero); 3728 __ Bind(&exact_check); 3729 __ Mov(out, 1); 3730 __ B(&done); 3731 break; 3732 } 3733 3734 case TypeCheckKind::kArrayCheck: { 3735 // No read barrier since the slow path will retry upon failure. 3736 // /* HeapReference<Class> */ out = obj->klass_ 3737 GenerateReferenceLoadTwoRegisters(instruction, 3738 out_loc, 3739 obj_loc, 3740 class_offset, 3741 maybe_temp_loc, 3742 kWithoutReadBarrier); 3743 __ Cmp(out, cls); 3744 DCHECK(locations->OnlyCallsOnSlowPath()); 3745 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( 3746 instruction, /* is_fatal= */ false); 3747 codegen_->AddSlowPath(slow_path); 3748 __ B(ne, slow_path->GetEntryLabel()); 3749 __ Mov(out, 1); 3750 if (zero.IsLinked()) { 3751 __ B(&done); 3752 } 3753 break; 3754 } 3755 3756 case TypeCheckKind::kUnresolvedCheck: 3757 case TypeCheckKind::kInterfaceCheck: { 3758 // Note that we indeed only call on slow path, but we always go 3759 // into the slow path for the unresolved and interface check 3760 // cases. 3761 // 3762 // We cannot directly call the InstanceofNonTrivial runtime 3763 // entry point without resorting to a type checking slow path 3764 // here (i.e. by calling InvokeRuntime directly), as it would 3765 // require to assign fixed registers for the inputs of this 3766 // HInstanceOf instruction (following the runtime calling 3767 // convention), which might be cluttered by the potential first 3768 // read barrier emission at the beginning of this method. 3769 // 3770 // TODO: Introduce a new runtime entry point taking the object 3771 // to test (instead of its class) as argument, and let it deal 3772 // with the read barrier issues. This will let us refactor this 3773 // case of the `switch` code as it was previously (with a direct 3774 // call to the runtime not using a type checking slow path). 3775 // This should also be beneficial for the other cases above. 
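// For now, the type check slow path performs the instanceof test via the
// runtime and writes the result into `out`.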
3776 DCHECK(locations->OnlyCallsOnSlowPath()); 3777 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( 3778 instruction, /* is_fatal= */ false); 3779 codegen_->AddSlowPath(slow_path); 3780 __ B(slow_path->GetEntryLabel()); 3781 if (zero.IsLinked()) { 3782 __ B(&done); 3783 } 3784 break; 3785 } 3786 3787 case TypeCheckKind::kBitstringCheck: { 3788 // /* HeapReference<Class> */ temp = obj->klass_ 3789 GenerateReferenceLoadTwoRegisters(instruction, 3790 out_loc, 3791 obj_loc, 3792 class_offset, 3793 maybe_temp_loc, 3794 kWithoutReadBarrier); 3795 3796 GenerateBitstringTypeCheckCompare(instruction, out); 3797 __ Cset(out, eq); 3798 if (zero.IsLinked()) { 3799 __ B(&done); 3800 } 3801 break; 3802 } 3803 } 3804 3805 if (zero.IsLinked()) { 3806 __ Bind(&zero); 3807 __ Mov(out, 0); 3808 } 3809 3810 if (done.IsLinked()) { 3811 __ Bind(&done); 3812 } 3813 3814 if (slow_path != nullptr) { 3815 __ Bind(slow_path->GetExitLabel()); 3816 } 3817 } 3818 3819 void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { 3820 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3821 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); 3822 LocationSummary* locations = 3823 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 3824 locations->SetInAt(0, Location::RequiresRegister()); 3825 if (type_check_kind == TypeCheckKind::kBitstringCheck) { 3826 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 3827 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 3828 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 3829 } else { 3830 locations->SetInAt(1, Location::RequiresRegister()); 3831 } 3832 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 3833 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); 3834 } 3835 3836 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { 3837 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 3838 LocationSummary* locations = instruction->GetLocations(); 3839 Location obj_loc = locations->InAt(0); 3840 Register obj = InputRegisterAt(instruction, 0); 3841 Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck) 3842 ? Register() 3843 : InputRegisterAt(instruction, 1); 3844 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); 3845 DCHECK_GE(num_temps, 1u); 3846 DCHECK_LE(num_temps, 3u); 3847 Location temp_loc = locations->GetTemp(0); 3848 Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); 3849 Location maybe_temp3_loc = (num_temps >= 3) ? 
locations->GetTemp(2) : Location::NoLocation(); 3850 Register temp = WRegisterFrom(temp_loc); 3851 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 3852 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 3853 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 3854 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 3855 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); 3856 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); 3857 const uint32_t object_array_data_offset = 3858 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 3859 3860 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); 3861 SlowPathCodeARM64* type_check_slow_path = 3862 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( 3863 instruction, is_type_check_slow_path_fatal); 3864 codegen_->AddSlowPath(type_check_slow_path); 3865 3866 vixl::aarch64::Label done; 3867 // Avoid null check if we know obj is not null. 3868 if (instruction->MustDoNullCheck()) { 3869 __ Cbz(obj, &done); 3870 } 3871 3872 switch (type_check_kind) { 3873 case TypeCheckKind::kExactCheck: 3874 case TypeCheckKind::kArrayCheck: { 3875 // /* HeapReference<Class> */ temp = obj->klass_ 3876 GenerateReferenceLoadTwoRegisters(instruction, 3877 temp_loc, 3878 obj_loc, 3879 class_offset, 3880 maybe_temp2_loc, 3881 kWithoutReadBarrier); 3882 3883 __ Cmp(temp, cls); 3884 // Jump to slow path for throwing the exception or doing a 3885 // more involved array check. 3886 __ B(ne, type_check_slow_path->GetEntryLabel()); 3887 break; 3888 } 3889 3890 case TypeCheckKind::kAbstractClassCheck: { 3891 // /* HeapReference<Class> */ temp = obj->klass_ 3892 GenerateReferenceLoadTwoRegisters(instruction, 3893 temp_loc, 3894 obj_loc, 3895 class_offset, 3896 maybe_temp2_loc, 3897 kWithoutReadBarrier); 3898 3899 // If the class is abstract, we eagerly fetch the super class of the 3900 // object to avoid doing a comparison we know will fail. 3901 vixl::aarch64::Label loop; 3902 __ Bind(&loop); 3903 // /* HeapReference<Class> */ temp = temp->super_class_ 3904 GenerateReferenceLoadOneRegister(instruction, 3905 temp_loc, 3906 super_offset, 3907 maybe_temp2_loc, 3908 kWithoutReadBarrier); 3909 3910 // If the class reference currently in `temp` is null, jump to the slow path to throw the 3911 // exception. 3912 __ Cbz(temp, type_check_slow_path->GetEntryLabel()); 3913 // Otherwise, compare classes. 3914 __ Cmp(temp, cls); 3915 __ B(ne, &loop); 3916 break; 3917 } 3918 3919 case TypeCheckKind::kClassHierarchyCheck: { 3920 // /* HeapReference<Class> */ temp = obj->klass_ 3921 GenerateReferenceLoadTwoRegisters(instruction, 3922 temp_loc, 3923 obj_loc, 3924 class_offset, 3925 maybe_temp2_loc, 3926 kWithoutReadBarrier); 3927 3928 // Walk over the class hierarchy to find a match. 3929 vixl::aarch64::Label loop; 3930 __ Bind(&loop); 3931 __ Cmp(temp, cls); 3932 __ B(eq, &done); 3933 3934 // /* HeapReference<Class> */ temp = temp->super_class_ 3935 GenerateReferenceLoadOneRegister(instruction, 3936 temp_loc, 3937 super_offset, 3938 maybe_temp2_loc, 3939 kWithoutReadBarrier); 3940 3941 // If the class reference currently in `temp` is not null, jump 3942 // back at the beginning of the loop. 3943 __ Cbnz(temp, &loop); 3944 // Otherwise, jump to the slow path to throw the exception. 
3945 __ B(type_check_slow_path->GetEntryLabel()); 3946 break; 3947 } 3948 3949 case TypeCheckKind::kArrayObjectCheck: { 3950 // /* HeapReference<Class> */ temp = obj->klass_ 3951 GenerateReferenceLoadTwoRegisters(instruction, 3952 temp_loc, 3953 obj_loc, 3954 class_offset, 3955 maybe_temp2_loc, 3956 kWithoutReadBarrier); 3957 3958 // Do an exact check. 3959 __ Cmp(temp, cls); 3960 __ B(eq, &done); 3961 3962 // Otherwise, we need to check that the object's class is a non-primitive array. 3963 // /* HeapReference<Class> */ temp = temp->component_type_ 3964 GenerateReferenceLoadOneRegister(instruction, 3965 temp_loc, 3966 component_offset, 3967 maybe_temp2_loc, 3968 kWithoutReadBarrier); 3969 3970 // If the component type is null, jump to the slow path to throw the exception. 3971 __ Cbz(temp, type_check_slow_path->GetEntryLabel()); 3972 // Otherwise, the object is indeed an array. Further check that this component type is not a 3973 // primitive type. 3974 __ Ldrh(temp, HeapOperand(temp, primitive_offset)); 3975 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 3976 __ Cbnz(temp, type_check_slow_path->GetEntryLabel()); 3977 break; 3978 } 3979 3980 case TypeCheckKind::kUnresolvedCheck: 3981 // We always go into the type check slow path for the unresolved check cases. 3982 // 3983 // We cannot directly call the CheckCast runtime entry point 3984 // without resorting to a type checking slow path here (i.e. by 3985 // calling InvokeRuntime directly), as it would require to 3986 // assign fixed registers for the inputs of this HInstanceOf 3987 // instruction (following the runtime calling convention), which 3988 // might be cluttered by the potential first read barrier 3989 // emission at the beginning of this method. 3990 __ B(type_check_slow_path->GetEntryLabel()); 3991 break; 3992 case TypeCheckKind::kInterfaceCheck: { 3993 // /* HeapReference<Class> */ temp = obj->klass_ 3994 GenerateReferenceLoadTwoRegisters(instruction, 3995 temp_loc, 3996 obj_loc, 3997 class_offset, 3998 maybe_temp2_loc, 3999 kWithoutReadBarrier); 4000 4001 // /* HeapReference<Class> */ temp = temp->iftable_ 4002 GenerateReferenceLoadTwoRegisters(instruction, 4003 temp_loc, 4004 temp_loc, 4005 iftable_offset, 4006 maybe_temp2_loc, 4007 kWithoutReadBarrier); 4008 // Iftable is never null. 4009 __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset)); 4010 // Loop through the iftable and check if any class matches. 4011 vixl::aarch64::Label start_loop; 4012 __ Bind(&start_loop); 4013 __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel()); 4014 __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset)); 4015 GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc)); 4016 // Go to next interface. 4017 __ Add(temp, temp, 2 * kHeapReferenceSize); 4018 __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2); 4019 // Compare the classes and continue the loop if they do not match. 
4020 __ Cmp(cls, WRegisterFrom(maybe_temp3_loc)); 4021 __ B(ne, &start_loop); 4022 break; 4023 } 4024 4025 case TypeCheckKind::kBitstringCheck: { 4026 // /* HeapReference<Class> */ temp = obj->klass_ 4027 GenerateReferenceLoadTwoRegisters(instruction, 4028 temp_loc, 4029 obj_loc, 4030 class_offset, 4031 maybe_temp2_loc, 4032 kWithoutReadBarrier); 4033 4034 GenerateBitstringTypeCheckCompare(instruction, temp); 4035 __ B(ne, type_check_slow_path->GetEntryLabel()); 4036 break; 4037 } 4038 } 4039 __ Bind(&done); 4040 4041 __ Bind(type_check_slow_path->GetExitLabel()); 4042 } 4043 4044 void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { 4045 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); 4046 locations->SetOut(Location::ConstantLocation(constant)); 4047 } 4048 4049 void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { 4050 // Will be generated at use site. 4051 } 4052 4053 void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { 4054 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); 4055 locations->SetOut(Location::ConstantLocation(constant)); 4056 } 4057 4058 void InstructionCodeGeneratorARM64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { 4059 // Will be generated at use site. 4060 } 4061 4062 void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 4063 // The trampoline uses the same calling convention as dex calling conventions, 4064 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 4065 // the method_idx. 4066 HandleInvoke(invoke); 4067 } 4068 4069 void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 4070 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 4071 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4072 } 4073 4074 void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { 4075 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; 4076 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); 4077 } 4078 4079 void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) { 4080 HandleInvoke(invoke); 4081 } 4082 4083 void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction, 4084 Register klass) { 4085 DCHECK_EQ(klass.GetCode(), 0u); 4086 // We know the destination of an intrinsic, so no need to record inline 4087 // caches. 4088 if (!instruction->GetLocations()->Intrinsified() && 4089 GetGraph()->IsCompilingBaseline() && 4090 !Runtime::Current()->IsAotCompiler()) { 4091 DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke()); 4092 ScopedObjectAccess soa(Thread::Current()); 4093 ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); 4094 if (info != nullptr) { 4095 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc()); 4096 uint64_t address = reinterpret_cast64<uint64_t>(cache); 4097 vixl::aarch64::Label done; 4098 __ Mov(x8, address); 4099 __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value())); 4100 // Fast path for a monomorphic cache. 
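// If the receiver's class matches the single cached class, there is nothing to
// do; otherwise call kQuickUpdateInlineCache to record the new class.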
4101 __ Cmp(klass, x9); 4102 __ B(eq, &done); 4103 InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc()); 4104 __ Bind(&done); 4105 } 4106 } 4107 } 4108 4109 void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) { 4110 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError. 4111 LocationSummary* locations = invoke->GetLocations(); 4112 Register temp = XRegisterFrom(locations->GetTemp(0)); 4113 Location receiver = locations->InAt(0); 4114 Offset class_offset = mirror::Object::ClassOffset(); 4115 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); 4116 4117 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 4118 if (receiver.IsStackSlot()) { 4119 __ Ldr(temp.W(), StackOperandFrom(receiver)); 4120 { 4121 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 4122 // /* HeapReference<Class> */ temp = temp->klass_ 4123 __ Ldr(temp.W(), HeapOperand(temp.W(), class_offset)); 4124 codegen_->MaybeRecordImplicitNullCheck(invoke); 4125 } 4126 } else { 4127 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 4128 // /* HeapReference<Class> */ temp = receiver->klass_ 4129 __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset)); 4130 codegen_->MaybeRecordImplicitNullCheck(invoke); 4131 } 4132 4133 // Instead of simply (possibly) unpoisoning `temp` here, we should 4134 // emit a read barrier for the previous class reference load. 4135 // However this is not required in practice, as this is an 4136 // intermediate/temporary reference and because the current 4137 // concurrent copying collector keeps the from-space memory 4138 // intact/accessible until the end of the marking phase (the 4139 // concurrent copying collector may not in the future). 4140 GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); 4141 4142 // If we're compiling baseline, update the inline cache. 4143 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp); 4144 4145 // The register ip1 is required to be used for the hidden argument in 4146 // art_quick_imt_conflict_trampoline, so prevent VIXL from using it. 4147 MacroAssembler* masm = GetVIXLAssembler(); 4148 UseScratchRegisterScope scratch_scope(masm); 4149 scratch_scope.Exclude(ip1); 4150 __ Mov(ip1, invoke->GetDexMethodIndex()); 4151 4152 __ Ldr(temp, 4153 MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); 4154 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 4155 invoke->GetImtIndex(), kArm64PointerSize)); 4156 // temp = temp->GetImtEntryAt(method_offset); 4157 __ Ldr(temp, MemOperand(temp, method_offset)); 4158 // lr = temp->GetEntryPoint(); 4159 __ Ldr(lr, MemOperand(temp, entry_point.Int32Value())); 4160 4161 { 4162 // Ensure the pc position is recorded immediately after the `blr` instruction. 
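// The ExactAssemblyScope reserves space for exactly one instruction, so the
// macro assembler cannot emit literal pools or veneers between the `blr` and
// the point where the pc is recorded.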
4163 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 4164 4165 // lr(); 4166 __ blr(lr); 4167 DCHECK(!codegen_->IsLeafMethod()); 4168 codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); 4169 } 4170 4171 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4172 } 4173 4174 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 4175 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_); 4176 if (intrinsic.TryDispatch(invoke)) { 4177 return; 4178 } 4179 4180 HandleInvoke(invoke); 4181 } 4182 4183 void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 4184 // Explicit clinit checks triggered by static invokes must have been pruned by 4185 // art::PrepareForRegisterAllocation. 4186 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 4187 4188 IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_); 4189 if (intrinsic.TryDispatch(invoke)) { 4190 return; 4191 } 4192 4193 HandleInvoke(invoke); 4194 } 4195 4196 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codegen) { 4197 if (invoke->GetLocations()->Intrinsified()) { 4198 IntrinsicCodeGeneratorARM64 intrinsic(codegen); 4199 intrinsic.Dispatch(invoke); 4200 return true; 4201 } 4202 return false; 4203 } 4204 4205 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch( 4206 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, 4207 ArtMethod* method ATTRIBUTE_UNUSED) { 4208 // On ARM64 we support all dispatch types. 4209 return desired_dispatch_info; 4210 } 4211 4212 void CodeGeneratorARM64::GenerateStaticOrDirectCall( 4213 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { 4214 // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. 4215 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 4216 switch (invoke->GetMethodLoadKind()) { 4217 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { 4218 uint32_t offset = 4219 GetThreadOffset<kArm64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value(); 4220 // temp = thread->string_init_entrypoint 4221 __ Ldr(XRegisterFrom(temp), MemOperand(tr, offset)); 4222 break; 4223 } 4224 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: 4225 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); 4226 break; 4227 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { 4228 DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()); 4229 // Add ADRP with its PC-relative method patch. 4230 vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod()); 4231 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); 4232 // Add ADD with its PC-relative method patch. 4233 vixl::aarch64::Label* add_label = 4234 NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label); 4235 EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); 4236 break; 4237 } 4238 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: { 4239 // Add ADRP with its PC-relative .data.bimg.rel.ro patch. 4240 uint32_t boot_image_offset = GetBootImageOffset(invoke); 4241 vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset); 4242 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); 4243 // Add LDR with its PC-relative .data.bimg.rel.ro patch. 
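// The ADRP above materializes the 4KiB-aligned page address; the LDR then
// loads the entry at the page offset patched in at link time.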
4244 vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label); 4245 // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load. 4246 EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp)); 4247 break; 4248 } 4249 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { 4250 // Add ADRP with its PC-relative .bss entry patch. 4251 MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); 4252 vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method); 4253 EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); 4254 // Add LDR with its PC-relative .bss entry patch. 4255 vixl::aarch64::Label* ldr_label = 4256 NewMethodBssEntryPatch(target_method, adrp_label); 4257 // All aligned loads are implicitly atomic consume operations on ARM64. 4258 EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); 4259 break; 4260 } 4261 case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress: 4262 // Load method address from literal pool. 4263 __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress())); 4264 break; 4265 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { 4266 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); 4267 return; // No code pointer retrieval; the runtime performs the call directly. 4268 } 4269 } 4270 4271 switch (invoke->GetCodePtrLocation()) { 4272 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: 4273 { 4274 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. 4275 ExactAssemblyScope eas(GetVIXLAssembler(), 4276 kInstructionSize, 4277 CodeBufferCheckScope::kExactSize); 4278 __ bl(&frame_entry_label_); 4279 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4280 } 4281 break; 4282 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: 4283 // LR = callee_method->entry_point_from_quick_compiled_code_; 4284 __ Ldr(lr, MemOperand( 4285 XRegisterFrom(callee_method), 4286 ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value())); 4287 { 4288 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. 4289 ExactAssemblyScope eas(GetVIXLAssembler(), 4290 kInstructionSize, 4291 CodeBufferCheckScope::kExactSize); 4292 // lr() 4293 __ blr(lr); 4294 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4295 } 4296 break; 4297 } 4298 4299 DCHECK(!IsLeafMethod()); 4300 } 4301 4302 void CodeGeneratorARM64::GenerateVirtualCall( 4303 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { 4304 // Use the calling convention instead of the location of the receiver, as 4305 // intrinsics may have put the receiver in a different register. In the intrinsics 4306 // slow path, the arguments have been moved to the right place, so here we are 4307 // guaranteed that the receiver is the first register of the calling convention. 4308 InvokeDexCallingConvention calling_convention; 4309 Register receiver = calling_convention.GetRegisterAt(0); 4310 Register temp = XRegisterFrom(temp_in); 4311 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 4312 invoke->GetVTableIndex(), kArm64PointerSize).SizeValue(); 4313 Offset class_offset = mirror::Object::ClassOffset(); 4314 Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize); 4315 4316 DCHECK(receiver.IsRegister()); 4317 4318 { 4319 // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
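// Illustrative sketch of the virtual dispatch emitted below (symbolic register names and
// offsets; the unpoisoning and inline cache steps are elided):
//   ldr wTemp, [xReceiver, #class_offset]     // receiver->klass_; faults if receiver is null
//   ldr xTemp, [xTemp, #vtable_entry_offset]  // ArtMethod* from the class' embedded vtable
//   ldr lr,    [xTemp, #entry_point_offset]
//   blr lr
// The implicit null check requires the first load to be the instruction at the PC that
// MaybeRecordImplicitNullCheck() records, hence no pools may be emitted in between.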
4320 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 4321 // /* HeapReference<Class> */ temp = receiver->klass_ 4322 __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset)); 4323 MaybeRecordImplicitNullCheck(invoke); 4324 } 4325 // Instead of simply (possibly) unpoisoning `temp` here, we should 4326 // emit a read barrier for the previous class reference load. However this is not required in practice, as this is an 4327 // intermediate/temporary reference and because the current 4328 // concurrent copying collector keeps the from-space memory 4329 // intact/accessible until the end of the marking phase (the 4330 // concurrent copying collector may not in the future). 4331 GetAssembler()->MaybeUnpoisonHeapReference(temp.W()); 4332 4333 // If we're compiling baseline, update the inline cache. 4334 MaybeGenerateInlineCacheCheck(invoke, temp); 4335 4336 // temp = temp->GetMethodAt(method_offset); 4337 __ Ldr(temp, MemOperand(temp, method_offset)); 4338 // lr = temp->GetEntryPoint(); 4339 __ Ldr(lr, MemOperand(temp, entry_point.SizeValue())); 4340 { 4341 // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. 4342 ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); 4343 // lr(); 4344 __ blr(lr); 4345 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); 4346 } 4347 } 4348 4349 void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 4350 HandleInvoke(invoke); 4351 } 4352 4353 void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { 4354 codegen_->GenerateInvokePolymorphicCall(invoke); 4355 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4356 } 4357 4358 void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) { 4359 HandleInvoke(invoke); 4360 } 4361 4362 void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) { 4363 codegen_->GenerateInvokeCustomCall(invoke); 4364 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4365 } 4366 4367 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch( 4368 uint32_t intrinsic_data, 4369 vixl::aarch64::Label* adrp_label) { 4370 return NewPcRelativePatch( 4371 /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_other_patches_); 4372 } 4373 4374 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch( 4375 uint32_t boot_image_offset, 4376 vixl::aarch64::Label* adrp_label) { 4377 return NewPcRelativePatch( 4378 /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_other_patches_); 4379 } 4380 4381 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( 4382 MethodReference target_method, 4383 vixl::aarch64::Label* adrp_label) { 4384 return NewPcRelativePatch( 4385 target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_); 4386 } 4387 4388 vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch( 4389 MethodReference target_method, 4390 vixl::aarch64::Label* adrp_label) { 4391 return NewPcRelativePatch( 4392 target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_); 4393 } 4394 4395 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch( 4396 const DexFile& dex_file, 4397 dex::TypeIndex type_index, 4398 vixl::aarch64::Label* adrp_label) { 4399 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_); 4400 } 4401 4402 vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch( 4403 const DexFile& dex_file, 4404
dex::TypeIndex type_index, 4405 vixl::aarch64::Label* adrp_label) { 4406 return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); 4407 } 4408 4409 vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch( 4410 const DexFile& dex_file, 4411 dex::StringIndex string_index, 4412 vixl::aarch64::Label* adrp_label) { 4413 return NewPcRelativePatch( 4414 &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_); 4415 } 4416 4417 vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( 4418 const DexFile& dex_file, 4419 dex::StringIndex string_index, 4420 vixl::aarch64::Label* adrp_label) { 4421 return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); 4422 } 4423 4424 void CodeGeneratorARM64::EmitEntrypointThunkCall(ThreadOffset64 entrypoint_offset) { 4425 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. 4426 DCHECK(!Runtime::Current()->UseJitCompilation()); 4427 call_entrypoint_patches_.emplace_back(/*dex_file*/ nullptr, entrypoint_offset.Uint32Value()); 4428 vixl::aarch64::Label* bl_label = &call_entrypoint_patches_.back().label; 4429 __ bind(bl_label); 4430 __ bl(static_cast<int64_t>(0)); // Placeholder, patched at link-time. 4431 } 4432 4433 void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) { 4434 DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope. 4435 if (Runtime::Current()->UseJitCompilation()) { 4436 auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data); 4437 vixl::aarch64::Label* slow_path_entry = &it->second.label; 4438 __ cbnz(mr, slow_path_entry); 4439 } else { 4440 baker_read_barrier_patches_.emplace_back(custom_data); 4441 vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label; 4442 __ bind(cbnz_label); 4443 __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. 4444 } 4445 } 4446 4447 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( 4448 const DexFile* dex_file, 4449 uint32_t offset_or_index, 4450 vixl::aarch64::Label* adrp_label, 4451 ArenaDeque<PcRelativePatchInfo>* patches) { 4452 // Add a patch entry and return the label. 4453 patches->emplace_back(dex_file, offset_or_index); 4454 PcRelativePatchInfo* info = &patches->back(); 4455 vixl::aarch64::Label* label = &info->label; 4456 // If adrp_label is null, this is the ADRP patch and needs to point to its own label. 4457 info->pc_insn_label = (adrp_label != nullptr) ? 
adrp_label : label; 4458 return label; 4459 } 4460 4461 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral( 4462 uint64_t address) { 4463 return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address)); 4464 } 4465 4466 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( 4467 const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { 4468 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); 4469 return jit_string_patches_.GetOrCreate( 4470 StringReference(&dex_file, string_index), 4471 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); 4472 } 4473 4474 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( 4475 const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { 4476 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); 4477 return jit_class_patches_.GetOrCreate( 4478 TypeReference(&dex_file, type_index), 4479 [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); }); 4480 } 4481 4482 void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, 4483 vixl::aarch64::Register reg) { 4484 DCHECK(reg.IsX()); 4485 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4486 __ Bind(fixup_label); 4487 __ adrp(reg, /* offset placeholder */ static_cast<int64_t>(0)); 4488 } 4489 4490 void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, 4491 vixl::aarch64::Register out, 4492 vixl::aarch64::Register base) { 4493 DCHECK(out.IsX()); 4494 DCHECK(base.IsX()); 4495 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4496 __ Bind(fixup_label); 4497 __ add(out, base, Operand(/* offset placeholder */ 0)); 4498 } 4499 4500 void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, 4501 vixl::aarch64::Register out, 4502 vixl::aarch64::Register base) { 4503 DCHECK(base.IsX()); 4504 SingleEmissionCheckScope guard(GetVIXLAssembler()); 4505 __ Bind(fixup_label); 4506 __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); 4507 } 4508 4509 void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg, 4510 uint32_t boot_image_reference) { 4511 if (GetCompilerOptions().IsBootImage()) { 4512 // Add ADRP with its PC-relative type patch. 4513 vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference); 4514 EmitAdrpPlaceholder(adrp_label, reg.X()); 4515 // Add ADD with its PC-relative type patch. 4516 vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label); 4517 EmitAddPlaceholder(add_label, reg.X(), reg.X()); 4518 } else if (GetCompilerOptions().GetCompilePic()) { 4519 // Add ADRP with its PC-relative .data.bimg.rel.ro patch. 4520 vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference); 4521 EmitAdrpPlaceholder(adrp_label, reg.X()); 4522 // Add LDR with its PC-relative .data.bimg.rel.ro patch. 
4523 vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label); 4524 EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X()); 4525 } else { 4526 DCHECK(Runtime::Current()->UseJitCompilation()); 4527 gc::Heap* heap = Runtime::Current()->GetHeap(); 4528 DCHECK(!heap->GetBootImageSpaces().empty()); 4529 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference; 4530 __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address))); 4531 } 4532 } 4533 4534 void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, 4535 uint32_t boot_image_offset) { 4536 DCHECK(invoke->IsStatic()); 4537 InvokeRuntimeCallingConvention calling_convention; 4538 Register argument = calling_convention.GetRegisterAt(0); 4539 if (GetCompilerOptions().IsBootImage()) { 4540 DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference); 4541 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative. 4542 MethodReference target_method = invoke->GetTargetMethod(); 4543 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_; 4544 // Add ADRP with its PC-relative type patch. 4545 vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx); 4546 EmitAdrpPlaceholder(adrp_label, argument.X()); 4547 // Add ADD with its PC-relative type patch. 4548 vixl::aarch64::Label* add_label = 4549 NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label); 4550 EmitAddPlaceholder(add_label, argument.X(), argument.X()); 4551 } else { 4552 LoadBootImageAddress(argument, boot_image_offset); 4553 } 4554 InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc()); 4555 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 4556 } 4557 4558 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> 4559 inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( 4560 const ArenaDeque<PcRelativePatchInfo>& infos, 4561 ArenaVector<linker::LinkerPatch>* linker_patches) { 4562 for (const PcRelativePatchInfo& info : infos) { 4563 linker_patches->push_back(Factory(info.label.GetLocation(), 4564 info.target_dex_file, 4565 info.pc_insn_label->GetLocation(), 4566 info.offset_or_index)); 4567 } 4568 } 4569 4570 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)> 4571 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset, 4572 const DexFile* target_dex_file, 4573 uint32_t pc_insn_offset, 4574 uint32_t boot_image_offset) { 4575 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null. 
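// Note: this adapter exists so that EmitPcRelativeLinkerPatches<>, whose factory parameter
// takes (literal_offset, dex_file, pc_insn_offset, offset_or_index), can also be used with
// the 3-argument LinkerPatch factories that need no dex file (e.g. IntrinsicReferencePatch,
// DataBimgRelRoPatch); the adapter simply drops the unused dex file pointer.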
4576 return Factory(literal_offset, pc_insn_offset, boot_image_offset); 4577 } 4578 4579 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { 4580 DCHECK(linker_patches->empty()); 4581 size_t size = 4582 boot_image_method_patches_.size() + 4583 method_bss_entry_patches_.size() + 4584 boot_image_type_patches_.size() + 4585 type_bss_entry_patches_.size() + 4586 boot_image_string_patches_.size() + 4587 string_bss_entry_patches_.size() + 4588 boot_image_other_patches_.size() + 4589 call_entrypoint_patches_.size() + 4590 baker_read_barrier_patches_.size(); 4591 linker_patches->reserve(size); 4592 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) { 4593 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( 4594 boot_image_method_patches_, linker_patches); 4595 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( 4596 boot_image_type_patches_, linker_patches); 4597 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( 4598 boot_image_string_patches_, linker_patches); 4599 } else { 4600 DCHECK(boot_image_method_patches_.empty()); 4601 DCHECK(boot_image_type_patches_.empty()); 4602 DCHECK(boot_image_string_patches_.empty()); 4603 } 4604 if (GetCompilerOptions().IsBootImage()) { 4605 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>( 4606 boot_image_other_patches_, linker_patches); 4607 } else { 4608 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>( 4609 boot_image_other_patches_, linker_patches); 4610 } 4611 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( 4612 method_bss_entry_patches_, linker_patches); 4613 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( 4614 type_bss_entry_patches_, linker_patches); 4615 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( 4616 string_bss_entry_patches_, linker_patches); 4617 for (const PatchInfo<vixl::aarch64::Label>& info : call_entrypoint_patches_) { 4618 DCHECK(info.target_dex_file == nullptr); 4619 linker_patches->push_back(linker::LinkerPatch::CallEntrypointPatch( 4620 info.label.GetLocation(), info.offset_or_index)); 4621 } 4622 for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { 4623 linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch( 4624 info.label.GetLocation(), info.custom_data)); 4625 } 4626 DCHECK_EQ(size, linker_patches->size()); 4627 } 4628 4629 bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const { 4630 return patch.GetType() == linker::LinkerPatch::Type::kCallEntrypoint || 4631 patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch || 4632 patch.GetType() == linker::LinkerPatch::Type::kCallRelative; 4633 } 4634 4635 void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch, 4636 /*out*/ ArenaVector<uint8_t>* code, 4637 /*out*/ std::string* debug_name) { 4638 Arm64Assembler assembler(GetGraph()->GetAllocator()); 4639 switch (patch.GetType()) { 4640 case linker::LinkerPatch::Type::kCallRelative: { 4641 // The thunk just uses the entry point in the ArtMethod. This works even for calls 4642 // to the generic JNI and interpreter trampolines. 
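// Roughly, the method-call thunk assembled here is (a sketch, not the exact encoding):
//   ldr ip0, [x0, #entry_point_from_quick_compiled_code_offset]  // x0 holds the callee ArtMethod*
//   br  ip0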
4643 Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset( 4644 kArm64PointerSize).Int32Value()); 4645 assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0)); 4646 if (GetCompilerOptions().GenerateAnyDebugInfo()) { 4647 *debug_name = "MethodCallThunk"; 4648 } 4649 break; 4650 } 4651 case linker::LinkerPatch::Type::kCallEntrypoint: { 4652 Offset offset(patch.EntrypointOffset()); 4653 assembler.JumpTo(ManagedRegister(arm64::TR), offset, ManagedRegister(arm64::IP0)); 4654 if (GetCompilerOptions().GenerateAnyDebugInfo()) { 4655 *debug_name = "EntrypointCallThunk_" + std::to_string(offset.Uint32Value()); 4656 } 4657 break; 4658 } 4659 case linker::LinkerPatch::Type::kBakerReadBarrierBranch: { 4660 DCHECK_EQ(patch.GetBakerCustomValue2(), 0u); 4661 CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name); 4662 break; 4663 } 4664 default: 4665 LOG(FATAL) << "Unexpected patch type " << patch.GetType(); 4666 UNREACHABLE(); 4667 } 4668 4669 // Ensure we emit the literal pool if any. 4670 assembler.FinalizeCode(); 4671 code->resize(assembler.CodeSize()); 4672 MemoryRegion code_region(code->data(), code->size()); 4673 assembler.FinalizeInstructions(code_region); 4674 } 4675 4676 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) { 4677 return uint32_literals_.GetOrCreate( 4678 value, 4679 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); }); 4680 } 4681 4682 vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateUint64Literal(uint64_t value) { 4683 return uint64_literals_.GetOrCreate( 4684 value, 4685 [this, value]() { return __ CreateLiteralDestroyedWithPool<uint64_t>(value); }); 4686 } 4687 4688 void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 4689 // Explicit clinit checks triggered by static invokes must have been pruned by 4690 // art::PrepareForRegisterAllocation. 4691 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 4692 4693 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 4694 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4695 return; 4696 } 4697 4698 { 4699 // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there 4700 // are no pools emitted. 4701 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 4702 LocationSummary* locations = invoke->GetLocations(); 4703 codegen_->GenerateStaticOrDirectCall( 4704 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); 4705 } 4706 4707 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4708 } 4709 4710 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 4711 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 4712 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4713 return; 4714 } 4715 4716 { 4717 // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there 4718 // are no pools emitted. 
4719 EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 4720 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 4721 DCHECK(!codegen_->IsLeafMethod()); 4722 } 4723 4724 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4725 } 4726 4727 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( 4728 HLoadClass::LoadKind desired_class_load_kind) { 4729 switch (desired_class_load_kind) { 4730 case HLoadClass::LoadKind::kInvalid: 4731 LOG(FATAL) << "UNREACHABLE"; 4732 UNREACHABLE(); 4733 case HLoadClass::LoadKind::kReferrersClass: 4734 break; 4735 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 4736 case HLoadClass::LoadKind::kBootImageRelRo: 4737 case HLoadClass::LoadKind::kBssEntry: 4738 DCHECK(!Runtime::Current()->UseJitCompilation()); 4739 break; 4740 case HLoadClass::LoadKind::kJitBootImageAddress: 4741 case HLoadClass::LoadKind::kJitTableAddress: 4742 DCHECK(Runtime::Current()->UseJitCompilation()); 4743 break; 4744 case HLoadClass::LoadKind::kRuntimeCall: 4745 break; 4746 } 4747 return desired_class_load_kind; 4748 } 4749 4750 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { 4751 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 4752 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 4753 InvokeRuntimeCallingConvention calling_convention; 4754 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( 4755 cls, 4756 LocationFrom(calling_convention.GetRegisterAt(0)), 4757 LocationFrom(vixl::aarch64::x0)); 4758 DCHECK(calling_convention.GetRegisterAt(0).Is(vixl::aarch64::x0)); 4759 return; 4760 } 4761 DCHECK(!cls->NeedsAccessCheck()); 4762 4763 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 4764 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) 4765 ? LocationSummary::kCallOnSlowPath 4766 : LocationSummary::kNoCall; 4767 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); 4768 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { 4769 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 4770 } 4771 4772 if (load_kind == HLoadClass::LoadKind::kReferrersClass) { 4773 locations->SetInAt(0, Location::RequiresRegister()); 4774 } 4775 locations->SetOut(Location::RequiresRegister()); 4776 if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { 4777 if (!kUseReadBarrier || kUseBakerReadBarrier) { 4778 // Rely on the type resolution or initialization and marking to save everything we need. 4779 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 4780 } else { 4781 // For non-Baker read barrier we have a temp-clobbering call. 4782 } 4783 } 4784 } 4785 4786 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 4787 // move. 4788 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { 4789 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 4790 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 4791 codegen_->GenerateLoadClassRuntimeCall(cls); 4792 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4793 return; 4794 } 4795 DCHECK(!cls->NeedsAccessCheck()); 4796 4797 Location out_loc = cls->GetLocations()->Out(); 4798 Register out = OutputRegister(cls); 4799 4800 const ReadBarrierOption read_barrier_option = cls->IsInBootImage() 4801 ? 
kWithoutReadBarrier 4802 : kCompilerReadBarrierOption; 4803 bool generate_null_check = false; 4804 switch (load_kind) { 4805 case HLoadClass::LoadKind::kReferrersClass: { 4806 DCHECK(!cls->CanCallRuntime()); 4807 DCHECK(!cls->MustGenerateClinitCheck()); 4808 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 4809 Register current_method = InputRegisterAt(cls, 0); 4810 codegen_->GenerateGcRootFieldLoad(cls, 4811 out_loc, 4812 current_method, 4813 ArtMethod::DeclaringClassOffset().Int32Value(), 4814 /* fixup_label= */ nullptr, 4815 read_barrier_option); 4816 break; 4817 } 4818 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { 4819 DCHECK(codegen_->GetCompilerOptions().IsBootImage() || 4820 codegen_->GetCompilerOptions().IsBootImageExtension()); 4821 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 4822 // Add ADRP with its PC-relative type patch. 4823 const DexFile& dex_file = cls->GetDexFile(); 4824 dex::TypeIndex type_index = cls->GetTypeIndex(); 4825 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); 4826 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 4827 // Add ADD with its PC-relative type patch. 4828 vixl::aarch64::Label* add_label = 4829 codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); 4830 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); 4831 break; 4832 } 4833 case HLoadClass::LoadKind::kBootImageRelRo: { 4834 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 4835 uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls); 4836 // Add ADRP with its PC-relative .data.bimg.rel.ro patch. 4837 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); 4838 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 4839 // Add LDR with its PC-relative .data.bimg.rel.ro patch. 4840 vixl::aarch64::Label* ldr_label = 4841 codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); 4842 codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); 4843 break; 4844 } 4845 case HLoadClass::LoadKind::kBssEntry: { 4846 // Add ADRP with its PC-relative Class .bss entry patch. 4847 const DexFile& dex_file = cls->GetDexFile(); 4848 dex::TypeIndex type_index = cls->GetTypeIndex(); 4849 vixl::aarch64::Register temp = XRegisterFrom(out_loc); 4850 vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); 4851 codegen_->EmitAdrpPlaceholder(adrp_label, temp); 4852 // Add LDR with its PC-relative Class .bss entry patch. 4853 vixl::aarch64::Label* ldr_label = 4854 codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label); 4855 // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ 4856 // All aligned loads are implicitly atomic consume operations on ARM64. 
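// Illustrative sketch of the linked code for this .bss entry load (symbolic addresses):
//   adrp xN, <page of the Class .bss slot>      // patched ADRP placeholder
//   ldr  wN, [xN, <page offset of the slot>]    // patched LDR placeholder, 32-bit GcRoot<Class>
// A zero result means the type is not yet resolved into the .bss slot, so the null check
// generated below routes to LoadClassSlowPathARM64.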
4857 codegen_->GenerateGcRootFieldLoad(cls, 4858 out_loc, 4859 temp, 4860 /* offset placeholder */ 0u, 4861 ldr_label, 4862 read_barrier_option); 4863 generate_null_check = true; 4864 break; 4865 } 4866 case HLoadClass::LoadKind::kJitBootImageAddress: { 4867 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 4868 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); 4869 DCHECK_NE(address, 0u); 4870 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); 4871 break; 4872 } 4873 case HLoadClass::LoadKind::kJitTableAddress: { 4874 __ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), 4875 cls->GetTypeIndex(), 4876 cls->GetClass())); 4877 codegen_->GenerateGcRootFieldLoad(cls, 4878 out_loc, 4879 out.X(), 4880 /* offset= */ 0, 4881 /* fixup_label= */ nullptr, 4882 read_barrier_option); 4883 break; 4884 } 4885 case HLoadClass::LoadKind::kRuntimeCall: 4886 case HLoadClass::LoadKind::kInvalid: 4887 LOG(FATAL) << "UNREACHABLE"; 4888 UNREACHABLE(); 4889 } 4890 4891 bool do_clinit = cls->MustGenerateClinitCheck(); 4892 if (generate_null_check || do_clinit) { 4893 DCHECK(cls->CanCallRuntime()); 4894 SlowPathCodeARM64* slow_path = 4895 new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls); 4896 codegen_->AddSlowPath(slow_path); 4897 if (generate_null_check) { 4898 __ Cbz(out, slow_path->GetEntryLabel()); 4899 } 4900 if (cls->MustGenerateClinitCheck()) { 4901 GenerateClassInitializationCheck(slow_path, out); 4902 } else { 4903 __ Bind(slow_path->GetExitLabel()); 4904 } 4905 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 4906 } 4907 } 4908 4909 void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) { 4910 InvokeRuntimeCallingConvention calling_convention; 4911 Location location = LocationFrom(calling_convention.GetRegisterAt(0)); 4912 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); 4913 } 4914 4915 void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) { 4916 codegen_->GenerateLoadMethodHandleRuntimeCall(load); 4917 } 4918 4919 void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) { 4920 InvokeRuntimeCallingConvention calling_convention; 4921 Location location = LocationFrom(calling_convention.GetRegisterAt(0)); 4922 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); 4923 } 4924 4925 void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) { 4926 codegen_->GenerateLoadMethodTypeRuntimeCall(load); 4927 } 4928 4929 static MemOperand GetExceptionTlsAddress() { 4930 return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value()); 4931 } 4932 4933 void LocationsBuilderARM64::VisitLoadException(HLoadException* load) { 4934 LocationSummary* locations = 4935 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); 4936 locations->SetOut(Location::RequiresRegister()); 4937 } 4938 4939 void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instruction) { 4940 __ Ldr(OutputRegister(instruction), GetExceptionTlsAddress()); 4941 } 4942 4943 void LocationsBuilderARM64::VisitClearException(HClearException* clear) { 4944 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); 4945 } 4946 4947 void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 4948 __ Str(wzr, GetExceptionTlsAddress()); 4949 } 4950 4951 HLoadString::LoadKind 
CodeGeneratorARM64::GetSupportedLoadStringKind( 4952 HLoadString::LoadKind desired_string_load_kind) { 4953 switch (desired_string_load_kind) { 4954 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: 4955 case HLoadString::LoadKind::kBootImageRelRo: 4956 case HLoadString::LoadKind::kBssEntry: 4957 DCHECK(!Runtime::Current()->UseJitCompilation()); 4958 break; 4959 case HLoadString::LoadKind::kJitBootImageAddress: 4960 case HLoadString::LoadKind::kJitTableAddress: 4961 DCHECK(Runtime::Current()->UseJitCompilation()); 4962 break; 4963 case HLoadString::LoadKind::kRuntimeCall: 4964 break; 4965 } 4966 return desired_string_load_kind; 4967 } 4968 4969 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { 4970 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); 4971 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); 4972 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { 4973 InvokeRuntimeCallingConvention calling_convention; 4974 locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); 4975 } else { 4976 locations->SetOut(Location::RequiresRegister()); 4977 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { 4978 if (!kUseReadBarrier || kUseBakerReadBarrier) { 4979 // Rely on the pResolveString and marking to save everything we need. 4980 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 4981 } else { 4982 // For non-Baker read barrier we have a temp-clobbering call. 4983 } 4984 } 4985 } 4986 } 4987 4988 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 4989 // move. 4990 void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { 4991 Register out = OutputRegister(load); 4992 Location out_loc = load->GetLocations()->Out(); 4993 4994 switch (load->GetLoadKind()) { 4995 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { 4996 DCHECK(codegen_->GetCompilerOptions().IsBootImage() || 4997 codegen_->GetCompilerOptions().IsBootImageExtension()); 4998 // Add ADRP with its PC-relative String patch. 4999 const DexFile& dex_file = load->GetDexFile(); 5000 const dex::StringIndex string_index = load->GetStringIndex(); 5001 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); 5002 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 5003 // Add ADD with its PC-relative String patch. 5004 vixl::aarch64::Label* add_label = 5005 codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); 5006 codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); 5007 return; 5008 } 5009 case HLoadString::LoadKind::kBootImageRelRo: { 5010 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 5011 // Add ADRP with its PC-relative .data.bimg.rel.ro patch. 5012 uint32_t boot_image_offset = codegen_->GetBootImageOffset(load); 5013 vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset); 5014 codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); 5015 // Add LDR with its PC-relative .data.bimg.rel.ro patch. 5016 vixl::aarch64::Label* ldr_label = 5017 codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label); 5018 codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); 5019 return; 5020 } 5021 case HLoadString::LoadKind::kBssEntry: { 5022 // Add ADRP with its PC-relative String .bss entry patch. 
5023 const DexFile& dex_file = load->GetDexFile(); 5024 const dex::StringIndex string_index = load->GetStringIndex(); 5025 Register temp = XRegisterFrom(out_loc); 5026 vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index); 5027 codegen_->EmitAdrpPlaceholder(adrp_label, temp); 5028 // Add LDR with its PC-relative String .bss entry patch. 5029 vixl::aarch64::Label* ldr_label = 5030 codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); 5031 // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ 5032 // All aligned loads are implicitly atomic consume operations on ARM64. 5033 codegen_->GenerateGcRootFieldLoad(load, 5034 out_loc, 5035 temp, 5036 /* offset placeholder */ 0u, 5037 ldr_label, 5038 kCompilerReadBarrierOption); 5039 SlowPathCodeARM64* slow_path = 5040 new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); 5041 codegen_->AddSlowPath(slow_path); 5042 __ Cbz(out.X(), slow_path->GetEntryLabel()); 5043 __ Bind(slow_path->GetExitLabel()); 5044 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 5045 return; 5046 } 5047 case HLoadString::LoadKind::kJitBootImageAddress: { 5048 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); 5049 DCHECK_NE(address, 0u); 5050 __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); 5051 return; 5052 } 5053 case HLoadString::LoadKind::kJitTableAddress: { 5054 __ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), 5055 load->GetStringIndex(), 5056 load->GetString())); 5057 codegen_->GenerateGcRootFieldLoad(load, 5058 out_loc, 5059 out.X(), 5060 /* offset= */ 0, 5061 /* fixup_label= */ nullptr, 5062 kCompilerReadBarrierOption); 5063 return; 5064 } 5065 default: 5066 break; 5067 } 5068 5069 // TODO: Re-add the compiler code to do string dex cache lookup again. 5070 InvokeRuntimeCallingConvention calling_convention; 5071 DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode()); 5072 __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); 5073 codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); 5074 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 5075 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 5076 } 5077 5078 void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { 5079 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); 5080 locations->SetOut(Location::ConstantLocation(constant)); 5081 } 5082 5083 void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 5084 // Will be generated at use site. 5085 } 5086 5087 void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 5088 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5089 instruction, LocationSummary::kCallOnMainOnly); 5090 InvokeRuntimeCallingConvention calling_convention; 5091 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5092 } 5093 5094 void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* instruction) { 5095 codegen_->InvokeRuntime(instruction->IsEnter() ? 
kQuickLockObject : kQuickUnlockObject, 5096 instruction, 5097 instruction->GetDexPc()); 5098 if (instruction->IsEnter()) { 5099 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 5100 } else { 5101 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 5102 } 5103 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 5104 } 5105 5106 void LocationsBuilderARM64::VisitMul(HMul* mul) { 5107 LocationSummary* locations = 5108 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); 5109 switch (mul->GetResultType()) { 5110 case DataType::Type::kInt32: 5111 case DataType::Type::kInt64: 5112 locations->SetInAt(0, Location::RequiresRegister()); 5113 locations->SetInAt(1, Location::RequiresRegister()); 5114 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5115 break; 5116 5117 case DataType::Type::kFloat32: 5118 case DataType::Type::kFloat64: 5119 locations->SetInAt(0, Location::RequiresFpuRegister()); 5120 locations->SetInAt(1, Location::RequiresFpuRegister()); 5121 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5122 break; 5123 5124 default: 5125 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 5126 } 5127 } 5128 5129 void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) { 5130 switch (mul->GetResultType()) { 5131 case DataType::Type::kInt32: 5132 case DataType::Type::kInt64: 5133 __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1)); 5134 break; 5135 5136 case DataType::Type::kFloat32: 5137 case DataType::Type::kFloat64: 5138 __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1)); 5139 break; 5140 5141 default: 5142 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 5143 } 5144 } 5145 5146 void LocationsBuilderARM64::VisitNeg(HNeg* neg) { 5147 LocationSummary* locations = 5148 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); 5149 switch (neg->GetResultType()) { 5150 case DataType::Type::kInt32: 5151 case DataType::Type::kInt64: 5152 locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg)); 5153 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5154 break; 5155 5156 case DataType::Type::kFloat32: 5157 case DataType::Type::kFloat64: 5158 locations->SetInAt(0, Location::RequiresFpuRegister()); 5159 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5160 break; 5161 5162 default: 5163 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 5164 } 5165 } 5166 5167 void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) { 5168 switch (neg->GetResultType()) { 5169 case DataType::Type::kInt32: 5170 case DataType::Type::kInt64: 5171 __ Neg(OutputRegister(neg), InputOperandAt(neg, 0)); 5172 break; 5173 5174 case DataType::Type::kFloat32: 5175 case DataType::Type::kFloat64: 5176 __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0)); 5177 break; 5178 5179 default: 5180 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 5181 } 5182 } 5183 5184 void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { 5185 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5186 instruction, LocationSummary::kCallOnMainOnly); 5187 InvokeRuntimeCallingConvention calling_convention; 5188 locations->SetOut(LocationFrom(x0)); 5189 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5190 locations->SetInAt(1, 
LocationFrom(calling_convention.GetRegisterAt(1))); 5191 } 5192 5193 void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { 5194 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. 5195 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); 5196 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); 5197 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); 5198 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 5199 } 5200 5201 void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { 5202 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5203 instruction, LocationSummary::kCallOnMainOnly); 5204 InvokeRuntimeCallingConvention calling_convention; 5205 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5206 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); 5207 } 5208 5209 void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) { 5210 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); 5211 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 5212 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 5213 } 5214 5215 void LocationsBuilderARM64::VisitNot(HNot* instruction) { 5216 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5217 locations->SetInAt(0, Location::RequiresRegister()); 5218 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5219 } 5220 5221 void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) { 5222 switch (instruction->GetResultType()) { 5223 case DataType::Type::kInt32: 5224 case DataType::Type::kInt64: 5225 __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0)); 5226 break; 5227 5228 default: 5229 LOG(FATAL) << "Unexpected type for not operation " << instruction->GetResultType(); 5230 } 5231 } 5232 5233 void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) { 5234 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5235 locations->SetInAt(0, Location::RequiresRegister()); 5236 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5237 } 5238 5239 void InstructionCodeGeneratorARM64::VisitBooleanNot(HBooleanNot* instruction) { 5240 __ Eor(OutputRegister(instruction), InputRegisterAt(instruction, 0), vixl::aarch64::Operand(1)); 5241 } 5242 5243 void LocationsBuilderARM64::VisitNullCheck(HNullCheck* instruction) { 5244 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 5245 locations->SetInAt(0, Location::RequiresRegister()); 5246 } 5247 5248 void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { 5249 if (CanMoveNullCheckToUser(instruction)) { 5250 return; 5251 } 5252 { 5253 // Ensure that between load and RecordPcInfo there are no pools emitted. 
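// Illustrative sketch: the probe emitted below is simply
//   ldr wzr, [xObj, #0]   // result discarded; faults iff obj == null
// The signal handler maps the faulting PC back to this instruction via the stack map
// recorded by RecordPcInfo(), which is why no pools may be emitted in between.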
5254 EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); 5255 Location obj = instruction->GetLocations()->InAt(0); 5256 __ Ldr(wzr, HeapOperandFrom(obj, Offset(0))); 5257 RecordPcInfo(instruction, instruction->GetDexPc()); 5258 } 5259 } 5260 5261 void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) { 5262 SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction); 5263 AddSlowPath(slow_path); 5264 5265 LocationSummary* locations = instruction->GetLocations(); 5266 Location obj = locations->InAt(0); 5267 5268 __ Cbz(RegisterFrom(obj, instruction->InputAt(0)->GetType()), slow_path->GetEntryLabel()); 5269 } 5270 5271 void InstructionCodeGeneratorARM64::VisitNullCheck(HNullCheck* instruction) { 5272 codegen_->GenerateNullCheck(instruction); 5273 } 5274 5275 void LocationsBuilderARM64::VisitOr(HOr* instruction) { 5276 HandleBinaryOp(instruction); 5277 } 5278 5279 void InstructionCodeGeneratorARM64::VisitOr(HOr* instruction) { 5280 HandleBinaryOp(instruction); 5281 } 5282 5283 void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { 5284 LOG(FATAL) << "Unreachable"; 5285 } 5286 5287 void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) { 5288 if (instruction->GetNext()->IsSuspendCheck() && 5289 instruction->GetBlock()->GetLoopInformation() != nullptr) { 5290 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); 5291 // The back edge will generate the suspend check. 5292 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); 5293 } 5294 5295 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 5296 } 5297 5298 void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { 5299 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5300 Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); 5301 if (location.IsStackSlot()) { 5302 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5303 } else if (location.IsDoubleStackSlot()) { 5304 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 5305 } 5306 locations->SetOut(location); 5307 } 5308 5309 void InstructionCodeGeneratorARM64::VisitParameterValue( 5310 HParameterValue* instruction ATTRIBUTE_UNUSED) { 5311 // Nothing to do, the parameter is already at its location. 5312 } 5313 5314 void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { 5315 LocationSummary* locations = 5316 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 5317 locations->SetOut(LocationFrom(kArtMethodRegister)); 5318 } 5319 5320 void InstructionCodeGeneratorARM64::VisitCurrentMethod( 5321 HCurrentMethod* instruction ATTRIBUTE_UNUSED) { 5322 // Nothing to do, the method is already at its location. 
5323 } 5324 5325 void LocationsBuilderARM64::VisitPhi(HPhi* instruction) { 5326 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5327 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { 5328 locations->SetInAt(i, Location::Any()); 5329 } 5330 locations->SetOut(Location::Any()); 5331 } 5332 5333 void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { 5334 LOG(FATAL) << "Unreachable"; 5335 } 5336 5337 void LocationsBuilderARM64::VisitRem(HRem* rem) { 5338 DataType::Type type = rem->GetResultType(); 5339 LocationSummary::CallKind call_kind = 5340 DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly 5341 : LocationSummary::kNoCall; 5342 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); 5343 5344 switch (type) { 5345 case DataType::Type::kInt32: 5346 case DataType::Type::kInt64: 5347 locations->SetInAt(0, Location::RequiresRegister()); 5348 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); 5349 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5350 break; 5351 5352 case DataType::Type::kFloat32: 5353 case DataType::Type::kFloat64: { 5354 InvokeRuntimeCallingConvention calling_convention; 5355 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 5356 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); 5357 locations->SetOut(calling_convention.GetReturnLocation(type)); 5358 5359 break; 5360 } 5361 5362 default: 5363 LOG(FATAL) << "Unexpected rem type " << type; 5364 } 5365 } 5366 5367 void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem *instruction) { 5368 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); 5369 uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); 5370 DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm; 5371 5372 Register out = OutputRegister(instruction); 5373 Register dividend = InputRegisterAt(instruction, 0); 5374 5375 if (abs_imm == 2) { 5376 __ Cmp(dividend, 0); 5377 __ And(out, dividend, 1); 5378 __ Csneg(out, out, out, ge); 5379 } else { 5380 UseScratchRegisterScope temps(GetVIXLAssembler()); 5381 Register temp = temps.AcquireSameSizeAs(out); 5382 5383 __ Negs(temp, dividend); 5384 __ And(out, dividend, abs_imm - 1); 5385 __ And(temp, temp, abs_imm - 1); 5386 __ Csneg(out, out, temp, mi); 5387 } 5388 } 5389 5390 void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem *instruction) { 5391 int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1)); 5392 5393 if (imm == 0) { 5394 // Do not generate anything. 5395 // DivZeroCheck would prevent any code from being executed. 5396 return; 5397 } 5398 5399 if (IsPowerOfTwo(AbsOrMin(imm))) { 5400 // Cases imm == -1 or imm == 1 are handled in constant folding by 5401 // InstructionWithAbsorbingInputSimplifier. 5402 // If the cases have survived till code generation they are handled in 5403 // GenerateIntRemForPower2Denom because -1 and 1 are powers of 2 (2^0). 5404 // The correct code is generated for them, just more instructions.
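// Worked example (illustrative) of GenerateIntRemForPower2Denom above, for imm = 8 and
// dividend = -13; register names stand for the allocated registers. Java requires the
// remainder to take the sign of the dividend:
//   negs temp, dividend          // temp = 13; flags set from 13, so "mi" is false
//   and  out,  dividend, #7      // out  = -13 & 7 = 3
//   and  temp, temp, #7          // temp =  13 & 7 = 5
//   csneg out, out, temp, mi     // dividend > 0 ? out : -temp  =>  out = -5
// giving -13 % 8 == -5 without any division instruction.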
5405 GenerateIntRemForPower2Denom(instruction); 5406 } else { 5407 DCHECK(imm < -2 || imm > 2) << imm; 5408 GenerateDivRemWithAnyConstant(instruction); 5409 } 5410 } 5411 5412 void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) { 5413 DCHECK(DataType::IsIntOrLongType(instruction->GetResultType())) 5414 << instruction->GetResultType(); 5415 5416 if (instruction->GetLocations()->InAt(1).IsConstant()) { 5417 GenerateIntRemForConstDenom(instruction); 5418 } else { 5419 Register out = OutputRegister(instruction); 5420 Register dividend = InputRegisterAt(instruction, 0); 5421 Register divisor = InputRegisterAt(instruction, 1); 5422 UseScratchRegisterScope temps(GetVIXLAssembler()); 5423 Register temp = temps.AcquireSameSizeAs(out); 5424 __ Sdiv(temp, dividend, divisor); 5425 __ Msub(out, temp, divisor, dividend); 5426 } 5427 } 5428 5429 void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { 5430 DataType::Type type = rem->GetResultType(); 5431 5432 switch (type) { 5433 case DataType::Type::kInt32: 5434 case DataType::Type::kInt64: { 5435 GenerateIntRem(rem); 5436 break; 5437 } 5438 5439 case DataType::Type::kFloat32: 5440 case DataType::Type::kFloat64: { 5441 QuickEntrypointEnum entrypoint = 5442 (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod; 5443 codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc()); 5444 if (type == DataType::Type::kFloat32) { 5445 CheckEntrypointTypes<kQuickFmodf, float, float, float>(); 5446 } else { 5447 CheckEntrypointTypes<kQuickFmod, double, double, double>(); 5448 } 5449 break; 5450 } 5451 5452 default: 5453 LOG(FATAL) << "Unexpected rem type " << type; 5454 UNREACHABLE(); 5455 } 5456 } 5457 5458 void LocationsBuilderARM64::VisitMin(HMin* min) { 5459 HandleBinaryOp(min); 5460 } 5461 5462 void InstructionCodeGeneratorARM64::VisitMin(HMin* min) { 5463 HandleBinaryOp(min); 5464 } 5465 5466 void LocationsBuilderARM64::VisitMax(HMax* max) { 5467 HandleBinaryOp(max); 5468 } 5469 5470 void InstructionCodeGeneratorARM64::VisitMax(HMax* max) { 5471 HandleBinaryOp(max); 5472 } 5473 5474 void LocationsBuilderARM64::VisitAbs(HAbs* abs) { 5475 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); 5476 switch (abs->GetResultType()) { 5477 case DataType::Type::kInt32: 5478 case DataType::Type::kInt64: 5479 locations->SetInAt(0, Location::RequiresRegister()); 5480 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5481 break; 5482 case DataType::Type::kFloat32: 5483 case DataType::Type::kFloat64: 5484 locations->SetInAt(0, Location::RequiresFpuRegister()); 5485 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5486 break; 5487 default: 5488 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); 5489 } 5490 } 5491 5492 void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) { 5493 switch (abs->GetResultType()) { 5494 case DataType::Type::kInt32: 5495 case DataType::Type::kInt64: { 5496 Register in_reg = InputRegisterAt(abs, 0); 5497 Register out_reg = OutputRegister(abs); 5498 __ Cmp(in_reg, Operand(0)); 5499 __ Cneg(out_reg, in_reg, lt); 5500 break; 5501 } 5502 case DataType::Type::kFloat32: 5503 case DataType::Type::kFloat64: { 5504 VRegister in_reg = InputFPRegisterAt(abs, 0); 5505 VRegister out_reg = OutputFPRegister(abs); 5506 __ Fabs(out_reg, in_reg); 5507 break; 5508 } 5509 default: 5510 LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType(); 5511 } 5512 } 5513 5514 void 
LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) { 5515 constructor_fence->SetLocations(nullptr); 5516 } 5517 5518 void InstructionCodeGeneratorARM64::VisitConstructorFence( 5519 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 5520 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 5521 } 5522 5523 void LocationsBuilderARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 5524 memory_barrier->SetLocations(nullptr); 5525 } 5526 5527 void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 5528 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 5529 } 5530 5531 void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { 5532 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 5533 DataType::Type return_type = instruction->InputAt(0)->GetType(); 5534 locations->SetInAt(0, ARM64ReturnLocation(return_type)); 5535 } 5536 5537 void InstructionCodeGeneratorARM64::VisitReturn(HReturn* ret) { 5538 if (GetGraph()->IsCompilingOsr()) { 5539 // To simplify callers of an OSR method, we put the return value in both 5540 // floating point and core register. 5541 switch (ret->InputAt(0)->GetType()) { 5542 case DataType::Type::kFloat32: 5543 __ Fmov(w0, s0); 5544 break; 5545 case DataType::Type::kFloat64: 5546 __ Fmov(x0, d0); 5547 break; 5548 default: 5549 break; 5550 } 5551 } 5552 codegen_->GenerateFrameExit(); 5553 } 5554 5555 void LocationsBuilderARM64::VisitReturnVoid(HReturnVoid* instruction) { 5556 instruction->SetLocations(nullptr); 5557 } 5558 5559 void InstructionCodeGeneratorARM64::VisitReturnVoid(HReturnVoid* instruction ATTRIBUTE_UNUSED) { 5560 codegen_->GenerateFrameExit(); 5561 } 5562 5563 void LocationsBuilderARM64::VisitRor(HRor* ror) { 5564 HandleBinaryOp(ror); 5565 } 5566 5567 void InstructionCodeGeneratorARM64::VisitRor(HRor* ror) { 5568 HandleBinaryOp(ror); 5569 } 5570 5571 void LocationsBuilderARM64::VisitShl(HShl* shl) { 5572 HandleShift(shl); 5573 } 5574 5575 void InstructionCodeGeneratorARM64::VisitShl(HShl* shl) { 5576 HandleShift(shl); 5577 } 5578 5579 void LocationsBuilderARM64::VisitShr(HShr* shr) { 5580 HandleShift(shr); 5581 } 5582 5583 void InstructionCodeGeneratorARM64::VisitShr(HShr* shr) { 5584 HandleShift(shr); 5585 } 5586 5587 void LocationsBuilderARM64::VisitSub(HSub* instruction) { 5588 HandleBinaryOp(instruction); 5589 } 5590 5591 void InstructionCodeGeneratorARM64::VisitSub(HSub* instruction) { 5592 HandleBinaryOp(instruction); 5593 } 5594 5595 void LocationsBuilderARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5596 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5597 } 5598 5599 void InstructionCodeGeneratorARM64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 5600 HandleFieldGet(instruction, instruction->GetFieldInfo()); 5601 } 5602 5603 void LocationsBuilderARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5604 HandleFieldSet(instruction); 5605 } 5606 5607 void InstructionCodeGeneratorARM64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 5608 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 5609 } 5610 5611 void LocationsBuilderARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { 5612 codegen_->CreateStringBuilderAppendLocations(instruction, LocationFrom(x0)); 5613 } 5614 5615 void InstructionCodeGeneratorARM64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { 5616 __ Mov(w0, 
instruction->GetFormat()->GetValue()); 5617 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc()); 5618 } 5619 5620 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldGet( 5621 HUnresolvedInstanceFieldGet* instruction) { 5622 FieldAccessCallingConventionARM64 calling_convention; 5623 codegen_->CreateUnresolvedFieldLocationSummary( 5624 instruction, instruction->GetFieldType(), calling_convention); 5625 } 5626 5627 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldGet( 5628 HUnresolvedInstanceFieldGet* instruction) { 5629 FieldAccessCallingConventionARM64 calling_convention; 5630 codegen_->GenerateUnresolvedFieldAccess(instruction, 5631 instruction->GetFieldType(), 5632 instruction->GetFieldIndex(), 5633 instruction->GetDexPc(), 5634 calling_convention); 5635 } 5636 5637 void LocationsBuilderARM64::VisitUnresolvedInstanceFieldSet( 5638 HUnresolvedInstanceFieldSet* instruction) { 5639 FieldAccessCallingConventionARM64 calling_convention; 5640 codegen_->CreateUnresolvedFieldLocationSummary( 5641 instruction, instruction->GetFieldType(), calling_convention); 5642 } 5643 5644 void InstructionCodeGeneratorARM64::VisitUnresolvedInstanceFieldSet( 5645 HUnresolvedInstanceFieldSet* instruction) { 5646 FieldAccessCallingConventionARM64 calling_convention; 5647 codegen_->GenerateUnresolvedFieldAccess(instruction, 5648 instruction->GetFieldType(), 5649 instruction->GetFieldIndex(), 5650 instruction->GetDexPc(), 5651 calling_convention); 5652 } 5653 5654 void LocationsBuilderARM64::VisitUnresolvedStaticFieldGet( 5655 HUnresolvedStaticFieldGet* instruction) { 5656 FieldAccessCallingConventionARM64 calling_convention; 5657 codegen_->CreateUnresolvedFieldLocationSummary( 5658 instruction, instruction->GetFieldType(), calling_convention); 5659 } 5660 5661 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldGet( 5662 HUnresolvedStaticFieldGet* instruction) { 5663 FieldAccessCallingConventionARM64 calling_convention; 5664 codegen_->GenerateUnresolvedFieldAccess(instruction, 5665 instruction->GetFieldType(), 5666 instruction->GetFieldIndex(), 5667 instruction->GetDexPc(), 5668 calling_convention); 5669 } 5670 5671 void LocationsBuilderARM64::VisitUnresolvedStaticFieldSet( 5672 HUnresolvedStaticFieldSet* instruction) { 5673 FieldAccessCallingConventionARM64 calling_convention; 5674 codegen_->CreateUnresolvedFieldLocationSummary( 5675 instruction, instruction->GetFieldType(), calling_convention); 5676 } 5677 5678 void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet( 5679 HUnresolvedStaticFieldSet* instruction) { 5680 FieldAccessCallingConventionARM64 calling_convention; 5681 codegen_->GenerateUnresolvedFieldAccess(instruction, 5682 instruction->GetFieldType(), 5683 instruction->GetFieldIndex(), 5684 instruction->GetDexPc(), 5685 calling_convention); 5686 } 5687 5688 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { 5689 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5690 instruction, LocationSummary::kCallOnSlowPath); 5691 // In suspend check slow path, usually there are no caller-save registers at all. 5692 // If SIMD instructions are present, however, we force spilling all live SIMD 5693 // registers in full width (since the runtime only saves/restores lower part). 5694 locations->SetCustomSlowPathCallerSaves( 5695 GetGraph()->HasSIMD() ? 
RegisterSet::AllFpu() : RegisterSet::Empty()); 5696 } 5697 5698 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) { 5699 HBasicBlock* block = instruction->GetBlock(); 5700 if (block->GetLoopInformation() != nullptr) { 5701 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); 5702 // The back edge will generate the suspend check. 5703 return; 5704 } 5705 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { 5706 // The goto will generate the suspend check. 5707 return; 5708 } 5709 GenerateSuspendCheck(instruction, nullptr); 5710 codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 5711 } 5712 5713 void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { 5714 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5715 instruction, LocationSummary::kCallOnMainOnly); 5716 InvokeRuntimeCallingConvention calling_convention; 5717 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 5718 } 5719 5720 void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) { 5721 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); 5722 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); 5723 } 5724 5725 void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) { 5726 LocationSummary* locations = 5727 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall); 5728 DataType::Type input_type = conversion->GetInputType(); 5729 DataType::Type result_type = conversion->GetResultType(); 5730 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 5731 << input_type << " -> " << result_type; 5732 if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || 5733 (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { 5734 LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; 5735 } 5736 5737 if (DataType::IsFloatingPointType(input_type)) { 5738 locations->SetInAt(0, Location::RequiresFpuRegister()); 5739 } else { 5740 locations->SetInAt(0, Location::RequiresRegister()); 5741 } 5742 5743 if (DataType::IsFloatingPointType(result_type)) { 5744 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5745 } else { 5746 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 5747 } 5748 } 5749 5750 void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) { 5751 DataType::Type result_type = conversion->GetResultType(); 5752 DataType::Type input_type = conversion->GetInputType(); 5753 5754 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 5755 << input_type << " -> " << result_type; 5756 5757 if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { 5758 int result_size = DataType::Size(result_type); 5759 int input_size = DataType::Size(input_type); 5760 int min_size = std::min(result_size, input_size); 5761 Register output = OutputRegister(conversion); 5762 Register source = InputRegisterAt(conversion, 0); 5763 if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) { 5764 // 'int' values are used directly as W registers, discarding the top 5765 // bits, so we don't need to sign-extend and can just perform a move. 5766 // We do not pass the `kDiscardForSameWReg` argument to force clearing the 5767 // top 32 bits of the target register. 
We theoretically could leave those 5768 // bits unchanged, but we would have to make sure that no code uses a 5769 // 32bit input value as a 64bit value assuming that the top 32 bits are 5770 // zero. 5771 __ Mov(output.W(), source.W()); 5772 } else if (DataType::IsUnsignedType(result_type) || 5773 (DataType::IsUnsignedType(input_type) && input_size < result_size)) { 5774 __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte); 5775 } else { 5776 __ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte); 5777 } 5778 } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { 5779 __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0)); 5780 } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { 5781 CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); 5782 __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0)); 5783 } else if (DataType::IsFloatingPointType(result_type) && 5784 DataType::IsFloatingPointType(input_type)) { 5785 __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0)); 5786 } else { 5787 LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type 5788 << " to " << result_type; 5789 } 5790 } 5791 5792 void LocationsBuilderARM64::VisitUShr(HUShr* ushr) { 5793 HandleShift(ushr); 5794 } 5795 5796 void InstructionCodeGeneratorARM64::VisitUShr(HUShr* ushr) { 5797 HandleShift(ushr); 5798 } 5799 5800 void LocationsBuilderARM64::VisitXor(HXor* instruction) { 5801 HandleBinaryOp(instruction); 5802 } 5803 5804 void InstructionCodeGeneratorARM64::VisitXor(HXor* instruction) { 5805 HandleBinaryOp(instruction); 5806 } 5807 5808 void LocationsBuilderARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 5809 // Nothing to do, this should be removed during prepare for register allocator. 5810 LOG(FATAL) << "Unreachable"; 5811 } 5812 5813 void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 5814 // Nothing to do, this should be removed during prepare for register allocator. 5815 LOG(FATAL) << "Unreachable"; 5816 } 5817 5818 // Simple implementation of packed switch - generate cascaded compare/jumps. 5819 void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 5820 LocationSummary* locations = 5821 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); 5822 locations->SetInAt(0, Location::RequiresRegister()); 5823 } 5824 5825 void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 5826 int32_t lower_bound = switch_instr->GetStartValue(); 5827 uint32_t num_entries = switch_instr->GetNumEntries(); 5828 Register value_reg = InputRegisterAt(switch_instr, 0); 5829 HBasicBlock* default_block = switch_instr->GetDefaultBlock(); 5830 5831 // Roughly set 16 as max average assemblies generated per HIR in a graph. 5832 static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * kInstructionSize; 5833 // ADR has a limited range(+/-1MB), so we set a threshold for the number of HIRs in the graph to 5834 // make sure we don't emit it if the target may run out of range. 5835 // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR 5836 // ranges and emit the tables only as required. 
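 // With kMaxExpectedSizePerHInstruction = 16 * 4 B = 64 B, the threshold below works out to
 // 1 MiB / 64 B = 16384 HIRs; graphs whose current instruction id exceeds that conservatively
 // fall back to the compare/jump sequence so that the ADR below cannot end up out of range.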
5837 static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction; 5838 5839 if (num_entries <= kPackedSwitchCompareJumpThreshold || 5840 // Current instruction id is an upper bound of the number of HIRs in the graph. 5841 GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) { 5842 // Create a series of compare/jumps. 5843 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); 5844 Register temp = temps.AcquireW(); 5845 __ Subs(temp, value_reg, Operand(lower_bound)); 5846 5847 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); 5848 // Jump to successors[0] if value == lower_bound. 5849 __ B(eq, codegen_->GetLabelOf(successors[0])); 5850 int32_t last_index = 0; 5851 for (; num_entries - last_index > 2; last_index += 2) { 5852 __ Subs(temp, temp, Operand(2)); 5853 // Jump to successors[last_index + 1] if value < case_value[last_index + 2]. 5854 __ B(lo, codegen_->GetLabelOf(successors[last_index + 1])); 5855 // Jump to successors[last_index + 2] if value == case_value[last_index + 2]. 5856 __ B(eq, codegen_->GetLabelOf(successors[last_index + 2])); 5857 } 5858 if (num_entries - last_index == 2) { 5859 // The last missing case_value. 5860 __ Cmp(temp, Operand(1)); 5861 __ B(eq, codegen_->GetLabelOf(successors[last_index + 1])); 5862 } 5863 5864 // And the default for any other value. 5865 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { 5866 __ B(codegen_->GetLabelOf(default_block)); 5867 } 5868 } else { 5869 JumpTableARM64* jump_table = codegen_->CreateJumpTable(switch_instr); 5870 5871 UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); 5872 5873 // Below instructions should use at most one blocked register. Since there are two blocked 5874 // registers, we are free to block one. 5875 Register temp_w = temps.AcquireW(); 5876 Register index; 5877 // Remove the bias. 5878 if (lower_bound != 0) { 5879 index = temp_w; 5880 __ Sub(index, value_reg, Operand(lower_bound)); 5881 } else { 5882 index = value_reg; 5883 } 5884 5885 // Jump to default block if index is out of the range. 5886 __ Cmp(index, Operand(num_entries)); 5887 __ B(hs, codegen_->GetLabelOf(default_block)); 5888 5889 // In current VIXL implementation, it won't require any blocked registers to encode the 5890 // immediate value for Adr. So we are free to use both VIXL blocked registers to reduce the 5891 // register pressure. 5892 Register table_base = temps.AcquireX(); 5893 // Load jump offset from the table. 5894 __ Adr(table_base, jump_table->GetTableStartLabel()); 5895 Register jump_offset = temp_w; 5896 __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2)); 5897 5898 // Jump to target block by branching to table_base(pc related) + offset. 5899 Register target_address = table_base; 5900 __ Add(target_address, table_base, Operand(jump_offset, SXTW)); 5901 __ Br(target_address); 5902 } 5903 } 5904 5905 void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( 5906 HInstruction* instruction, 5907 Location out, 5908 uint32_t offset, 5909 Location maybe_temp, 5910 ReadBarrierOption read_barrier_option) { 5911 DataType::Type type = DataType::Type::kReference; 5912 Register out_reg = RegisterFrom(out, type); 5913 if (read_barrier_option == kWithReadBarrier) { 5914 CHECK(kEmitCompilerReadBarrier); 5915 if (kUseBakerReadBarrier) { 5916 // Load with fast path based Baker's read barrier. 
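 // Note that `out` serves both as the base address and as the destination register here;
 // the two-register variant that follows keeps the holder object in a separate register.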
5917 // /* HeapReference<Object> */ out = *(out + offset) 5918 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, 5919 out, 5920 out_reg, 5921 offset, 5922 maybe_temp, 5923 /* needs_null_check= */ false, 5924 /* use_load_acquire= */ false); 5925 } else { 5926 // Load with slow path based read barrier. 5927 // Save the value of `out` into `maybe_temp` before overwriting it 5928 // in the following move operation, as we will need it for the 5929 // read barrier below. 5930 Register temp_reg = RegisterFrom(maybe_temp, type); 5931 __ Mov(temp_reg, out_reg); 5932 // /* HeapReference<Object> */ out = *(out + offset) 5933 __ Ldr(out_reg, HeapOperand(out_reg, offset)); 5934 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); 5935 } 5936 } else { 5937 // Plain load with no read barrier. 5938 // /* HeapReference<Object> */ out = *(out + offset) 5939 __ Ldr(out_reg, HeapOperand(out_reg, offset)); 5940 GetAssembler()->MaybeUnpoisonHeapReference(out_reg); 5941 } 5942 } 5943 5944 void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( 5945 HInstruction* instruction, 5946 Location out, 5947 Location obj, 5948 uint32_t offset, 5949 Location maybe_temp, 5950 ReadBarrierOption read_barrier_option) { 5951 DataType::Type type = DataType::Type::kReference; 5952 Register out_reg = RegisterFrom(out, type); 5953 Register obj_reg = RegisterFrom(obj, type); 5954 if (read_barrier_option == kWithReadBarrier) { 5955 CHECK(kEmitCompilerReadBarrier); 5956 if (kUseBakerReadBarrier) { 5957 // Load with fast path based Baker's read barrier. 5958 // /* HeapReference<Object> */ out = *(obj + offset) 5959 codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, 5960 out, 5961 obj_reg, 5962 offset, 5963 maybe_temp, 5964 /* needs_null_check= */ false, 5965 /* use_load_acquire= */ false); 5966 } else { 5967 // Load with slow path based read barrier. 5968 // /* HeapReference<Object> */ out = *(obj + offset) 5969 __ Ldr(out_reg, HeapOperand(obj_reg, offset)); 5970 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); 5971 } 5972 } else { 5973 // Plain load with no read barrier. 5974 // /* HeapReference<Object> */ out = *(obj + offset) 5975 __ Ldr(out_reg, HeapOperand(obj_reg, offset)); 5976 GetAssembler()->MaybeUnpoisonHeapReference(out_reg); 5977 } 5978 } 5979 5980 void CodeGeneratorARM64::GenerateGcRootFieldLoad( 5981 HInstruction* instruction, 5982 Location root, 5983 Register obj, 5984 uint32_t offset, 5985 vixl::aarch64::Label* fixup_label, 5986 ReadBarrierOption read_barrier_option) { 5987 DCHECK(fixup_label == nullptr || offset == 0u); 5988 Register root_reg = RegisterFrom(root, DataType::Type::kReference); 5989 if (read_barrier_option == kWithReadBarrier) { 5990 DCHECK(kEmitCompilerReadBarrier); 5991 if (kUseBakerReadBarrier) { 5992 // Fast path implementation of art::ReadBarrier::BarrierForRoot when 5993 // Baker's read barrier are used. 5994 5995 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in 5996 // the Marking Register) to decide whether we need to enter 5997 // the slow path to mark the GC root. 5998 // 5999 // We use shared thunks for the slow path; shared within the method 6000 // for JIT, across methods for AOT. That thunk checks the reference 6001 // and jumps to the entrypoint if needed. 6002 // 6003 // lr = &return_address; 6004 // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. 
6005 // if (mr) { // Thread::Current()->GetIsGcMarking() 6006 // goto gc_root_thunk<root_reg>(lr) 6007 // } 6008 // return_address: 6009 6010 UseScratchRegisterScope temps(GetVIXLAssembler()); 6011 DCHECK(temps.IsAvailable(ip0)); 6012 DCHECK(temps.IsAvailable(ip1)); 6013 temps.Exclude(ip0, ip1); 6014 uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode()); 6015 6016 ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); 6017 vixl::aarch64::Label return_address; 6018 __ adr(lr, &return_address); 6019 if (fixup_label != nullptr) { 6020 __ bind(fixup_label); 6021 } 6022 static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, 6023 "GC root LDR must be 2 instructions (8B) before the return address label."); 6024 __ ldr(root_reg, MemOperand(obj.X(), offset)); 6025 EmitBakerReadBarrierCbnz(custom_data); 6026 __ bind(&return_address); 6027 } else { 6028 // GC root loaded through a slow path for read barriers other 6029 // than Baker's. 6030 // /* GcRoot<mirror::Object>* */ root = obj + offset 6031 if (fixup_label == nullptr) { 6032 __ Add(root_reg.X(), obj.X(), offset); 6033 } else { 6034 EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); 6035 } 6036 // /* mirror::Object* */ root = root->Read() 6037 GenerateReadBarrierForRootSlow(instruction, root, root); 6038 } 6039 } else { 6040 // Plain GC root load with no read barrier. 6041 // /* GcRoot<mirror::Object> */ root = *(obj + offset) 6042 if (fixup_label == nullptr) { 6043 __ Ldr(root_reg, MemOperand(obj, offset)); 6044 } else { 6045 EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); 6046 } 6047 // Note that GC roots are not affected by heap poisoning, thus we 6048 // do not have to unpoison `root_reg` here. 6049 } 6050 MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); 6051 } 6052 6053 void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier( 6054 vixl::aarch64::Register marked, 6055 vixl::aarch64::Register old_value) { 6056 DCHECK(kEmitCompilerReadBarrier); 6057 DCHECK(kUseBakerReadBarrier); 6058 6059 // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR. 6060 uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode()); 6061 6062 ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize); 6063 vixl::aarch64::Label return_address; 6064 __ adr(lr, &return_address); 6065 static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8, 6066 "GC root LDR must be 2 instructions (8B) before the return address label."); 6067 __ mov(marked, old_value); 6068 EmitBakerReadBarrierCbnz(custom_data); 6069 __ bind(&return_address); 6070 } 6071 6072 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 6073 Location ref, 6074 vixl::aarch64::Register obj, 6075 const vixl::aarch64::MemOperand& src, 6076 bool needs_null_check, 6077 bool use_load_acquire) { 6078 DCHECK(kEmitCompilerReadBarrier); 6079 DCHECK(kUseBakerReadBarrier); 6080 6081 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the 6082 // Marking Register) to decide whether we need to enter the slow 6083 // path to mark the reference. Then, in the slow path, check the 6084 // gray bit in the lock word of the reference's holder (`obj`) to 6085 // decide whether to mark `ref` or not. 6086 // 6087 // We use shared thunks for the slow path; shared within the method 6088 // for JIT, across methods for AOT. That thunk checks the holder 6089 // and jumps to the entrypoint if needed. 
  // If the holder is not gray,
  // it creates a fake dependency and returns to the LDR instruction.
  //
  //     lr = &gray_return_address;
  //     if (mr) {  // Thread::Current()->GetIsGcMarking()
  //       goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
  //     }
  //   not_gray_return_address:
  //     // Original reference load. If the offset is too large to fit
  //     // into LDR, we use an adjusted base register here.
  //     HeapReference<mirror::Object> reference = *(obj+offset);
  //   gray_return_address:

  DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
  DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));

  UseScratchRegisterScope temps(GetVIXLAssembler());
  DCHECK(temps.IsAvailable(ip0));
  DCHECK(temps.IsAvailable(ip1));
  temps.Exclude(ip0, ip1);
  uint32_t custom_data = use_load_acquire
      ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
      : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());

  {
    ExactAssemblyScope guard(GetVIXLAssembler(),
                             (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
    vixl::aarch64::Label return_address;
    __ adr(lr, &return_address);
    EmitBakerReadBarrierCbnz(custom_data);
    static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
                  "Field LDR must be 1 instruction (4B) before the return address label; "
                  " 2 instructions (8B) for heap poisoning.");
    Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
    if (use_load_acquire) {
      DCHECK_EQ(src.GetOffset(), 0);
      __ ldar(ref_reg, src);
    } else {
      __ ldr(ref_reg, src);
    }
    if (needs_null_check) {
      MaybeRecordImplicitNullCheck(instruction);
    }
    // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
    // macro instructions disallowed in ExactAssemblyScope.
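    // (A poisoned heap reference is the negated value, so a single NEG both unpoisons it and
    // keeps the emitted sequence at the exact size reserved by the scope above.)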
6134 if (kPoisonHeapReferences) { 6135 __ neg(ref_reg, Operand(ref_reg)); 6136 } 6137 __ bind(&return_address); 6138 } 6139 MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1)); 6140 } 6141 6142 void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 6143 Location ref, 6144 Register obj, 6145 uint32_t offset, 6146 Location maybe_temp, 6147 bool needs_null_check, 6148 bool use_load_acquire) { 6149 DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); 6150 Register base = obj; 6151 if (use_load_acquire) { 6152 DCHECK(maybe_temp.IsRegister()); 6153 base = WRegisterFrom(maybe_temp); 6154 __ Add(base, obj, offset); 6155 offset = 0u; 6156 } else if (offset >= kReferenceLoadMinFarOffset) { 6157 DCHECK(maybe_temp.IsRegister()); 6158 base = WRegisterFrom(maybe_temp); 6159 static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2."); 6160 __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u))); 6161 offset &= (kReferenceLoadMinFarOffset - 1u); 6162 } 6163 MemOperand src(base.X(), offset); 6164 GenerateFieldLoadWithBakerReadBarrier( 6165 instruction, ref, obj, src, needs_null_check, use_load_acquire); 6166 } 6167 6168 void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction, 6169 Location ref, 6170 Register obj, 6171 uint32_t data_offset, 6172 Location index, 6173 bool needs_null_check) { 6174 DCHECK(kEmitCompilerReadBarrier); 6175 DCHECK(kUseBakerReadBarrier); 6176 6177 static_assert( 6178 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 6179 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 6180 size_t scale_factor = DataType::SizeShift(DataType::Type::kReference); 6181 6182 // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the 6183 // Marking Register) to decide whether we need to enter the slow 6184 // path to mark the reference. Then, in the slow path, check the 6185 // gray bit in the lock word of the reference's holder (`obj`) to 6186 // decide whether to mark `ref` or not. 6187 // 6188 // We use shared thunks for the slow path; shared within the method 6189 // for JIT, across methods for AOT. That thunk checks the holder 6190 // and jumps to the entrypoint if needed. If the holder is not gray, 6191 // it creates a fake dependency and returns to the LDR instruction. 6192 // 6193 // lr = &gray_return_address; 6194 // if (mr) { // Thread::Current()->GetIsGcMarking() 6195 // goto array_thunk<base_reg>(lr) 6196 // } 6197 // not_gray_return_address: 6198 // // Original reference load. If the offset is too large to fit 6199 // // into LDR, we use an adjusted base register here. 6200 // HeapReference<mirror::Object> reference = data[index]; 6201 // gray_return_address: 6202 6203 DCHECK(index.IsValid()); 6204 Register index_reg = RegisterFrom(index, DataType::Type::kInt32); 6205 Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); 6206 6207 UseScratchRegisterScope temps(GetVIXLAssembler()); 6208 DCHECK(temps.IsAvailable(ip0)); 6209 DCHECK(temps.IsAvailable(ip1)); 6210 temps.Exclude(ip0, ip1); 6211 6212 Register temp; 6213 if (instruction->GetArray()->IsIntermediateAddress()) { 6214 // We do not need to compute the intermediate address from the array: the 6215 // input instruction has done it already. See the comment in 6216 // `TryExtractArrayAccessAddress()`. 
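 // In that case `obj` already holds `array + data_offset`, so it can be used directly as the
 // base register for the scaled index load emitted below.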
6217 if (kIsDebugBuild) { 6218 HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress(); 6219 DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset); 6220 } 6221 temp = obj; 6222 } else { 6223 temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0)); 6224 __ Add(temp.X(), obj.X(), Operand(data_offset)); 6225 } 6226 6227 uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode()); 6228 6229 { 6230 ExactAssemblyScope guard(GetVIXLAssembler(), 6231 (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); 6232 vixl::aarch64::Label return_address; 6233 __ adr(lr, &return_address); 6234 EmitBakerReadBarrierCbnz(custom_data); 6235 static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), 6236 "Array LDR must be 1 instruction (4B) before the return address label; " 6237 " 2 instructions (8B) for heap poisoning."); 6238 __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); 6239 DCHECK(!needs_null_check); // The thunk cannot handle the null check. 6240 // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses 6241 // macro instructions disallowed in ExactAssemblyScope. 6242 if (kPoisonHeapReferences) { 6243 __ neg(ref_reg, Operand(ref_reg)); 6244 } 6245 __ bind(&return_address); 6246 } 6247 MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1)); 6248 } 6249 6250 void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { 6251 // The following condition is a compile-time one, so it does not have a run-time cost. 6252 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { 6253 // The following condition is a run-time one; it is executed after the 6254 // previous compile-time test, to avoid penalizing non-debug builds. 6255 if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { 6256 UseScratchRegisterScope temps(GetVIXLAssembler()); 6257 Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW(); 6258 GetAssembler()->GenerateMarkingRegisterCheck(temp, code); 6259 } 6260 } 6261 } 6262 6263 void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, 6264 Location out, 6265 Location ref, 6266 Location obj, 6267 uint32_t offset, 6268 Location index) { 6269 DCHECK(kEmitCompilerReadBarrier); 6270 6271 // Insert a slow path based read barrier *after* the reference load. 6272 // 6273 // If heap poisoning is enabled, the unpoisoning of the loaded 6274 // reference will be carried out by the runtime within the slow 6275 // path. 6276 // 6277 // Note that `ref` currently does not get unpoisoned (when heap 6278 // poisoning is enabled), which is alright as the `ref` argument is 6279 // not used by the artReadBarrierSlow entry point. 6280 // 6281 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 
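 // The barrier is emitted unconditionally: we always branch to the slow path, which performs
 // the runtime call and then branches back to the exit label bound right after the branch.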
6282 SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) 6283 ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); 6284 AddSlowPath(slow_path); 6285 6286 __ B(slow_path->GetEntryLabel()); 6287 __ Bind(slow_path->GetExitLabel()); 6288 } 6289 6290 void CodeGeneratorARM64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, 6291 Location out, 6292 Location ref, 6293 Location obj, 6294 uint32_t offset, 6295 Location index) { 6296 if (kEmitCompilerReadBarrier) { 6297 // Baker's read barriers shall be handled by the fast path 6298 // (CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier). 6299 DCHECK(!kUseBakerReadBarrier); 6300 // If heap poisoning is enabled, unpoisoning will be taken care of 6301 // by the runtime within the slow path. 6302 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); 6303 } else if (kPoisonHeapReferences) { 6304 GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out)); 6305 } 6306 } 6307 6308 void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instruction, 6309 Location out, 6310 Location root) { 6311 DCHECK(kEmitCompilerReadBarrier); 6312 6313 // Insert a slow path based read barrier *after* the GC root load. 6314 // 6315 // Note that GC roots are not affected by heap poisoning, so we do 6316 // not need to do anything special for this here. 6317 SlowPathCodeARM64* slow_path = 6318 new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root); 6319 AddSlowPath(slow_path); 6320 6321 __ B(slow_path->GetEntryLabel()); 6322 __ Bind(slow_path->GetExitLabel()); 6323 } 6324 6325 void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) { 6326 LocationSummary* locations = 6327 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 6328 locations->SetInAt(0, Location::RequiresRegister()); 6329 locations->SetOut(Location::RequiresRegister()); 6330 } 6331 6332 void InstructionCodeGeneratorARM64::VisitClassTableGet(HClassTableGet* instruction) { 6333 LocationSummary* locations = instruction->GetLocations(); 6334 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { 6335 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 6336 instruction->GetIndex(), kArm64PointerSize).SizeValue(); 6337 __ Ldr(XRegisterFrom(locations->Out()), 6338 MemOperand(XRegisterFrom(locations->InAt(0)), method_offset)); 6339 } else { 6340 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 6341 instruction->GetIndex(), kArm64PointerSize)); 6342 __ Ldr(XRegisterFrom(locations->Out()), MemOperand(XRegisterFrom(locations->InAt(0)), 6343 mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value())); 6344 __ Ldr(XRegisterFrom(locations->Out()), 6345 MemOperand(XRegisterFrom(locations->Out()), method_offset)); 6346 } 6347 } 6348 6349 static void PatchJitRootUse(uint8_t* code, 6350 const uint8_t* roots_data, 6351 vixl::aarch64::Literal<uint32_t>* literal, 6352 uint64_t index_in_table) { 6353 uint32_t literal_offset = literal->GetOffset(); 6354 uintptr_t address = 6355 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); 6356 uint8_t* data = code + literal_offset; 6357 reinterpret_cast<uint32_t*>(data)[0] = dchecked_integral_cast<uint32_t>(address); 6358 } 6359 6360 void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { 6361 for (const auto& entry : jit_string_patches_) { 6362 const StringReference& string_reference = entry.first; 
6363 vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; 6364 uint64_t index_in_table = GetJitStringRootIndex(string_reference); 6365 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); 6366 } 6367 for (const auto& entry : jit_class_patches_) { 6368 const TypeReference& type_reference = entry.first; 6369 vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; 6370 uint64_t index_in_table = GetJitClassRootIndex(type_reference); 6371 PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); 6372 } 6373 } 6374 6375 #undef __ 6376 #undef QUICK_ENTRY_POINT 6377 6378 #define __ assembler.GetVIXLAssembler()-> 6379 6380 static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler, 6381 vixl::aarch64::Register base_reg, 6382 vixl::aarch64::MemOperand& lock_word, 6383 vixl::aarch64::Label* slow_path, 6384 vixl::aarch64::Label* throw_npe = nullptr) { 6385 // Load the lock word containing the rb_state. 6386 __ Ldr(ip0.W(), lock_word); 6387 // Given the numeric representation, it's enough to check the low bit of the rb_state. 6388 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); 6389 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 6390 __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path); 6391 static_assert( 6392 BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET, 6393 "Field and array LDR offsets must be the same to reuse the same code."); 6394 // To throw NPE, we return to the fast path; the artificial dependence below does not matter. 6395 if (throw_npe != nullptr) { 6396 __ Bind(throw_npe); 6397 } 6398 // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning). 6399 static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), 6400 "Field LDR must be 1 instruction (4B) before the return address label; " 6401 " 2 instructions (8B) for heap poisoning."); 6402 __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); 6403 // Introduce a dependency on the lock_word including rb_state, 6404 // to prevent load-load reordering, and without using 6405 // a memory barrier (which would be more expensive). 6406 __ Add(base_reg, base_reg, Operand(ip0, LSR, 32)); 6407 __ Br(lr); // And return back to the function. 6408 // Note: The fake dependency is unnecessary for the slow path. 6409 } 6410 6411 // Load the read barrier introspection entrypoint in register `entrypoint`. 6412 static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler, 6413 vixl::aarch64::Register entrypoint) { 6414 // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection. 
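 // ip0 (x16) is never allocated to hold a reference, so its slot in the per-register
 // ReadBarrierMark entrypoint table can be reused for the introspection entrypoint.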
6415 DCHECK_EQ(ip0.GetCode(), 16u); 6416 const int32_t entry_point_offset = 6417 Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode()); 6418 __ Ldr(entrypoint, MemOperand(tr, entry_point_offset)); 6419 } 6420 6421 void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler, 6422 uint32_t encoded_data, 6423 /*out*/ std::string* debug_name) { 6424 BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); 6425 switch (kind) { 6426 case BakerReadBarrierKind::kField: 6427 case BakerReadBarrierKind::kAcquire: { 6428 auto base_reg = 6429 Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); 6430 CheckValidReg(base_reg.GetCode()); 6431 auto holder_reg = 6432 Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data)); 6433 CheckValidReg(holder_reg.GetCode()); 6434 UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); 6435 temps.Exclude(ip0, ip1); 6436 // In the case of a field load (with relaxed semantic), if `base_reg` differs from 6437 // `holder_reg`, the offset was too large and we must have emitted (during the construction 6438 // of the HIR graph, see `art::HInstructionBuilder::BuildInstanceFieldAccess`) and preserved 6439 // (see `art::PrepareForRegisterAllocation::VisitNullCheck`) an explicit null check before 6440 // the load. Otherwise, for implicit null checks, we need to null-check the holder as we do 6441 // not necessarily do that check before going to the thunk. 6442 // 6443 // In the case of a field load with load-acquire semantics (where `base_reg` always differs 6444 // from `holder_reg`), we also need an explicit null check when implicit null checks are 6445 // allowed, as we do not emit one before going to the thunk. 6446 vixl::aarch64::Label throw_npe_label; 6447 vixl::aarch64::Label* throw_npe = nullptr; 6448 if (GetCompilerOptions().GetImplicitNullChecks() && 6449 (holder_reg.Is(base_reg) || (kind == BakerReadBarrierKind::kAcquire))) { 6450 throw_npe = &throw_npe_label; 6451 __ Cbz(holder_reg.W(), throw_npe); 6452 } 6453 // Check if the holder is gray and, if not, add fake dependency to the base register 6454 // and return to the LDR instruction to load the reference. Otherwise, use introspection 6455 // to load the reference and call the entrypoint that performs further checks on the 6456 // reference and marks it if needed. 6457 vixl::aarch64::Label slow_path; 6458 MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value()); 6459 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe); 6460 __ Bind(&slow_path); 6461 if (kind == BakerReadBarrierKind::kField) { 6462 MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET); 6463 __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset. 6464 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); 6465 __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset. 6466 __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference. 6467 } else { 6468 DCHECK(kind == BakerReadBarrierKind::kAcquire); 6469 DCHECK(!base_reg.Is(holder_reg)); 6470 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); 6471 __ Ldar(ip0.W(), MemOperand(base_reg)); 6472 } 6473 // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference. 6474 __ Br(ip1); // Jump to the entrypoint. 
6475 break; 6476 } 6477 case BakerReadBarrierKind::kArray: { 6478 auto base_reg = 6479 Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); 6480 CheckValidReg(base_reg.GetCode()); 6481 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, 6482 BakerReadBarrierSecondRegField::Decode(encoded_data)); 6483 UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); 6484 temps.Exclude(ip0, ip1); 6485 vixl::aarch64::Label slow_path; 6486 int32_t data_offset = 6487 mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value(); 6488 MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset); 6489 DCHECK_LT(lock_word.GetOffset(), 0); 6490 EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path); 6491 __ Bind(&slow_path); 6492 MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); 6493 __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset. 6494 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); 6495 __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set). 6496 __ Bfi(ip1, ip0, 3, 6); // Insert ip0 to the entrypoint address to create 6497 // a switch case target based on the index register. 6498 __ Mov(ip0, base_reg); // Move the base register to ip0. 6499 __ Br(ip1); // Jump to the entrypoint's array switch case. 6500 break; 6501 } 6502 case BakerReadBarrierKind::kGcRoot: { 6503 // Check if the reference needs to be marked and if so (i.e. not null, not marked yet 6504 // and it does not have a forwarding address), call the correct introspection entrypoint; 6505 // otherwise return the reference (or the extracted forwarding address). 6506 // There is no gray bit check for GC roots. 6507 auto root_reg = 6508 Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data)); 6509 CheckValidReg(root_reg.GetCode()); 6510 DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg, 6511 BakerReadBarrierSecondRegField::Decode(encoded_data)); 6512 UseScratchRegisterScope temps(assembler.GetVIXLAssembler()); 6513 temps.Exclude(ip0, ip1); 6514 vixl::aarch64::Label return_label, not_marked, forwarding_address; 6515 __ Cbz(root_reg, &return_label); 6516 MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value()); 6517 __ Ldr(ip0.W(), lock_word); 6518 __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, ¬_marked); 6519 __ Bind(&return_label); 6520 __ Br(lr); 6521 __ Bind(¬_marked); 6522 __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1)); 6523 __ B(&forwarding_address, mi); 6524 LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1); 6525 // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to 6526 // art_quick_read_barrier_mark_introspection_gc_roots. 6527 __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET)); 6528 __ Mov(ip0.W(), root_reg); 6529 __ Br(ip1); 6530 __ Bind(&forwarding_address); 6531 __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift); 6532 __ Br(lr); 6533 break; 6534 } 6535 default: 6536 LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind); 6537 UNREACHABLE(); 6538 } 6539 6540 // For JIT, the slow path is considered part of the compiled method, 6541 // so JIT should pass null as `debug_name`. Tests may not have a runtime. 
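 // The generated names have the form "BakerReadBarrierThunk" + kind + encoded register(s),
 // e.g. "BakerReadBarrierThunkField_r1_r2".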
  DCHECK(Runtime::Current() == nullptr ||
         !Runtime::Current()->UseJitCompilation() ||
         debug_name == nullptr);
  if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
    std::ostringstream oss;
    oss << "BakerReadBarrierThunk";
    switch (kind) {
      case BakerReadBarrierKind::kField:
        oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
        break;
      case BakerReadBarrierKind::kAcquire:
        oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
            << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
        break;
      case BakerReadBarrierKind::kArray:
        oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                  BakerReadBarrierSecondRegField::Decode(encoded_data));
        break;
      case BakerReadBarrierKind::kGcRoot:
        oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
        DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
                  BakerReadBarrierSecondRegField::Decode(encoded_data));
        break;
    }
    *debug_name = oss.str();
  }
}

#undef __

}  // namespace arm64
}  // namespace art