/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
#include "thread.h"
#include "utils/arm64/assembler_arm64.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)
using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;
using vixl::EmissionCheckScope;

// `__` is (re)defined below as the assembler shorthand; refuse to build if it is already taken.
#ifdef __
#error "ARM64 Codegen VIXL macro-assembler macro already defined."
#endif

namespace art {

// NOTE(review): this forward declaration looks like it lost its template parameter list
// (`template<...> class GcRoot;`) in this copy of the file; reproduced as found -- confirm
// against the pristine source.
template class GcRoot;

namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;
using helpers::CPURegisterFrom;
using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
using helpers::OutputCPURegister;
using helpers::OutputFPRegister;
using helpers::OutputRegister;
using helpers::QRegisterFrom;
using helpers::RegisterFrom;
using helpers::StackOperandFrom;
using helpers::VIXLRegCodeFromART;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
// generates less code/data with a small num_entries.
static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

// Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
// offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB; inline Condition ARM64Condition(IfCondition cond) { switch (cond) { case kCondEQ: return eq; case kCondNE: return ne; case kCondLT: return lt; case kCondLE: return le; case kCondGT: return gt; case kCondGE: return ge; case kCondB: return lo; case kCondBE: return ls; case kCondA: return hi; case kCondAE: return hs; } LOG(FATAL) << "Unreachable"; UNREACHABLE(); } inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) { // The ARM64 condition codes can express all the necessary branches, see the // "Meaning (floating-point)" column in the table C1-1 in the ARMv8 reference manual. // There is no dex instruction or HIR that would need the missing conditions // "equal or unordered" or "not equal". switch (cond) { case kCondEQ: return eq; case kCondNE: return ne /* unordered */; case kCondLT: return gt_bias ? cc : lt /* unordered */; case kCondLE: return gt_bias ? ls : le /* unordered */; case kCondGT: return gt_bias ? hi /* unordered */ : gt; case kCondGE: return gt_bias ? cs /* unordered */ : ge; default: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); } } Location ARM64ReturnLocation(DataType::Type return_type) { // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`, // but we use the exact registers for clarity. 
if (return_type == DataType::Type::kFloat32) { return LocationFrom(s0); } else if (return_type == DataType::Type::kFloat64) { return LocationFrom(d0); } else if (return_type == DataType::Type::kInt64) { return LocationFrom(x0); } else if (return_type == DataType::Type::kVoid) { return Location::NoLocation(); } else { return LocationFrom(w0); } } Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) { return ARM64ReturnLocation(return_type); } static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() { InvokeRuntimeCallingConvention calling_convention; RegisterSet caller_saves = RegisterSet::Empty(); caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), DataType::Type::kReference).GetCode()); return caller_saves; } // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast(codegen)->GetVIXLAssembler()-> // NOLINT #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value() // Calculate memory accessing operand for save/restore live registers. static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen, LocationSummary* locations, int64_t spill_offset, bool is_save) { const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills, codegen->GetNumberOfCoreRegisters(), fp_spills, codegen->GetNumberOfFloatingPointRegisters())); CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills); unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? 
kQRegSize : kDRegSize; CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills); MacroAssembler* masm = down_cast(codegen)->GetVIXLAssembler(); UseScratchRegisterScope temps(masm); Register base = masm->StackPointer(); int64_t core_spill_size = core_list.GetTotalSizeInBytes(); int64_t fp_spill_size = fp_list.GetTotalSizeInBytes(); int64_t reg_size = kXRegSizeInBytes; int64_t max_ls_pair_offset = spill_offset + core_spill_size + fp_spill_size - 2 * reg_size; uint32_t ls_access_size = WhichPowerOf2(reg_size); if (((core_list.GetCount() > 1) || (fp_list.GetCount() > 1)) && !masm->IsImmLSPair(max_ls_pair_offset, ls_access_size)) { // If the offset does not fit in the instruction's immediate field, use an alternate register // to compute the base address(float point registers spill base address). Register new_base = temps.AcquireSameSizeAs(base); __ Add(new_base, base, Operand(spill_offset + core_spill_size)); base = new_base; spill_offset = -core_spill_size; int64_t new_max_ls_pair_offset = fp_spill_size - 2 * reg_size; DCHECK(masm->IsImmLSPair(spill_offset, ls_access_size)); DCHECK(masm->IsImmLSPair(new_max_ls_pair_offset, ls_access_size)); } if (is_save) { __ StoreCPURegList(core_list, MemOperand(base, spill_offset)); __ StoreCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size)); } else { __ LoadCPURegList(core_list, MemOperand(base, spill_offset)); __ LoadCPURegList(fp_list, MemOperand(base, spill_offset + core_spill_size)); } } void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true); for (uint32_t i : LowToHighBits(core_spills)) { // If the register holds an object, update the stack mask. 
if (locations->RegisterContainsObject(i)) { locations->SetStackBit(stack_offset / kVRegSize); } DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); saved_core_stack_offsets_[i] = stack_offset; stack_offset += kXRegSizeInBytes; } const size_t fp_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSizeInBytes : kDRegSizeInBytes; const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false); for (uint32_t i : LowToHighBits(fp_spills)) { DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize()); DCHECK_LT(i, kMaximumNumberOfExpectedRegisters); saved_fpu_stack_offsets_[i] = stack_offset; stack_offset += fp_reg_size; } SaveRestoreLiveRegistersHelper(codegen, locations, codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true); } void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { SaveRestoreLiveRegistersHelper(codegen, locations, codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false); } class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorARM64* arm64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. SaveLiveRegisters(codegen, instruction_->GetLocations()); } // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. 
InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves(locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), DataType::Type::kInt32, locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), DataType::Type::kInt32); QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? kQuickThrowStringBounds : kQuickThrowArrayBounds; arm64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); CheckEntrypointTypes(); } bool IsFatal() const override { return true; } const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64); }; class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); } bool IsFatal() const override { return true; } const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64); }; class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { public: LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at) : SlowPathCodeARM64(at), cls_(cls) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); } void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); const uint32_t dex_pc = instruction_->GetDexPc(); bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath(); bool must_do_clinit = instruction_->IsClinitCheck() || 
cls_->MustGenerateClinitCheck(); CodeGeneratorARM64* arm64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; if (must_resolve_type) { DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile())); dex::TypeIndex type_index = cls_->GetTypeIndex(); __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_); arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this); CheckEntrypointTypes(); // If we also must_do_clinit, the resolved type is now in the correct register. } else { DCHECK(must_do_clinit); Location source = instruction_->IsLoadClass() ? out : locations->InAt(0); arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)), source, cls_->GetType()); } if (must_do_clinit) { arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this); CheckEntrypointTypes(); } // Move the class to the desired location. if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); DataType::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); } RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } const char* GetDescription() const override { return "LoadClassSlowPathARM64"; } private: // The class this slow path will load. 
HLoadClass* const cls_; DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64); }; class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { public: explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_); arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); DataType::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } const char* GetDescription() const override { return "LoadStringSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); }; class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {} void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); if (instruction_->CanThrowIntoCatchBlock()) { // Live registers will be restored in the catch block if caught. 
SaveLiveRegisters(codegen, instruction_->GetLocations()); } arm64_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); } bool IsFatal() const override { return true; } const char* GetDescription() const override { return "NullCheckSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64); }; class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 { public: SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor) : SlowPathCodeARM64(instruction), successor_(successor) {} void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); CodeGeneratorARM64* arm64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); // Only saves live 128-bit regs for SIMD. arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); RestoreLiveRegisters(codegen, locations); // Only restores live 128-bit regs for SIMD. if (successor_ == nullptr) { __ B(GetReturnLabel()); } else { __ B(arm64_codegen->GetLabelOf(successor_)); } } vixl::aarch64::Label* GetReturnLabel() { DCHECK(successor_ == nullptr); return &return_label_; } HBasicBlock* GetSuccessor() const { return successor_; } const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; } private: // If not null, the block to branch to after the suspend check. HBasicBlock* const successor_; // If `successor_` is null, the label to branch to after the suspend check. 
vixl::aarch64::Label return_label_; DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARM64); }; class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { public: TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal) : SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {} void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DCHECK(instruction_->IsCheckCast() || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast(codegen); uint32_t dex_pc = instruction_->GetDexPc(); __ Bind(GetEntryLabel()); if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves(locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), DataType::Type::kReference, locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), DataType::Type::kReference); if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes(); DataType::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { DCHECK(instruction_->IsCheckCast()); arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this); CheckEntrypointTypes(); } if (!is_fatal_) { RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } } const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; } bool IsFatal() const override { return is_fatal_; } private: const bool is_fatal_; DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARM64); }; class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 { public: explicit 
DeoptimizationSlowPathARM64(HDeoptimize* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast(codegen); __ Bind(GetEntryLabel()); LocationSummary* locations = instruction_->GetLocations(); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; __ Mov(calling_convention.GetRegisterAt(0), static_cast(instruction_->AsDeoptimize()->GetDeoptimizationKind())); arm64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); } const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; } private: DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64); }; class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { public: explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove( locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), DataType::Type::kReference, nullptr); parallel_move.AddMove( locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), DataType::Type::kInt32, nullptr); parallel_move.AddMove( locations->InAt(2), LocationFrom(calling_convention.GetRegisterAt(2)), DataType::Type::kReference, nullptr); codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); CodeGeneratorARM64* arm64_codegen = down_cast(codegen); arm64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes(); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } const char* GetDescription() const override { return "ArraySetSlowPathARM64"; } private: 
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64); }; void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) { uint32_t num_entries = switch_instr_->GetNumEntries(); DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold); // We are about to use the assembler to place literals directly. Make sure we have enough // underlying code buffer and we have generated the jump table with right size. EmissionCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t), CodeBufferCheckScope::kExactSize); __ Bind(&table_start_); const ArenaVector& successors = switch_instr_->GetBlock()->GetSuccessors(); for (uint32_t i = 0; i < num_entries; i++) { vixl::aarch64::Label* target_label = codegen->GetLabelOf(successors[i]); DCHECK(target_label->IsBound()); ptrdiff_t jump_offset = target_label->GetLocation() - table_start_.GetLocation(); DCHECK_GT(jump_offset, std::numeric_limits::min()); DCHECK_LE(jump_offset, std::numeric_limits::max()); Literal literal(jump_offset); __ place(&literal); } } // Slow path generating a read barrier for a heap reference. class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction, Location out, Location ref, Location obj, uint32_t offset, Location index) : SlowPathCodeARM64(instruction), out_(out), ref_(ref), obj_(obj), offset_(offset), index_(index) { DCHECK(kEmitCompilerReadBarrier); // If `obj` is equal to `out` or `ref`, it means the initial object // has been overwritten by (or after) the heap object reference load // to be instrumented, e.g.: // // __ Ldr(out, HeapOperand(out, class_offset); // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); // // In that case, we have lost the information about the original // object, and the emitted read barrier cannot work properly. 
DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; } void EmitNativeCode(CodeGenerator* codegen) override { CodeGeneratorARM64* arm64_codegen = down_cast(codegen); LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); DCHECK(instruction_->IsInstanceFieldGet() || instruction_->IsStaticFieldGet() || instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet // instructions does not support the HIntermediateAddress // instruction. DCHECK(!(instruction_->IsArrayGet() && instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress())); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); // We may have to change the index's value, but as `index_` is a // constant member (like other "inputs" of this slow path), // introduce a copy of it, `index`. Location index = index_; if (index_.IsValid()) { // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute the actual memory offset and store it in `index`. 
Register index_reg = RegisterFrom(index_, DataType::Type::kInt32); DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg())); if (codegen->IsCoreCalleeSaveRegister(index_.reg())) { // We are about to change the value of `index_reg` (see the // calls to vixl::MacroAssembler::Lsl and // vixl::MacroAssembler::Mov below), but it has // not been saved by the previous call to // art::SlowPathCode::SaveLiveRegisters, as it is a // callee-save register -- // art::SlowPathCode::SaveLiveRegisters does not consider // callee-save registers, as it has been designed with the // assumption that callee-save registers are supposed to be // handled by the called function. So, as a callee-save // register, `index_reg` _would_ eventually be saved onto // the stack, but it would be too late: we would have // changed its value earlier. Therefore, we manually save // it here into another freely available register, // `free_reg`, chosen of course among the caller-save // registers (as a callee-save `free_reg` register would // exhibit the same problem). // // Note we could have requested a temporary register from // the register allocator instead; but we prefer not to, as // this is a slow path, and we know we can find a // caller-save register that is available. Register free_reg = FindAvailableCallerSaveRegister(codegen); __ Mov(free_reg.W(), index_reg); index_reg = free_reg; index = LocationFrom(index_reg); } else { // The initial register stored in `index_` has already been // saved in the call to art::SlowPathCode::SaveLiveRegisters // (as it is not a callee-save register), so we can freely // use it. } // Shifting the index value contained in `index_reg` by the scale // factor (2) cannot overflow in practice, as the runtime is // unable to allocate object arrays with a size larger than // 2^26 - 1 (that is, 2^28 - 4 bytes). 
__ Lsl(index_reg, index_reg, DataType::SizeShift(type)); static_assert( sizeof(mirror::HeapReference) == sizeof(int32_t), "art::mirror::HeapReference and int32_t have different sizes."); __ Add(index_reg, index_reg, Operand(offset_)); } else { // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile // intrinsics, `index_` is not shifted by a scale factor of 2 // (as in the case of ArrayGet), as it is actually an offset // to an object field within an object. DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) << instruction_->AsInvoke()->GetIntrinsic(); DCHECK_EQ(offset_, 0u); DCHECK(index_.IsRegister()); } } // We're moving two or three locations to locations that could // overlap, so we need a parallel move resolver. InvokeRuntimeCallingConvention calling_convention; HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove(ref_, LocationFrom(calling_convention.GetRegisterAt(0)), type, nullptr); parallel_move.AddMove(obj_, LocationFrom(calling_convention.GetRegisterAt(1)), type, nullptr); if (index.IsValid()) { parallel_move.AddMove(index, LocationFrom(calling_convention.GetRegisterAt(2)), DataType::Type::kInt32, nullptr); codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); } else { codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_); } arm64_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes< kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } const char* 
GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; } private: Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { size_t ref = static_cast(XRegisterFrom(ref_).GetCode()); size_t obj = static_cast(XRegisterFrom(obj_).GetCode()); for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) { return Register(VIXLRegCodeFromART(i), kXRegSize); } } // We shall never fail to find a free caller-save register, as // there are more than two core caller-save registers on ARM64 // (meaning it is possible to find one which is different from // `ref` and `obj`). DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); LOG(FATAL) << "Could not find a free register"; UNREACHABLE(); } const Location out_; const Location ref_; const Location obj_; const uint32_t offset_; // An additional location containing an index to an array. // Only used for HArrayGet and the UnsafeGetObject & // UnsafeGetObjectVolatile intrinsics. const Location index_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64); }; // Slow path generating a read barrier for a GC root. 
class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { public: ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root) : SlowPathCodeARM64(instruction), out_(out), root_(root) { DCHECK(kEmitCompilerReadBarrier); } void EmitNativeCode(CodeGenerator* codegen) override { LocationSummary* locations = instruction_->GetLocations(); DataType::Type type = DataType::Type::kReference; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) << "Unexpected instruction in read barrier for GC root slow path: " << instruction_->DebugName(); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; CodeGeneratorARM64* arm64_codegen = down_cast(codegen); // The argument of the ReadBarrierForRootSlow is not a managed // reference (`mirror::Object*`), but a `GcRoot*`; // thus we need a 64-bit move here, and we cannot use // // arm64_codegen->MoveLocation( // LocationFrom(calling_convention.GetRegisterAt(0)), // root_, // type); // // which would emit a 32-bit move, as `type` is a (32-bit wide) // reference type (`DataType::Type::kReference`). 
__ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_)); arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes*>(); arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); } const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; } private: const Location out_; const Location root_; DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64); }; #undef __ Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) { Location next_location; if (type == DataType::Type::kVoid) { LOG(FATAL) << "Unreachable type " << type; } if (DataType::IsFloatingPointType(type) && (float_index_ < calling_convention.GetNumberOfFpuRegisters())) { next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++)); } else if (!DataType::IsFloatingPointType(type) && (gp_index_ < calling_convention.GetNumberOfRegisters())) { next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++)); } else { size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) : Location::StackSlot(stack_offset); } // Space on the stack is reserved for all arguments. stack_index_ += DataType::Is64BitType(type) ? 
2 : 1; return next_location; } Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const { return LocationFrom(kArtMethodRegister); } CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) : CodeGenerator(graph, kNumberOfAllocatableRegisters, kNumberOfAllocatableFPRegisters, kNumberOfAllocatableRegisterPairs, callee_saved_core_registers.GetList(), callee_saved_fp_registers.GetList(), compiler_options, stats), block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetAllocator(), this), assembler_(graph->GetAllocator(), compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()), boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), call_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint32_literals_(std::less(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), 
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_baker_read_barrier_slow_paths_(std::less(), graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. AddAllocatedRegister(LocationFrom(lr)); } #define __ GetVIXLAssembler()-> void CodeGeneratorARM64::EmitJumpTables() { for (auto&& jump_table : jump_tables_) { jump_table->EmitTable(this); } } void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) { EmitJumpTables(); // Emit JIT baker read barrier slow paths. DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty()); for (auto& entry : jit_baker_read_barrier_slow_paths_) { uint32_t encoded_data = entry.first; vixl::aarch64::Label* slow_path_entry = &entry.second.label; __ Bind(slow_path_entry); CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr); } // Ensure we emit the literal pool. __ FinalizeCode(); CodeGenerator::Finalize(allocator); // Verify Baker read barrier linker patches. if (kIsDebugBuild) { ArrayRef code = allocator->GetMemory(); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { DCHECK(info.label.IsBound()); uint32_t literal_offset = info.label.GetLocation(); DCHECK_ALIGNED(literal_offset, 4u); auto GetInsn = [&code](uint32_t offset) { DCHECK_ALIGNED(offset, 4u); return (static_cast(code[offset + 0]) << 0) + (static_cast(code[offset + 1]) << 8) + (static_cast(code[offset + 2]) << 16)+ (static_cast(code[offset + 3]) << 24); }; const uint32_t encoded_data = info.custom_data; BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data); // Check that the next instruction matches the expected LDR. switch (kind) { case BakerReadBarrierKind::kField: case BakerReadBarrierKind::kAcquire: { DCHECK_GE(code.size() - literal_offset, 8u); uint32_t next_insn = GetInsn(literal_offset + 4u); CheckValidReg(next_insn & 0x1fu); // Check destination register. 
const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); if (kind == BakerReadBarrierKind::kField) { // LDR (immediate) with correct base_reg. CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5)); } else { DCHECK(kind == BakerReadBarrierKind::kAcquire); // LDAR with correct base_reg. CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5)); } break; } case BakerReadBarrierKind::kArray: { DCHECK_GE(code.size() - literal_offset, 8u); uint32_t next_insn = GetInsn(literal_offset + 4u); // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL), // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2]. CheckValidReg(next_insn & 0x1fu); // Check destination register. const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5)); CheckValidReg((next_insn >> 16) & 0x1f); // Check index register break; } case BakerReadBarrierKind::kGcRoot: { DCHECK_GE(literal_offset, 4u); uint32_t prev_insn = GetInsn(literal_offset - 4u); const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data); // Usually LDR (immediate) with correct root_reg but // we may have a "MOV marked, old_value" for UnsafeCASObject. if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) { // MOV? CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); // LDR? } break; } default: LOG(FATAL) << "Unexpected kind: " << static_cast(kind); UNREACHABLE(); } } } } void ParallelMoveResolverARM64::PrepareForEmitNativeCode() { // Note: There are 6 kinds of moves: // 1. constant -> GPR/FPR (non-cycle) // 2. constant -> stack (non-cycle) // 3. GPR/FPR -> GPR/FPR // 4. GPR/FPR -> stack // 5. stack -> GPR/FPR // 6. stack -> stack (non-cycle) // Case 1, 2 and 6 should never be included in a dependency cycle on ARM64. For case 3, 4, and 5 // VIXL uses at most 1 GPR. 
VIXL has 2 GPR and 1 FPR temps, and there should be no intersecting // cycles on ARM64, so we always have 1 GPR and 1 FPR available VIXL temps to resolve the // dependency. vixl_temps_.Open(GetVIXLAssembler()); } void ParallelMoveResolverARM64::FinishEmitNativeCode() { vixl_temps_.Close(); } Location ParallelMoveResolverARM64::AllocateScratchLocationFor(Location::Kind kind) { DCHECK(kind == Location::kRegister || kind == Location::kFpuRegister || kind == Location::kStackSlot || kind == Location::kDoubleStackSlot || kind == Location::kSIMDStackSlot); kind = (kind == Location::kFpuRegister || kind == Location::kSIMDStackSlot) ? Location::kFpuRegister : Location::kRegister; Location scratch = GetScratchLocation(kind); if (!scratch.Equals(Location::NoLocation())) { return scratch; } // Allocate from VIXL temp registers. if (kind == Location::kRegister) { scratch = LocationFrom(vixl_temps_.AcquireX()); } else { DCHECK_EQ(kind, Location::kFpuRegister); scratch = LocationFrom(codegen_->GetGraph()->HasSIMD() ? vixl_temps_.AcquireVRegisterOfSize(kQRegSize) : vixl_temps_.AcquireD()); } AddScratchLocation(scratch); return scratch; } void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { if (loc.IsRegister()) { vixl_temps_.Release(XRegisterFrom(loc)); } else { DCHECK(loc.IsFpuRegister()); vixl_temps_.Release(codegen_->GetGraph()->HasSIMD() ? QRegisterFrom(loc) : DRegisterFrom(loc)); } RemoveScratchLocation(loc); } void ParallelMoveResolverARM64::EmitMove(size_t index) { MoveOperands* move = moves_[index]; codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid); } void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { MacroAssembler* masm = GetVIXLAssembler(); if (GetCompilerOptions().CountHotnessInCompiledCode()) { UseScratchRegisterScope temps(masm); Register counter = temps.AcquireX(); Register method = is_frame_entry ? 
kArtMethodRegister : temps.AcquireX(); if (!is_frame_entry) { __ Ldr(method, MemOperand(sp, 0)); } __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value())); __ Add(counter, counter, 1); // Subtract one if the counter would overflow. __ Sub(counter, counter, Operand(counter, LSR, 16)); __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value())); } if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { ScopedObjectAccess soa(Thread::Current()); ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); if (info != nullptr) { uint64_t address = reinterpret_cast64(info); vixl::aarch64::Label done; UseScratchRegisterScope temps(masm); Register temp = temps.AcquireX(); Register counter = temps.AcquireW(); __ Mov(temp, address); __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); __ Add(counter, counter, 1); __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); __ Tst(counter, 0xffff); __ B(ne, &done); if (is_frame_entry) { if (HasEmptyFrame()) { // The entyrpoint expects the method at the bottom of the stack. We // claim stack space necessary for alignment. __ Claim(kStackAlignment); __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0)); } else if (!RequiresCurrentMethod()) { __ Str(kArtMethodRegister, MemOperand(sp, 0)); } } else { CHECK(RequiresCurrentMethod()); } uint32_t entrypoint_offset = GetThreadOffset(kQuickCompileOptimized).Int32Value(); __ Ldr(lr, MemOperand(tr, entrypoint_offset)); // Note: we don't record the call here (and therefore don't generate a stack // map), as the entrypoint should never be suspended. 
__ Blr(lr); if (HasEmptyFrame()) { CHECK(is_frame_entry); __ Ldr(lr, MemOperand(sp, 8)); __ Drop(kStackAlignment); } __ Bind(&done); } } } void CodeGeneratorARM64::GenerateFrameEntry() { MacroAssembler* masm = GetVIXLAssembler(); __ Bind(&frame_entry_label_); bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod(); if (do_overflow_check) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireX(); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Sub(temp, sp, static_cast(GetStackOverflowReservedBytes(InstructionSet::kArm64))); { // Ensure that between load and RecordPcInfo there are no pools emitted. ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); __ ldr(wzr, MemOperand(temp, 0)); RecordPcInfo(nullptr, 0); } } if (!HasEmptyFrame()) { // Stack layout: // sp[frame_size - 8] : lr. // ... : other preserved core registers. // ... : other preserved fp registers. // ... : reserved frame space. // sp[0] : current method. int32_t frame_size = dchecked_integral_cast(GetFrameSize()); uint32_t core_spills_offset = frame_size - GetCoreSpillSize(); CPURegList preserved_core_registers = GetFramePreservedCoreRegisters(); DCHECK(!preserved_core_registers.IsEmpty()); uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize(); CPURegList preserved_fp_registers = GetFramePreservedFPRegisters(); // Save the current method if we need it, or if using STP reduces code // size. Note that we do not do this in HCurrentMethod, as the // instruction might have been removed in the SSA graph. CPURegister lowest_spill; if (core_spills_offset == kXRegSizeInBytes) { // If there is no gap between the method and the lowest core spill, use // aligned STP pre-index to store both. Max difference is 512. We do // that to reduce code size even if we do not have to save the method. DCHECK_LE(frame_size, 512); // 32 core registers are only 256 bytes. 
lowest_spill = preserved_core_registers.PopLowestIndex(); __ Stp(kArtMethodRegister, lowest_spill, MemOperand(sp, -frame_size, PreIndex)); } else if (RequiresCurrentMethod()) { __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex)); } else { __ Claim(frame_size); } GetAssembler()->cfi().AdjustCFAOffset(frame_size); if (lowest_spill.IsValid()) { GetAssembler()->cfi().RelOffset(DWARFReg(lowest_spill), core_spills_offset); core_spills_offset += kXRegSizeInBytes; } GetAssembler()->SpillRegisters(preserved_core_registers, core_spills_offset); GetAssembler()->SpillRegisters(preserved_fp_registers, fp_spills_offset); if (GetGraph()->HasShouldDeoptimizeFlag()) { // Initialize should_deoptimize flag to 0. Register wzr = Register(VIXLRegCodeFromART(WZR), kWRegSize); __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag())); } } MaybeIncrementHotness(/* is_frame_entry= */ true); MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } void CodeGeneratorARM64::GenerateFrameExit() { GetAssembler()->cfi().RememberState(); if (!HasEmptyFrame()) { int32_t frame_size = dchecked_integral_cast(GetFrameSize()); uint32_t core_spills_offset = frame_size - GetCoreSpillSize(); CPURegList preserved_core_registers = GetFramePreservedCoreRegisters(); DCHECK(!preserved_core_registers.IsEmpty()); uint32_t fp_spills_offset = frame_size - FrameEntrySpillSize(); CPURegList preserved_fp_registers = GetFramePreservedFPRegisters(); CPURegister lowest_spill; if (core_spills_offset == kXRegSizeInBytes) { // If there is no gap between the method and the lowest core spill, use // aligned LDP pre-index to pop both. Max difference is 504. We do // that to reduce code size even though the loaded method is unused. DCHECK_LE(frame_size, 504); // 32 core registers are only 256 bytes. 
lowest_spill = preserved_core_registers.PopLowestIndex(); core_spills_offset += kXRegSizeInBytes; } GetAssembler()->UnspillRegisters(preserved_fp_registers, fp_spills_offset); GetAssembler()->UnspillRegisters(preserved_core_registers, core_spills_offset); if (lowest_spill.IsValid()) { __ Ldp(xzr, lowest_spill, MemOperand(sp, frame_size, PostIndex)); GetAssembler()->cfi().Restore(DWARFReg(lowest_spill)); } else { __ Drop(frame_size); } GetAssembler()->cfi().AdjustCFAOffset(-frame_size); } __ Ret(); GetAssembler()->cfi().RestoreState(); GetAssembler()->cfi().DefCFAOffset(GetFrameSize()); } CPURegList CodeGeneratorARM64::GetFramePreservedCoreRegisters() const { DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spill_mask_, GetNumberOfCoreRegisters(), 0, 0)); return CPURegList(CPURegister::kRegister, kXRegSize, core_spill_mask_); } CPURegList CodeGeneratorARM64::GetFramePreservedFPRegisters() const { DCHECK(ArtVixlRegCodeCoherentForRegSet(0, 0, fpu_spill_mask_, GetNumberOfFloatingPointRegisters())); return CPURegList(CPURegister::kVRegister, kDRegSize, fpu_spill_mask_); } void CodeGeneratorARM64::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) { DCHECK(location.IsRegister()); __ Mov(RegisterFrom(location, DataType::Type::kInt32), value); } void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) { if (location.IsRegister()) { locations->AddTemp(location); } else { UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; } } void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_can_be_null) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register card = temps.AcquireX(); Register temp = temps.AcquireW(); // Index within the CardTable - 32bit. vixl::aarch64::Label done; if (value_can_be_null) { __ Cbz(value, &done); } // Load the address of the card table into `card`. 
__ Ldr(card, MemOperand(tr, Thread::CardTableOffset().Int32Value())); // Calculate the offset (in the card table) of the card corresponding to // `object`. __ Lsr(temp, object, gc::accounting::CardTable::kCardShift); // Write the `art::gc::accounting::CardTable::kCardDirty` value into the // `object`'s card. // // Register `card` contains the address of the card table. Note that the card // table's base is biased during its creation so that it always starts at an // address whose least-significant byte is equal to `kCardDirty` (see // art::gc::accounting::CardTable::Create). Therefore the STRB instruction // below writes the `kCardDirty` (byte) value into the `object`'s card // (located at `card + object >> kCardShift`). // // This dual use of the value in register `card` (1. to calculate the location // of the card to mark; and 2. to load the `kCardDirty` value) saves a load // (no need to explicitly load `kCardDirty` as an immediate value). __ Strb(card, MemOperand(card, temp.X())); if (value_can_be_null) { __ Bind(&done); } } void CodeGeneratorARM64::SetupBlockedRegisters() const { // Blocked core registers: // lr : Runtime reserved. // tr : Runtime reserved. // mr : Runtime reserved. // ip1 : VIXL core temp. // ip0 : VIXL core temp. // x18 : Platform register. // // Blocked fp registers: // d31 : VIXL fp temp. CPURegList reserved_core_registers = vixl_reserved_core_registers; reserved_core_registers.Combine(runtime_reserved_core_registers); while (!reserved_core_registers.IsEmpty()) { blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true; } blocked_core_registers_[X18] = true; CPURegList reserved_fp_registers = vixl_reserved_fp_registers; while (!reserved_fp_registers.IsEmpty()) { blocked_fpu_registers_[reserved_fp_registers.PopLowestIndex().GetCode()] = true; } if (GetGraph()->IsDebuggable()) { // Stubs do not save callee-save floating point registers. If the graph // is debuggable, we need to deal with these registers differently. 
For // now, just block them. CPURegList reserved_fp_registers_debuggable = callee_saved_fp_registers; while (!reserved_fp_registers_debuggable.IsEmpty()) { blocked_fpu_registers_[reserved_fp_registers_debuggable.PopLowestIndex().GetCode()] = true; } } } size_t CodeGeneratorARM64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) { Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); __ Str(reg, MemOperand(sp, stack_index)); return kArm64WordSize; } size_t CodeGeneratorARM64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) { Register reg = Register(VIXLRegCodeFromART(reg_id), kXRegSize); __ Ldr(reg, MemOperand(sp, stack_index)); return kArm64WordSize; } size_t CodeGeneratorARM64::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " << "use SaveRestoreLiveRegistersHelper"; UNREACHABLE(); } size_t CodeGeneratorARM64::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED, uint32_t reg_id ATTRIBUTE_UNUSED) { LOG(FATAL) << "FP registers shouldn't be saved/restored individually, " << "use SaveRestoreLiveRegistersHelper"; UNREACHABLE(); } void CodeGeneratorARM64::DumpCoreRegister(std::ostream& stream, int reg) const { stream << XRegister(reg); } void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg) const { stream << DRegister(reg); } const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const { return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures(); } void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) { if (constant->IsIntConstant()) { __ Mov(Register(destination), constant->AsIntConstant()->GetValue()); } else if (constant->IsLongConstant()) { __ Mov(Register(destination), constant->AsLongConstant()->GetValue()); } else if (constant->IsNullConstant()) { __ Mov(Register(destination), 0); } else if 
(constant->IsFloatConstant()) {
    __ Fmov(VRegister(destination), constant->AsFloatConstant()->GetValue());
  } else {
    DCHECK(constant->IsDoubleConstant());
    __ Fmov(VRegister(destination), constant->AsDoubleConstant()->GetValue());
  }
}


// Returns true when the kind of the constant held by `constant` matches `type`
// (int/null -> kInt32, long -> kInt64, float -> kFloat32, double -> kFloat64).
static bool CoherentConstantAndType(Location constant, DataType::Type type) {
  DCHECK(constant.IsConstant());
  HConstant* cst = constant.GetConstant();
  return (cst->IsIntConstant() && type == DataType::Type::kInt32) ||
         // Null is mapped to a core W register, which we associate with kPrimInt.
         (cst->IsNullConstant() && type == DataType::Type::kInt32) ||
         (cst->IsLongConstant() && type == DataType::Type::kInt64) ||
         (cst->IsFloatConstant() && type == DataType::Type::kFloat32) ||
         (cst->IsDoubleConstant() && type == DataType::Type::kFloat64);
}

// Allocate a scratch register from the VIXL pool, querying first
// the floating-point register pool, and then the core register
// pool. This is essentially a reimplementation of
// vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
// using a different allocation strategy.
static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
                                                    vixl::aarch64::UseScratchRegisterScope* temps,
                                                    int size_in_bits) {
  // A core register is used only when the scratch V-register list is empty.
  return masm->GetScratchVRegisterList()->IsEmpty()
      ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
      : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
}

// Emits a move from `source` to `destination`. `dst_type` may be kVoid, in which case the
// width and register class are inferred from the two locations. Moving a location onto
// itself emits nothing.
void CodeGeneratorARM64::MoveLocation(Location destination,
                                      Location source,
                                      DataType::Type dst_type) {
  if (source.Equals(destination)) {
    return;
  }

  // A valid move can always be inferred from the destination and source
  // locations. When moving from and to a register, the argument type can be
  // used to generate 32bit instead of 64bit moves. In debug mode we also
  // check the coherency of the locations and the type.
  bool unspecified_type = (dst_type == DataType::Type::kVoid);

  if (destination.IsRegister() || destination.IsFpuRegister()) {
    // --- Destination is a core or FP register. ---
    if (unspecified_type) {
      HConstant* src_cst = source.IsConstant() ? source.GetConstant() : nullptr;
      if (source.IsStackSlot() ||
          (src_cst != nullptr && (src_cst->IsIntConstant()
                                  || src_cst->IsFloatConstant()
                                  || src_cst->IsNullConstant()))) {
        // For stack slots and 32bit constants, a 32bit type is appropriate.
        dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32;
      } else {
        // If the source is a double stack slot or a 64bit constant, a 64bit
        // type is appropriate. Else the source is a register, and since the
        // type has not been specified, we chose a 64bit type to force a 64bit
        // move.
        dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64;
      }
    }
    DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) ||
           (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type)));
    CPURegister dst = CPURegisterFrom(destination, dst_type);
    if (source.IsStackSlot() || source.IsDoubleStackSlot()) {
      DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot());
      __ Ldr(dst, StackOperandFrom(source));
    } else if (source.IsSIMDStackSlot()) {
      // SIMD slots are loaded through the Q view of the destination register.
      __ Ldr(QRegisterFrom(destination), StackOperandFrom(source));
    } else if (source.IsConstant()) {
      DCHECK(CoherentConstantAndType(source, dst_type));
      MoveConstant(dst, source.GetConstant());
    } else if (source.IsRegister()) {
      if (destination.IsRegister()) {
        __ Mov(Register(dst), RegisterFrom(source, dst_type));
      } else {
        DCHECK(destination.IsFpuRegister());
        // Core -> FP: pick the integer type with the same width as `dst_type`.
        DataType::Type source_type = DataType::Is64BitType(dst_type)
            ? DataType::Type::kInt64
            : DataType::Type::kInt32;
        __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type));
      }
    } else {
      DCHECK(source.IsFpuRegister());
      if (destination.IsRegister()) {
        // FP -> core: pick the floating-point type with the same width as `dst_type`.
        DataType::Type source_type = DataType::Is64BitType(dst_type)
            ? DataType::Type::kFloat64
            : DataType::Type::kFloat32;
        __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type));
      } else {
        DCHECK(destination.IsFpuRegister());
        if (GetGraph()->HasSIMD()) {
          // With SIMD in the graph the whole Q register is copied.
          __ Mov(QRegisterFrom(destination), QRegisterFrom(source));
        } else {
          __ Fmov(VRegister(dst), FPRegisterFrom(source, dst_type));
        }
      }
    }
  } else if (destination.IsSIMDStackSlot()) {
    // --- Destination is a SIMD stack slot. ---
    if (source.IsFpuRegister()) {
      __ Str(QRegisterFrom(source), StackOperandFrom(destination));
    } else {
      DCHECK(source.IsSIMDStackSlot());
      UseScratchRegisterScope temps(GetVIXLAssembler());
      if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
        // No scratch V register available: copy the slot as two kArm64WordSize halves
        // through one X register.
        Register temp = temps.AcquireX();
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex()));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex()));
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + kArm64WordSize));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + kArm64WordSize));
      } else {
        VRegister temp = temps.AcquireVRegisterOfSize(kQRegSize);
        __ Ldr(temp, StackOperandFrom(source));
        __ Str(temp, StackOperandFrom(destination));
      }
    }
  } else {  // The destination is not a register. It must be a stack slot.
    DCHECK(destination.IsStackSlot() || destination.IsDoubleStackSlot());
    if (source.IsRegister() || source.IsFpuRegister()) {
      if (unspecified_type) {
        // Infer the type from the slot width and the source register class.
        if (source.IsRegister()) {
          dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64;
        } else {
          dst_type =
              destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64;
        }
      }
      DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) &&
             (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type)));
      __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination));
    } else if (source.IsConstant()) {
      DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type))
          << source << " " << dst_type;
      UseScratchRegisterScope temps(GetVIXLAssembler());
      HConstant* src_cst = source.GetConstant();
      CPURegister temp;
      if (src_cst->IsZeroBitPattern()) {
        // An all-zero constant is stored straight from the zero register of matching width,
        // so no scratch register is acquired.
        temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
            ? Register(xzr)
            : Register(wzr);
      } else {
        if (src_cst->IsIntConstant()) {
          temp = temps.AcquireW();
        } else if (src_cst->IsLongConstant()) {
          temp = temps.AcquireX();
        } else if (src_cst->IsFloatConstant()) {
          temp = temps.AcquireS();
        } else {
          DCHECK(src_cst->IsDoubleConstant());
          temp = temps.AcquireD();
        }
        MoveConstant(temp, src_cst);
      }
      __ Str(temp, StackOperandFrom(destination));
    } else {
      DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
      DCHECK(source.IsDoubleStackSlot() == destination.IsDoubleStackSlot());
      UseScratchRegisterScope temps(GetVIXLAssembler());
      // Use any scratch register (a core or a floating-point one)
      // from VIXL scratch register pools as a temporary.
      //
      // We used to only use the FP scratch register pool, but in some
      // rare cases the only register from this pool (D31) would
      // already be used (e.g. within a ParallelMove instruction, when
      // a move is blocked by a another move requiring a scratch FP
      // register, which would reserve D31). To prevent this issue, we
      // ask for a scratch register of any type (core or FP).
      //
      // Also, we start by asking for a FP scratch register first, as the
      // demand of scratch core registers is higher. This is why we
      // use AcquireFPOrCoreCPURegisterOfSize instead of
      // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
      // allocates core scratch registers first.
CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
          GetVIXLAssembler(),
          &temps,
          (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
      __ Ldr(temp, StackOperandFrom(source));
      __ Str(temp, StackOperandFrom(destination));
    }
  }
}

// Emits a plain load of `type` from `src` into `dst`, choosing the load instruction
// (zero- or sign-extending byte/halfword, or full-width LDR) from the type.
// kUint32, kUint64 and kVoid are rejected with a fatal log.
void CodeGeneratorARM64::Load(DataType::Type type,
                              CPURegister dst,
                              const MemOperand& src) {
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
      __ Ldrb(Register(dst), src);
      break;
    case DataType::Type::kInt8:
      __ Ldrsb(Register(dst), src);
      break;
    case DataType::Type::kUint16:
      __ Ldrh(Register(dst), src);
      break;
    case DataType::Type::kInt16:
      __ Ldrsh(Register(dst), src);
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kReference:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // The register width must agree with the type width.
      DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
      __ Ldr(dst, src);
      break;
    case DataType::Type::kUint32:
    case DataType::Type::kUint64:
    case DataType::Type::kVoid:
      LOG(FATAL) << "Unreachable type " << type;
  }
}

// Emits a load-acquire of `instruction`'s type from `src` into `dst`.
// The address is first computed into a scratch X register, then loaded with the
// LDARB/LDARH/LDAR variant matching the type. Signed 8/16-bit results are sign-extended
// afterwards, and floating-point results go through a core scratch register.
// When `needs_null_check` is set, MaybeRecordImplicitNullCheck() is called right after the
// load instruction.
void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction,
                                     CPURegister dst,
                                     const MemOperand& src,
                                     bool needs_null_check) {
  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope temps(masm);
  Register temp_base = temps.AcquireX();
  DataType::Type type = instruction->GetType();

  DCHECK(!src.IsPreIndex());
  DCHECK(!src.IsPostIndex());

  // TODO(vixl): Let the MacroAssembler handle MemOperand.
  __ Add(temp_base, src.GetBaseRegister(), OperandFromMemOperand(src));
  {
    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
    MemOperand base = MemOperand(temp_base);
    switch (type) {
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
        {
          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
          __ ldarb(Register(dst), base);
          if (needs_null_check) {
            MaybeRecordImplicitNullCheck(instruction);
          }
        }
        // The signed variant is sign-extended separately, after the exact-size scope.
        if (type == DataType::Type::kInt8) {
          __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
        }
        break;
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
        {
          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
          __ ldarh(Register(dst), base);
          if (needs_null_check) {
            MaybeRecordImplicitNullCheck(instruction);
          }
        }
        // The signed variant is sign-extended separately, after the exact-size scope.
        if (type == DataType::Type::kInt16) {
          __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte);
        }
        break;
      case DataType::Type::kInt32:
      case DataType::Type::kReference:
      case DataType::Type::kInt64:
        DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));
        {
          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
          __ ldar(Register(dst), base);
          if (needs_null_check) {
            MaybeRecordImplicitNullCheck(instruction);
          }
        }
        break;
      case DataType::Type::kFloat32:
      case DataType::Type::kFloat64: {
        DCHECK(dst.IsFPRegister());
        DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type));

        // Load into a core scratch register of matching width, then move the bits to `dst`.
        Register temp = dst.Is64Bits() ? temps.AcquireX() : temps.AcquireW();
        {
          ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize);
          __ ldar(temp, base);
          if (needs_null_check) {
            MaybeRecordImplicitNullCheck(instruction);
          }
        }
        __ Fmov(VRegister(dst), temp);
        break;
      }
      case DataType::Type::kUint32:
      case DataType::Type::kUint64:
      case DataType::Type::kVoid:
        LOG(FATAL) << "Unreachable type " << type;
    }
  }
}

// Emits a plain store of `src` to `dst`, choosing STRB/STRH/STR from `type`.
// kUint32, kUint64 and kVoid are rejected with a fatal log.
void CodeGeneratorARM64::Store(DataType::Type type,
                               CPURegister src,
                               const MemOperand& dst) {
  switch (type) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Strb(Register(src), dst);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Strh(Register(src), dst);
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kReference:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      // The register width must agree with the type width.
      DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type));
      __ Str(src, dst);
      break;
    case DataType::Type::kUint32:
    case DataType::Type::kUint64:
    case DataType::Type::kVoid:
      LOG(FATAL) << "Unreachable type " << type;
  }
}

// Emits a store-release of `src` (of `type`) to `dst`. The address is first computed into
// a scratch X register. When `needs_null_check` is set, MaybeRecordImplicitNullCheck() is
// called right after the store instruction.
void CodeGeneratorARM64::StoreRelease(HInstruction* instruction,
                                      DataType::Type type,
                                      CPURegister src,
                                      const MemOperand& dst,
                                      bool needs_null_check) {
  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register temp_base = temps.AcquireX();

  DCHECK(!dst.IsPreIndex());
  DCHECK(!dst.IsPostIndex());

  // TODO(vixl): Let the MacroAssembler handle this.
  Operand op = OperandFromMemOperand(dst);
  __ Add(temp_base, dst.GetBaseRegister(), op);
  MemOperand base = MemOperand(temp_base);
  // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
switch (type) { case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ stlrb(Register(src), base); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } } break; case DataType::Type::kUint16: case DataType::Type::kInt16: { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ stlrh(Register(src), base); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } } break; case DataType::Type::kInt32: case DataType::Type::kReference: case DataType::Type::kInt64: DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ stlr(Register(src), base); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } } break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: { DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); Register temp_src; if (src.IsZero()) { // The zero register is used to avoid synthesizing zero constants. temp_src = Register(src); } else { DCHECK(src.IsFPRegister()); temp_src = src.Is64Bits() ? temps.AcquireX() : temps.AcquireW(); __ Fmov(temp_src, VRegister(src)); } { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ stlr(temp_src, base); if (needs_null_check) { MaybeRecordImplicitNullCheck(instruction); } } break; } case DataType::Type::kUint32: case DataType::Type::kUint64: case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } } void CodeGeneratorARM64::InvokeRuntime(QuickEntrypointEnum entrypoint, HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path) { ValidateInvokeRuntime(entrypoint, instruction, slow_path); ThreadOffset64 entrypoint_offset = GetThreadOffset(entrypoint); // Reduce code size for AOT by using shared trampolines for slow path runtime calls across the // entire oat file. 
This adds an extra branch and we do not want to slow down the main path. // For JIT, thunk sharing is per-method, so the gains would be smaller or even negative. if (slow_path == nullptr || Runtime::Current()->UseJitCompilation()) { __ Ldr(lr, MemOperand(tr, entrypoint_offset.Int32Value())); // Ensure the pc position is recorded immediately after the `blr` instruction. ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); __ blr(lr); if (EntrypointRequiresStackMap(entrypoint)) { RecordPcInfo(instruction, dex_pc, slow_path); } } else { // Ensure the pc position is recorded immediately after the `bl` instruction. ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); EmitEntrypointThunkCall(entrypoint_offset); if (EntrypointRequiresStackMap(entrypoint)) { RecordPcInfo(instruction, dex_pc, slow_path); } } } void CodeGeneratorARM64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, HInstruction* instruction, SlowPathCode* slow_path) { ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); __ Ldr(lr, MemOperand(tr, entry_point_offset)); __ Blr(lr); } void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path, Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireW(); constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); const size_t status_byte_offset = mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); constexpr uint32_t shifted_visibly_initialized_value = enum_cast(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); // CMP (immediate) is limited to imm12 or imm12<<12, so we would need to materialize // the constant 0xf0000000 for comparison with the full 32-bit field. To reduce the code // size, load only the high byte of the field and compare with 0xf0. 
// Note: The same code size could be achieved with LDR+MNV(asr #24)+CBNZ but benchmarks // show that this pattern is slower (tested on little cores). __ Ldrb(temp, HeapOperand(class_reg, status_byte_offset)); __ Cmp(temp, shifted_visibly_initialized_value); __ B(lo, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare( HTypeCheckInstruction* check, vixl::aarch64::Register temp) { uint32_t path_to_root = check->GetBitstringPathToRoot(); uint32_t mask = check->GetBitstringMask(); DCHECK(IsPowerOfTwo(mask + 1)); size_t mask_bits = WhichPowerOf2(mask + 1); if (mask_bits == 16u) { // Load only the bitstring part of the status word. __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset())); } else { // /* uint32_t */ temp = temp->status_ __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset())); // Extract the bitstring bits. __ Ubfx(temp, temp, 0, mask_bits); } // Compare the bitstring bits to `path_to_root`. 
__ Cmp(temp, path_to_root); } void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) { BarrierType type = BarrierAll; switch (kind) { case MemBarrierKind::kAnyAny: case MemBarrierKind::kAnyStore: { type = BarrierAll; break; } case MemBarrierKind::kLoadAny: { type = BarrierReads; break; } case MemBarrierKind::kStoreStore: { type = BarrierWrites; break; } default: LOG(FATAL) << "Unexpected memory barrier " << kind; } __ Dmb(InnerShareable, type); } void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathARM64* slow_path = down_cast(instruction->GetSlowPath()); if (slow_path == nullptr) { slow_path = new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor); instruction->SetSlowPath(slow_path); codegen_->AddSlowPath(slow_path); if (successor != nullptr) { DCHECK(successor->IsLoopHeader()); } } else { DCHECK_EQ(slow_path->GetSuccessor(), successor); } UseScratchRegisterScope temps(codegen_->GetVIXLAssembler()); Register temp = temps.AcquireW(); __ Ldrh(temp, MemOperand(tr, Thread::ThreadFlagsOffset().SizeValue())); if (successor == nullptr) { __ Cbnz(temp, slow_path->GetEntryLabel()); __ Bind(slow_path->GetReturnLabel()); } else { __ Cbz(temp, codegen_->GetLabelOf(successor)); __ B(slow_path->GetEntryLabel()); // slow_path will return to GetLabelOf(successor). 
} } InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen) : InstructionCodeGenerator(graph, codegen), assembler_(codegen->GetAssembler()), codegen_(codegen) {} void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { DCHECK_EQ(instr->InputCount(), 2U); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); DataType::Type type = instr->GetResultType(); switch (type) { case DataType::Type::kInt32: case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; case DataType::Type::kFloat32: case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unexpected " << instr->DebugName() << " type " << type; } } void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction, object_field_get_with_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. // We need a temporary register for the read barrier load in // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier() // only if the field is volatile or the offset is too big. 
if (field_info.IsVolatile() || field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) { locations->AddTemp(FixedTempLocation()); } } locations->SetInAt(0, Location::RequiresRegister()); if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { // The output overlaps for an object field get when read barriers // are enabled: we do not want the load to overwrite the object's // location, as we need it to emit the read barrier. locations->SetOut( Location::RequiresRegister(), object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap); } } void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); LocationSummary* locations = instruction->GetLocations(); Location base_loc = locations->InAt(0); Location out = locations->Out(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); DataType::Type load_type = instruction->GetType(); MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && load_type == DataType::Type::kReference) { // Object FieldGet with Baker's read barrier case. // /* HeapReference