/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86_64 {

static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
// table version generates 7 instructions and num_entries literals. The compare/jump
// sequence generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };

static constexpr int kC2ConditionMask = 0x400;

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  // Custom calling convention: RAX serves as both input and output.
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(RAX));
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()

class NullCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
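      // (If the exception cannot be caught in this method, the frame is simply
      // unwound and the live register values are not needed.)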
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
};

class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
};

class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    __ Bind(GetEntryLabel());
    if (type_ == DataType::Type::kInt32) {
      if (is_div_) {
        __ negl(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    } else {
      DCHECK_EQ(DataType::Type::kInt64, type_);
      if (is_div_) {
        __ negq(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }

 private:
  const CpuRegister cpu_reg_;
  const DataType::Type type_;
  const bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};

class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
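    // Resume execution: either right after the suspend check (when no explicit
    // successor was given) or directly at the requested successor block.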
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_64_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
};

class BoundsCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    // Are we using an array length from memory?
    HInstruction* array_length = instruction_->InputAt(1);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
      // Load the array length into our temporary.
      HArrayLength* length = array_length->AsArrayLength();
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
      length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
      // Check for conflicts with index.
      if (length_loc.Equals(locations->InAt(0))) {
        // We know we aren't using parameter 2.
        length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
      }
      __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
      if (mirror::kUseStringCompression && length->IsStringLength()) {
        __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
      }
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    codegen->EmitParallelMoves(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kInt32,
        length_loc,
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32);
    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};

class LoadClassSlowPathX86_64 : public SlowPathCode {
 public:
  LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
      : SlowPathCode(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // Custom calling convention: RAX serves as both input and output.
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ movl(CpuRegister(RAX), Immediate(type_index.index_));
      x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
    }
    if (must_do_clinit) {
      x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    }

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }

 private:
  // The class this slow path will load.
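  // (For an HClinitCheck instruction this is the check's input class, which is
  // not the same object as `instruction_` itself.)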
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};

class LoadStringSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    // Custom calling convention: RAX serves as both input and output.
    __ movl(CpuRegister(RAX), Immediate(string_index.index_));
    x86_64_codegen->InvokeRuntime(kQuickResolveString,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
};

class TypeCheckSlowPathX86_64 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    uint32_t dex_pc = instruction_->GetDexPc();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    if (kPoisonHeapReferences &&
        instruction_->IsCheckCast() &&
        instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
      // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
      __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
    }

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
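    // (For example, InAt(0) may already live in the register that InAt(1) has
    // to be moved into; the resolver orders the moves so nothing is clobbered.)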
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
      }

      RestoreLiveRegisters(codegen, locations);
      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};

class DeoptimizationSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_64_codegen->Load32BitValue(
        CpuRegister(calling_convention.GetRegisterAt(0)),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};

class ArraySetSlowPathX86_64 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_,
                                  instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
                                Location ref,
                                bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in R0):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
                                              Location ref,
                                              CpuRegister obj,
                                              const Address& field_addr,
                                              bool unpoison_ref_before_marking,
                                              CpuRegister temp1,
                                              CpuRegister temp2)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp1_(temp1),
        temp2_(temp2) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override {
    return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    // This slow path is only used by the UnsafeCASObject intrinsic.
    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp1_, ref_cpu_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in R0):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp1_, ref_cpu_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the
    // mutator updates the field before us, but that is OK. This is
    // achieved using a strong compare-and-set (CAS) operation with
    // relaxed memory synchronization ordering, where the expected
    // value is the old reference and the desired value is the new
    // reference. This operation is implemented with a 32-bit LOCK
    // CMPXCHG instruction, which requires the expected value (the
    // old reference) to be in EAX. Save RAX beforehand, and move the
    // expected value (stored in `temp1_`) into EAX.
    __ movq(temp2_, CpuRegister(RAX));
    __ movl(CpuRegister(RAX), temp1_);

    // Convenience aliases.
    CpuRegister base = obj_;
    CpuRegister expected = CpuRegister(RAX);
    CpuRegister value = ref_cpu_reg;

    bool base_equals_value = (base.AsRegister() == value.AsRegister());
    Register value_reg = ref_reg;
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value_reg` to a temporary register. This way, poisoning
        // `value_reg` won't invalidate `base`.
        value_reg = temp1_.AsRegister();
        __ movl(CpuRegister(value_reg), base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (RAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value_reg, expected.AsRegister());
      DCHECK_NE(base.AsRegister(), expected.AsRegister());

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(CpuRegister(value_reg));
    }

    __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value_reg` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(CpuRegister(value_reg));
      }
      // No need to unpoison `expected` (RAX), as it is about to be overwritten below.
    }

    // Restore RAX.
    __ movq(CpuRegister(RAX), temp2_);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const CpuRegister obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const CpuRegister temp1_;
  const CpuRegister temp2_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
                                            Location out,
                                            Location ref,
                                            Location obj,
                                            uint32_t offset,
                                            Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial
    // object has been overwritten by (or after) the heap object
    // reference load to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute real offset and store it in index_.
        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86_64::X86_64Assembler::shll and
          // art::x86_64::X86_64Assembler::AddImmediate below), but it
          // has not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the
        // scale factor (2) cannot overflow in practice, as the
        // runtime is unable to allocate object arrays with a size
        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
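    // kQuickReadBarrierSlow expects (ref, obj, offset) in the first three
    // argument registers; when `index` is valid it supplies the offset.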
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    }
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathX86_64";
  }

 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<CpuRegister>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86-64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
};

// Slow path generating a read barrier for a GC root.
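// Used only for HLoadClass and HLoadString: the root is passed to
// kQuickReadBarrierForRootSlow, which returns the marked object in RAX.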
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT

inline Condition X86_64IntegerCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps FP condition to x86_64 name.
inline Condition X86_64FPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    default:      break;  // should not happen
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    ArtMethod* method ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}

void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  // All registers are assumed to be correctly set up.

  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
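  // First materialize the callee ArtMethod* (or a direct code address): from the
  // Thread for string init entrypoints, via a PC-relative patch for boot image and
  // .bss entries, or as an immediate for JIT-known methods; kRecursive reuses the
  // current method and kRuntimeCall defers the whole call to the runtime.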
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
      callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
      __ leal(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
      RecordBootImageMethodPatch(invoke);
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
      // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
      __ movl(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
      RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      __ movq(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
      RecordMethodBssEntryPatch(invoke);
      // No need for memory fence, thanks to the x86-64 memory model.
      break;
    }
    case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
      Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
      break;
    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
      __ call(&frame_entry_label_);
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<CpuRegister>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86_64PointerSize).SizeValue()));
      break;
  }
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86_64::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);

  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(CpuRegister(receiver), class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (the
  // concurrent copying collector may not in the future).
  __ MaybeUnpoisonHeapReference(temp);

  MaybeGenerateInlineCacheCheck(invoke, temp);

  // temp = temp->GetMethodAt(method_offset);
  __ movq(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      kX86_64PointerSize).SizeValue()));
  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}

void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
  boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
  __ Bind(&boot_image_other_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
  boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
  __ Bind(&boot_image_other_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
  boot_image_method_patches_.emplace_back(
      invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
  __ Bind(&boot_image_method_patches_.back().label);
}

void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
  method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
  __ Bind(&method_bss_entry_patches_.back().label);
}

void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
  boot_image_type_patches_.emplace_back(
      &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  __ Bind(&boot_image_type_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
  type_bss_entry_patches_.emplace_back(
      &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
  return &type_bss_entry_patches_.back().label;
}

void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
  boot_image_string_patches_.emplace_back(
      &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  __ Bind(&boot_image_string_patches_.back().label);
}

Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
  string_bss_entry_patches_.emplace_back(
      &load_string->GetDexFile(), load_string->GetStringIndex().index_);
  return &string_bss_entry_patches_.back().label;
}

void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
  if (GetCompilerOptions().IsBootImage()) {
    __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                   /* no_rip= */ false));
    RecordBootImageIntrinsicPatch(boot_image_reference);
  } else if (GetCompilerOptions().GetCompilePic()) {
    __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
    RecordBootImageRelRoPatch(boot_image_reference);
  } else {
    DCHECK(Runtime::Current()->UseJitCompilation());
    gc::Heap* heap = Runtime::Current()->GetHeap();
    DCHECK(!heap->GetBootImageSpaces().empty());
    const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
    __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
  }
}

void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
                                                       uint32_t boot_image_offset) {
  DCHECK(invoke->IsStatic());
  InvokeRuntimeCallingConvention calling_convention;
  CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
  if (GetCompilerOptions().IsBootImage()) {
    DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
    // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
    __ leal(argument,
            Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
    MethodReference target_method = invoke->GetTargetMethod();
    dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
    boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
    __ Bind(&boot_image_type_patches_.back().label);
  } else {
    LoadBootImageAddress(argument, boot_image_offset);
  }
  InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
  CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}

// The label points to the end of the "movl" or another instruction but the literal offset
// for method patch needs to point to the embedded constant which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
    const ArenaDeque<PatchInfo<Label>>& infos,
    ArenaVector<linker::LinkerPatch>* linker_patches) {
  for (const PatchInfo<Label>& info : infos) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(
        Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
  }
}

template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
                                     const DexFile* target_dex_file,
                                     uint32_t pc_insn_offset,
                                     uint32_t boot_image_offset) {
  DCHECK(target_dex_file == nullptr);  // Unused for these patches, should be null.
  return Factory(literal_offset, pc_insn_offset, boot_image_offset);
}

void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
  DCHECK(linker_patches->empty());
  size_t size =
      boot_image_method_patches_.size() +
      method_bss_entry_patches_.size() +
      boot_image_type_patches_.size() +
      type_bss_entry_patches_.size() +
      boot_image_string_patches_.size() +
      string_bss_entry_patches_.size() +
      boot_image_other_patches_.size();
  linker_patches->reserve(size);
  if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
        boot_image_method_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
        boot_image_type_patches_, linker_patches);
    EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
        boot_image_string_patches_, linker_patches);
  } else {
    DCHECK(boot_image_method_patches_.empty());
    DCHECK(boot_image_type_patches_.empty());
    DCHECK(boot_image_string_patches_.empty());
  }
  if (GetCompilerOptions().IsBootImage()) {
    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
        boot_image_other_patches_, linker_patches);
  } else {
    EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
        boot_image_other_patches_, linker_patches);
  }
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
      method_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
      type_bss_entry_patches_, linker_patches);
  EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
      string_bss_entry_patches_, linker_patches);
  DCHECK_EQ(size, linker_patches->size());
}

void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
  stream << Register(reg);
}

void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
  stream << FloatRegister(reg);
}

const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
  return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
}

size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
  __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  return kX86_64WordSize;
}

size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
  } else {
    __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
  }
  return GetSlowPathFPWidth();
}

size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
  if (GetGraph()->HasSIMD()) {
    __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  } else {
    __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
  }
  return GetSlowPathFPWidth();
}

void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
                                        HInstruction* instruction,
                                        uint32_t dex_pc,
                                        SlowPathCode* slow_path) {
  ValidateInvokeRuntime(entrypoint, instruction, slow_path);
  GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
  if (EntrypointRequiresStackMap(entrypoint)) {
    RecordPcInfo(instruction, dex_pc, slow_path);
  }
}

void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
                                                              HInstruction* instruction,
                                                              SlowPathCode* slow_path) {
  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
  GenerateInvokeRuntime(entry_point_offset);
}

void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
  __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
}

static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
                                         const CompilerOptions& compiler_options,
                                         OptimizingCompilerStats* stats)
    : CodeGenerator(graph,
                    kNumberOfCpuRegisters,
                    kNumberOfFloatRegisters,
                    kNumberOfCpuRegisterPairs,
                    ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
                                        arraysize(kCoreCalleeSaves))
                        | (1 << kFakeReturnRegister),
                    ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
                                        arraysize(kFpuCalleeSaves)),
                    compiler_options,
                    stats),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetAllocator(), this),
      assembler_(graph->GetAllocator()),
      constant_area_start_(0),
      boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
      fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}

InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                               CodeGeneratorX86_64* codegen)
    : InstructionCodeGenerator(graph, codegen),
      assembler_(codegen->GetAssembler()),
      codegen_(codegen) {}

void CodeGeneratorX86_64::SetupBlockedRegisters() const {
  // Stack register is always reserved.
  blocked_core_registers_[RSP] = true;

  // Block the register used as TMP.
  blocked_core_registers_[TMP] = true;
}

static dwarf::Reg DWARFReg(Register reg) {
  return dwarf::Reg::X86_64Core(static_cast<int>(reg));
}

static dwarf::Reg DWARFReg(FloatRegister reg) {
  return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
}

void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
  if (GetCompilerOptions().CountHotnessInCompiledCode()) {
    NearLabel overflow;
    Register method = kMethodRegisterArgument;
    if (!is_frame_entry) {
      CHECK(RequiresCurrentMethod());
      method = TMP;
      __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
    }
    __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
            Immediate(ArtMethod::MaxCounter()));
    __ j(kEqual, &overflow);
    __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
            Immediate(1));
    __ Bind(&overflow);
  }

  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
    ScopedObjectAccess soa(Thread::Current());
    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
    if (info != nullptr) {
      uint64_t address = reinterpret_cast64<uint64_t>(info);
      NearLabel done;
      __ movq(CpuRegister(TMP), Immediate(address));
      __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
              Immediate(1));
      __ j(kCarryClear, &done);
      if (HasEmptyFrame()) {
        CHECK(is_frame_entry);
        // Frame alignment, and the stub expects the method on the stack.
        __ pushq(CpuRegister(RDI));
        __ cfi().AdjustCFAOffset(kX86_64WordSize);
        __ cfi().RelOffset(DWARFReg(RDI), 0);
      } else if (!RequiresCurrentMethod()) {
        CHECK(is_frame_entry);
        __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
      }
      GenerateInvokeRuntime(
          GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
      if (HasEmptyFrame()) {
        __ popq(CpuRegister(RDI));
        __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
        __ cfi().Restore(DWARFReg(RDI));
      }
      __ Bind(&done);
    }
  }
}

void CodeGeneratorX86_64::GenerateFrameEntry() {
  __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
  __ Bind(&frame_entry_label_);
  bool skip_overflow_check = IsLeafMethod()
      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());

  if (!skip_overflow_check) {
    size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
    __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
    RecordPcInfo(nullptr, 0);
  }

  if (!HasEmptyFrame()) {
    for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
      Register reg = kCoreCalleeSaves[i];
      if (allocated_registers_.ContainsCoreRegister(reg)) {
        __ pushq(CpuRegister(reg));
        __ cfi().AdjustCFAOffset(kX86_64WordSize);
        __ cfi().RelOffset(DWARFReg(reg), 0);
      }
    }

    int adjust = GetFrameSize() - GetCoreSpillSize();
    __ subq(CpuRegister(RSP), Immediate(adjust));
    __ cfi().AdjustCFAOffset(adjust);
    uint32_t xmm_spill_location = GetFpuSpillStart();
    size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();

--i) { 1429 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { 1430 int offset = xmm_spill_location + (xmm_spill_slot_size * i); 1431 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); 1432 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); 1433 } 1434 } 1435 1436 // Save the current method if we need it. Note that we do not 1437 // do this in HCurrentMethod, as the instruction might have been removed 1438 // in the SSA graph. 1439 if (RequiresCurrentMethod()) { 1440 CHECK(!HasEmptyFrame()); 1441 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), 1442 CpuRegister(kMethodRegisterArgument)); 1443 } 1444 1445 if (GetGraph()->HasShouldDeoptimizeFlag()) { 1446 CHECK(!HasEmptyFrame()); 1447 // Initialize should_deoptimize flag to 0. 1448 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); 1449 } 1450 } 1451 1452 MaybeIncrementHotness(/* is_frame_entry= */ true); 1453 } 1454 1455 void CodeGeneratorX86_64::GenerateFrameExit() { 1456 __ cfi().RememberState(); 1457 if (!HasEmptyFrame()) { 1458 uint32_t xmm_spill_location = GetFpuSpillStart(); 1459 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth(); 1460 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) { 1461 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { 1462 int offset = xmm_spill_location + (xmm_spill_slot_size * i); 1463 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset)); 1464 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i])); 1465 } 1466 } 1467 1468 int adjust = GetFrameSize() - GetCoreSpillSize(); 1469 __ addq(CpuRegister(RSP), Immediate(adjust)); 1470 __ cfi().AdjustCFAOffset(-adjust); 1471 1472 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) { 1473 Register reg = kCoreCalleeSaves[i]; 1474 if (allocated_registers_.ContainsCoreRegister(reg)) { 1475 __ popq(CpuRegister(reg)); 1476 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize)); 1477 __ cfi().Restore(DWARFReg(reg)); 1478 } 1479 } 1480 } 1481 __ ret(); 1482 __ cfi().RestoreState(); 1483 __ cfi().DefCFAOffset(GetFrameSize()); 1484 } 1485 1486 void CodeGeneratorX86_64::Bind(HBasicBlock* block) { 1487 __ Bind(GetLabelOf(block)); 1488 } 1489 1490 void CodeGeneratorX86_64::Move(Location destination, Location source) { 1491 if (source.Equals(destination)) { 1492 return; 1493 } 1494 if (destination.IsRegister()) { 1495 CpuRegister dest = destination.AsRegister<CpuRegister>(); 1496 if (source.IsRegister()) { 1497 __ movq(dest, source.AsRegister<CpuRegister>()); 1498 } else if (source.IsFpuRegister()) { 1499 __ movd(dest, source.AsFpuRegister<XmmRegister>()); 1500 } else if (source.IsStackSlot()) { 1501 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1502 } else if (source.IsConstant()) { 1503 HConstant* constant = source.GetConstant(); 1504 if (constant->IsLongConstant()) { 1505 Load64BitValue(dest, constant->AsLongConstant()->GetValue()); 1506 } else { 1507 Load32BitValue(dest, GetInt32ValueOf(constant)); 1508 } 1509 } else { 1510 DCHECK(source.IsDoubleStackSlot()); 1511 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1512 } 1513 } else if (destination.IsFpuRegister()) { 1514 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 1515 if (source.IsRegister()) { 1516 __ movd(dest, source.AsRegister<CpuRegister>()); 1517 } else if (source.IsFpuRegister()) { 1518 __ movaps(dest, source.AsFpuRegister<XmmRegister>()); 1519 } else if (source.IsConstant()) { 1520 HConstant* 
constant = source.GetConstant(); 1521 int64_t value = CodeGenerator::GetInt64ValueOf(constant); 1522 if (constant->IsFloatConstant()) { 1523 Load32BitValue(dest, static_cast<int32_t>(value)); 1524 } else { 1525 Load64BitValue(dest, value); 1526 } 1527 } else if (source.IsStackSlot()) { 1528 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1529 } else { 1530 DCHECK(source.IsDoubleStackSlot()); 1531 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex())); 1532 } 1533 } else if (destination.IsStackSlot()) { 1534 if (source.IsRegister()) { 1535 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), 1536 source.AsRegister<CpuRegister>()); 1537 } else if (source.IsFpuRegister()) { 1538 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), 1539 source.AsFpuRegister<XmmRegister>()); 1540 } else if (source.IsConstant()) { 1541 HConstant* constant = source.GetConstant(); 1542 int32_t value = GetInt32ValueOf(constant); 1543 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); 1544 } else { 1545 DCHECK(source.IsStackSlot()) << source; 1546 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 1547 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 1548 } 1549 } else { 1550 DCHECK(destination.IsDoubleStackSlot()); 1551 if (source.IsRegister()) { 1552 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), 1553 source.AsRegister<CpuRegister>()); 1554 } else if (source.IsFpuRegister()) { 1555 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), 1556 source.AsFpuRegister<XmmRegister>()); 1557 } else if (source.IsConstant()) { 1558 HConstant* constant = source.GetConstant(); 1559 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant()); 1560 int64_t value = GetInt64ValueOf(constant); 1561 Store64BitValueToStack(destination, value); 1562 } else { 1563 DCHECK(source.IsDoubleStackSlot()); 1564 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 1565 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 1566 } 1567 } 1568 } 1569 1570 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) { 1571 DCHECK(location.IsRegister()); 1572 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value)); 1573 } 1574 1575 void CodeGeneratorX86_64::MoveLocation( 1576 Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) { 1577 Move(dst, src); 1578 } 1579 1580 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) { 1581 if (location.IsRegister()) { 1582 locations->AddTemp(location); 1583 } else { 1584 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location; 1585 } 1586 } 1587 1588 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) { 1589 if (successor->IsExitBlock()) { 1590 DCHECK(got->GetPrevious()->AlwaysThrows()); 1591 return; // no code needed 1592 } 1593 1594 HBasicBlock* block = got->GetBlock(); 1595 HInstruction* previous = got->GetPrevious(); 1596 1597 HLoopInformation* info = block->GetLoopInformation(); 1598 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { 1599 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); 1600 GenerateSuspendCheck(info->GetSuspendCheck(), successor); 1601 return; 1602 } 1603 1604 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { 1605 
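// The method-entry HSuspendCheck sits in the entry block right before this goto.
// It is emitted here, after the frame has been set up, with a null successor so
// that the slow path simply falls through to the code below once it returns.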
GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); 1606 } 1607 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) { 1608 __ jmp(codegen_->GetLabelOf(successor)); 1609 } 1610 } 1611 1612 void LocationsBuilderX86_64::VisitGoto(HGoto* got) { 1613 got->SetLocations(nullptr); 1614 } 1615 1616 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) { 1617 HandleGoto(got, got->GetSuccessor()); 1618 } 1619 1620 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) { 1621 try_boundary->SetLocations(nullptr); 1622 } 1623 1624 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) { 1625 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor(); 1626 if (!successor->IsExitBlock()) { 1627 HandleGoto(try_boundary, successor); 1628 } 1629 } 1630 1631 void LocationsBuilderX86_64::VisitExit(HExit* exit) { 1632 exit->SetLocations(nullptr); 1633 } 1634 1635 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { 1636 } 1637 1638 template<class LabelType> 1639 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond, 1640 LabelType* true_label, 1641 LabelType* false_label) { 1642 if (cond->IsFPConditionTrueIfNaN()) { 1643 __ j(kUnordered, true_label); 1644 } else if (cond->IsFPConditionFalseIfNaN()) { 1645 __ j(kUnordered, false_label); 1646 } 1647 __ j(X86_64FPCondition(cond->GetCondition()), true_label); 1648 } 1649 1650 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) { 1651 LocationSummary* locations = condition->GetLocations(); 1652 1653 Location left = locations->InAt(0); 1654 Location right = locations->InAt(1); 1655 DataType::Type type = condition->InputAt(0)->GetType(); 1656 switch (type) { 1657 case DataType::Type::kBool: 1658 case DataType::Type::kUint8: 1659 case DataType::Type::kInt8: 1660 case DataType::Type::kUint16: 1661 case DataType::Type::kInt16: 1662 case DataType::Type::kInt32: 1663 case DataType::Type::kReference: { 1664 codegen_->GenerateIntCompare(left, right); 1665 break; 1666 } 1667 case DataType::Type::kInt64: { 1668 codegen_->GenerateLongCompare(left, right); 1669 break; 1670 } 1671 case DataType::Type::kFloat32: { 1672 if (right.IsFpuRegister()) { 1673 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); 1674 } else if (right.IsConstant()) { 1675 __ ucomiss(left.AsFpuRegister<XmmRegister>(), 1676 codegen_->LiteralFloatAddress( 1677 right.GetConstant()->AsFloatConstant()->GetValue())); 1678 } else { 1679 DCHECK(right.IsStackSlot()); 1680 __ ucomiss(left.AsFpuRegister<XmmRegister>(), 1681 Address(CpuRegister(RSP), right.GetStackIndex())); 1682 } 1683 break; 1684 } 1685 case DataType::Type::kFloat64: { 1686 if (right.IsFpuRegister()) { 1687 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); 1688 } else if (right.IsConstant()) { 1689 __ ucomisd(left.AsFpuRegister<XmmRegister>(), 1690 codegen_->LiteralDoubleAddress( 1691 right.GetConstant()->AsDoubleConstant()->GetValue())); 1692 } else { 1693 DCHECK(right.IsDoubleStackSlot()); 1694 __ ucomisd(left.AsFpuRegister<XmmRegister>(), 1695 Address(CpuRegister(RSP), right.GetStackIndex())); 1696 } 1697 break; 1698 } 1699 default: 1700 LOG(FATAL) << "Unexpected condition type " << type; 1701 } 1702 } 1703 1704 template<class LabelType> 1705 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition, 1706 LabelType* true_target_in, 1707 LabelType* false_target_in) { 1708 // Generated branching requires both targets to 
be explicit. If either of the 1709 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead. 1710 LabelType fallthrough_target; 1711 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in; 1712 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in; 1713 1714 // Generate the comparison to set the CC. 1715 GenerateCompareTest(condition); 1716 1717 // Now generate the correct jump(s). 1718 DataType::Type type = condition->InputAt(0)->GetType(); 1719 switch (type) { 1720 case DataType::Type::kInt64: { 1721 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); 1722 break; 1723 } 1724 case DataType::Type::kFloat32: { 1725 GenerateFPJumps(condition, true_target, false_target); 1726 break; 1727 } 1728 case DataType::Type::kFloat64: { 1729 GenerateFPJumps(condition, true_target, false_target); 1730 break; 1731 } 1732 default: 1733 LOG(FATAL) << "Unexpected condition type " << type; 1734 } 1735 1736 if (false_target != &fallthrough_target) { 1737 __ jmp(false_target); 1738 } 1739 1740 if (fallthrough_target.IsLinked()) { 1741 __ Bind(&fallthrough_target); 1742 } 1743 } 1744 1745 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { 1746 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS 1747 // are set only strictly before `branch`. We can't use the eflags on long 1748 // conditions if they are materialized due to the complex branching. 1749 return cond->IsCondition() && 1750 cond->GetNext() == branch && 1751 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); 1752 } 1753 1754 template<class LabelType> 1755 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction, 1756 size_t condition_input_index, 1757 LabelType* true_target, 1758 LabelType* false_target) { 1759 HInstruction* cond = instruction->InputAt(condition_input_index); 1760 1761 if (true_target == nullptr && false_target == nullptr) { 1762 // Nothing to do. The code always falls through. 1763 return; 1764 } else if (cond->IsIntConstant()) { 1765 // Constant condition, statically compared against "true" (integer value 1). 1766 if (cond->AsIntConstant()->IsTrue()) { 1767 if (true_target != nullptr) { 1768 __ jmp(true_target); 1769 } 1770 } else { 1771 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue(); 1772 if (false_target != nullptr) { 1773 __ jmp(false_target); 1774 } 1775 } 1776 return; 1777 } 1778 1779 // The following code generates these patterns: 1780 // (1) true_target == nullptr && false_target != nullptr 1781 // - opposite condition true => branch to false_target 1782 // (2) true_target != nullptr && false_target == nullptr 1783 // - condition true => branch to true_target 1784 // (3) true_target != nullptr && false_target != nullptr 1785 // - condition true => branch to true_target 1786 // - branch to false_target 1787 if (IsBooleanValueOrMaterializedCondition(cond)) { 1788 if (AreEflagsSetFrom(cond, instruction)) { 1789 if (true_target == nullptr) { 1790 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target); 1791 } else { 1792 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target); 1793 } 1794 } else { 1795 // Materialized condition, compare against 0. 
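// Roughly, the code below emits either
//   testl cond, cond                 (condition held in a register)
// or
//   cmpl [rsp + <slot>], 0           (condition spilled to a stack slot)
// followed by jnz <true_target> or jz <false_target>, whichever target is known.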
1796 Location lhs = instruction->GetLocations()->InAt(condition_input_index); 1797 if (lhs.IsRegister()) { 1798 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>()); 1799 } else { 1800 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0)); 1801 } 1802 if (true_target == nullptr) { 1803 __ j(kEqual, false_target); 1804 } else { 1805 __ j(kNotEqual, true_target); 1806 } 1807 } 1808 } else { 1809 // Condition has not been materialized, use its inputs as the 1810 // comparison and its condition as the branch condition. 1811 HCondition* condition = cond->AsCondition(); 1812 1813 // If this is a long or FP comparison that has been folded into 1814 // the HCondition, generate the comparison directly. 1815 DataType::Type type = condition->InputAt(0)->GetType(); 1816 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) { 1817 GenerateCompareTestAndBranch(condition, true_target, false_target); 1818 return; 1819 } 1820 1821 Location lhs = condition->GetLocations()->InAt(0); 1822 Location rhs = condition->GetLocations()->InAt(1); 1823 codegen_->GenerateIntCompare(lhs, rhs); 1824 if (true_target == nullptr) { 1825 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target); 1826 } else { 1827 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); 1828 } 1829 } 1830 1831 // If neither branch falls through (case 3), the conditional branch to `true_target` 1832 // was already emitted (case 2) and we need to emit a jump to `false_target`. 1833 if (true_target != nullptr && false_target != nullptr) { 1834 __ jmp(false_target); 1835 } 1836 } 1837 1838 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { 1839 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); 1840 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { 1841 locations->SetInAt(0, Location::Any()); 1842 } 1843 } 1844 1845 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { 1846 HBasicBlock* true_successor = if_instr->IfTrueSuccessor(); 1847 HBasicBlock* false_successor = if_instr->IfFalseSuccessor(); 1848 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ? 1849 nullptr : codegen_->GetLabelOf(true_successor); 1850 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ? 
1851 nullptr : codegen_->GetLabelOf(false_successor); 1852 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target); 1853 } 1854 1855 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { 1856 LocationSummary* locations = new (GetGraph()->GetAllocator()) 1857 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); 1858 InvokeRuntimeCallingConvention calling_convention; 1859 RegisterSet caller_saves = RegisterSet::Empty(); 1860 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 1861 locations->SetCustomSlowPathCallerSaves(caller_saves); 1862 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) { 1863 locations->SetInAt(0, Location::Any()); 1864 } 1865 } 1866 1867 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { 1868 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize); 1869 GenerateTestAndBranch<Label>(deoptimize, 1870 /* condition_input_index= */ 0, 1871 slow_path->GetEntryLabel(), 1872 /* false_target= */ nullptr); 1873 } 1874 1875 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1876 LocationSummary* locations = new (GetGraph()->GetAllocator()) 1877 LocationSummary(flag, LocationSummary::kNoCall); 1878 locations->SetOut(Location::RequiresRegister()); 1879 } 1880 1881 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { 1882 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(), 1883 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag())); 1884 } 1885 1886 static bool SelectCanUseCMOV(HSelect* select) { 1887 // There are no conditional move instructions for XMMs. 1888 if (DataType::IsFloatingPointType(select->GetType())) { 1889 return false; 1890 } 1891 1892 // A FP condition doesn't generate the single CC that we need. 1893 HInstruction* condition = select->GetCondition(); 1894 if (condition->IsCondition() && 1895 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) { 1896 return false; 1897 } 1898 1899 // We can generate a CMOV for this Select. 1900 return true; 1901 } 1902 1903 void LocationsBuilderX86_64::VisitSelect(HSelect* select) { 1904 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); 1905 if (DataType::IsFloatingPointType(select->GetType())) { 1906 locations->SetInAt(0, Location::RequiresFpuRegister()); 1907 locations->SetInAt(1, Location::Any()); 1908 } else { 1909 locations->SetInAt(0, Location::RequiresRegister()); 1910 if (SelectCanUseCMOV(select)) { 1911 if (select->InputAt(1)->IsConstant()) { 1912 locations->SetInAt(1, Location::RequiresRegister()); 1913 } else { 1914 locations->SetInAt(1, Location::Any()); 1915 } 1916 } else { 1917 locations->SetInAt(1, Location::Any()); 1918 } 1919 } 1920 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { 1921 locations->SetInAt(2, Location::RequiresRegister()); 1922 } 1923 locations->SetOut(Location::SameAsFirstInput()); 1924 } 1925 1926 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { 1927 LocationSummary* locations = select->GetLocations(); 1928 if (SelectCanUseCMOV(select)) { 1929 // If both the condition and the source types are integer, we can generate 1930 // a CMOV to implement Select. 
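// Rough shape of what is emitted below. The output register already holds the
// "false" value, because the output is constrained to the same register as InAt(0):
//   <set EFLAGS from the condition>
//   cmov<cc> out, value_true         (32- or 64-bit form, per the select's type)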
1931 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>(); 1932 Location value_true_loc = locations->InAt(1); 1933 DCHECK(locations->InAt(0).Equals(locations->Out())); 1934 1935 HInstruction* select_condition = select->GetCondition(); 1936 Condition cond = kNotEqual; 1937 1938 // Figure out how to test the 'condition'. 1939 if (select_condition->IsCondition()) { 1940 HCondition* condition = select_condition->AsCondition(); 1941 if (!condition->IsEmittedAtUseSite()) { 1942 // This was a previously materialized condition. 1943 // Can we use the existing condition code? 1944 if (AreEflagsSetFrom(condition, select)) { 1945 // Materialization was the previous instruction. Condition codes are right. 1946 cond = X86_64IntegerCondition(condition->GetCondition()); 1947 } else { 1948 // No, we have to recreate the condition code. 1949 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); 1950 __ testl(cond_reg, cond_reg); 1951 } 1952 } else { 1953 GenerateCompareTest(condition); 1954 cond = X86_64IntegerCondition(condition->GetCondition()); 1955 } 1956 } else { 1957 // Must be a Boolean condition, which needs to be compared to 0. 1958 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>(); 1959 __ testl(cond_reg, cond_reg); 1960 } 1961 1962 // If the condition is true, overwrite the output, which already contains false. 1963 // Generate the correct sized CMOV. 1964 bool is_64_bit = DataType::Is64BitType(select->GetType()); 1965 if (value_true_loc.IsRegister()) { 1966 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit); 1967 } else { 1968 __ cmov(cond, 1969 value_false, 1970 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit); 1971 } 1972 } else { 1973 NearLabel false_target; 1974 GenerateTestAndBranch<NearLabel>(select, 1975 /* condition_input_index= */ 2, 1976 /* true_target= */ nullptr, 1977 &false_target); 1978 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); 1979 __ Bind(&false_target); 1980 } 1981 } 1982 1983 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { 1984 new (GetGraph()->GetAllocator()) LocationSummary(info); 1985 } 1986 1987 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) { 1988 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile. 1989 } 1990 1991 void CodeGeneratorX86_64::GenerateNop() { 1992 __ nop(); 1993 } 1994 1995 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) { 1996 LocationSummary* locations = 1997 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall); 1998 // Handle the long/FP comparisons made in instruction simplification. 
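// In all cases the left operand must live in a (core or XMM) register, while the
// right operand may be a register, a stack slot, or a constant (an immediate for
// the integer cases, a constant-area literal for the FP cases), hence the
// register/Any split below.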
1999 switch (cond->InputAt(0)->GetType()) { 2000 case DataType::Type::kInt64: 2001 locations->SetInAt(0, Location::RequiresRegister()); 2002 locations->SetInAt(1, Location::Any()); 2003 break; 2004 case DataType::Type::kFloat32: 2005 case DataType::Type::kFloat64: 2006 locations->SetInAt(0, Location::RequiresFpuRegister()); 2007 locations->SetInAt(1, Location::Any()); 2008 break; 2009 default: 2010 locations->SetInAt(0, Location::RequiresRegister()); 2011 locations->SetInAt(1, Location::Any()); 2012 break; 2013 } 2014 if (!cond->IsEmittedAtUseSite()) { 2015 locations->SetOut(Location::RequiresRegister()); 2016 } 2017 } 2018 2019 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { 2020 if (cond->IsEmittedAtUseSite()) { 2021 return; 2022 } 2023 2024 LocationSummary* locations = cond->GetLocations(); 2025 Location lhs = locations->InAt(0); 2026 Location rhs = locations->InAt(1); 2027 CpuRegister reg = locations->Out().AsRegister<CpuRegister>(); 2028 NearLabel true_label, false_label; 2029 2030 switch (cond->InputAt(0)->GetType()) { 2031 default: 2032 // Integer case. 2033 2034 // Clear output register: setcc only sets the low byte. 2035 __ xorl(reg, reg); 2036 2037 codegen_->GenerateIntCompare(lhs, rhs); 2038 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); 2039 return; 2040 case DataType::Type::kInt64: 2041 // Clear output register: setcc only sets the low byte. 2042 __ xorl(reg, reg); 2043 2044 codegen_->GenerateLongCompare(lhs, rhs); 2045 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); 2046 return; 2047 case DataType::Type::kFloat32: { 2048 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); 2049 if (rhs.IsConstant()) { 2050 float value = rhs.GetConstant()->AsFloatConstant()->GetValue(); 2051 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value)); 2052 } else if (rhs.IsStackSlot()) { 2053 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 2054 } else { 2055 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>()); 2056 } 2057 GenerateFPJumps(cond, &true_label, &false_label); 2058 break; 2059 } 2060 case DataType::Type::kFloat64: { 2061 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); 2062 if (rhs.IsConstant()) { 2063 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue(); 2064 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value)); 2065 } else if (rhs.IsDoubleStackSlot()) { 2066 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex())); 2067 } else { 2068 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>()); 2069 } 2070 GenerateFPJumps(cond, &true_label, &false_label); 2071 break; 2072 } 2073 } 2074 2075 // Convert the jumps into the result. 2076 NearLabel done_label; 2077 2078 // False case: result = 0. 2079 __ Bind(&false_label); 2080 __ xorl(reg, reg); 2081 __ jmp(&done_label); 2082 2083 // True case: result = 1. 
2084 __ Bind(&true_label); 2085 __ movl(reg, Immediate(1)); 2086 __ Bind(&done_label); 2087 } 2088 2089 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) { 2090 HandleCondition(comp); 2091 } 2092 2093 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) { 2094 HandleCondition(comp); 2095 } 2096 2097 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) { 2098 HandleCondition(comp); 2099 } 2100 2101 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) { 2102 HandleCondition(comp); 2103 } 2104 2105 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) { 2106 HandleCondition(comp); 2107 } 2108 2109 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) { 2110 HandleCondition(comp); 2111 } 2112 2113 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 2114 HandleCondition(comp); 2115 } 2116 2117 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) { 2118 HandleCondition(comp); 2119 } 2120 2121 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) { 2122 HandleCondition(comp); 2123 } 2124 2125 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) { 2126 HandleCondition(comp); 2127 } 2128 2129 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 2130 HandleCondition(comp); 2131 } 2132 2133 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) { 2134 HandleCondition(comp); 2135 } 2136 2137 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) { 2138 HandleCondition(comp); 2139 } 2140 2141 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) { 2142 HandleCondition(comp); 2143 } 2144 2145 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { 2146 HandleCondition(comp); 2147 } 2148 2149 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) { 2150 HandleCondition(comp); 2151 } 2152 2153 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) { 2154 HandleCondition(comp); 2155 } 2156 2157 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) { 2158 HandleCondition(comp); 2159 } 2160 2161 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { 2162 HandleCondition(comp); 2163 } 2164 2165 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { 2166 HandleCondition(comp); 2167 } 2168 2169 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { 2170 LocationSummary* locations = 2171 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); 2172 switch (compare->InputAt(0)->GetType()) { 2173 case DataType::Type::kBool: 2174 case DataType::Type::kUint8: 2175 case DataType::Type::kInt8: 2176 case DataType::Type::kUint16: 2177 case DataType::Type::kInt16: 2178 case DataType::Type::kInt32: 2179 case DataType::Type::kInt64: { 2180 locations->SetInAt(0, Location::RequiresRegister()); 2181 locations->SetInAt(1, Location::Any()); 2182 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2183 break; 2184 } 2185 case DataType::Type::kFloat32: 2186 case DataType::Type::kFloat64: { 2187 locations->SetInAt(0, Location::RequiresFpuRegister()); 2188 locations->SetInAt(1, Location::Any()); 2189 locations->SetOut(Location::RequiresRegister()); 2190 break; 2191 } 2192 default: 2193 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType(); 2194 } 2195 } 2196 2197 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { 2198 
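// HCompare produces -1, 0 or 1. Rough skeleton of the code emitted below:
//     <compare lhs, rhs>
//     movl out, 0
//     je   done
//     j<less_cond> less        (kLess for integers, kBelow for FP, which use CF)
//     movl out, 1              (greater)
//     jmp  done
//   less:
//     movl out, -1
//   done:
// For FP inputs an unordered result is first routed to `greater` or `less`
// according to the compare's gt/lt bias.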
LocationSummary* locations = compare->GetLocations(); 2199 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 2200 Location left = locations->InAt(0); 2201 Location right = locations->InAt(1); 2202 2203 NearLabel less, greater, done; 2204 DataType::Type type = compare->InputAt(0)->GetType(); 2205 Condition less_cond = kLess; 2206 2207 switch (type) { 2208 case DataType::Type::kBool: 2209 case DataType::Type::kUint8: 2210 case DataType::Type::kInt8: 2211 case DataType::Type::kUint16: 2212 case DataType::Type::kInt16: 2213 case DataType::Type::kInt32: { 2214 codegen_->GenerateIntCompare(left, right); 2215 break; 2216 } 2217 case DataType::Type::kInt64: { 2218 codegen_->GenerateLongCompare(left, right); 2219 break; 2220 } 2221 case DataType::Type::kFloat32: { 2222 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); 2223 if (right.IsConstant()) { 2224 float value = right.GetConstant()->AsFloatConstant()->GetValue(); 2225 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value)); 2226 } else if (right.IsStackSlot()) { 2227 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); 2228 } else { 2229 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>()); 2230 } 2231 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 2232 less_cond = kBelow; // ucomis{s,d} sets CF 2233 break; 2234 } 2235 case DataType::Type::kFloat64: { 2236 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); 2237 if (right.IsConstant()) { 2238 double value = right.GetConstant()->AsDoubleConstant()->GetValue(); 2239 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value)); 2240 } else if (right.IsDoubleStackSlot()) { 2241 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex())); 2242 } else { 2243 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>()); 2244 } 2245 __ j(kUnordered, compare->IsGtBias() ? &greater : &less); 2246 less_cond = kBelow; // ucomis{s,d} sets CF 2247 break; 2248 } 2249 default: 2250 LOG(FATAL) << "Unexpected compare type " << type; 2251 } 2252 2253 __ movl(out, Immediate(0)); 2254 __ j(kEqual, &done); 2255 __ j(less_cond, &less); 2256 2257 __ Bind(&greater); 2258 __ movl(out, Immediate(1)); 2259 __ jmp(&done); 2260 2261 __ Bind(&less); 2262 __ movl(out, Immediate(-1)); 2263 2264 __ Bind(&done); 2265 } 2266 2267 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { 2268 LocationSummary* locations = 2269 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2270 locations->SetOut(Location::ConstantLocation(constant)); 2271 } 2272 2273 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) { 2274 // Will be generated at use site. 2275 } 2276 2277 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { 2278 LocationSummary* locations = 2279 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2280 locations->SetOut(Location::ConstantLocation(constant)); 2281 } 2282 2283 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) { 2284 // Will be generated at use site. 
2285 } 2286 2287 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { 2288 LocationSummary* locations = 2289 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2290 locations->SetOut(Location::ConstantLocation(constant)); 2291 } 2292 2293 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) { 2294 // Will be generated at use site. 2295 } 2296 2297 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { 2298 LocationSummary* locations = 2299 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2300 locations->SetOut(Location::ConstantLocation(constant)); 2301 } 2302 2303 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) { 2304 // Will be generated at use site. 2305 } 2306 2307 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { 2308 LocationSummary* locations = 2309 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); 2310 locations->SetOut(Location::ConstantLocation(constant)); 2311 } 2312 2313 void InstructionCodeGeneratorX86_64::VisitDoubleConstant( 2314 HDoubleConstant* constant ATTRIBUTE_UNUSED) { 2315 // Will be generated at use site. 2316 } 2317 2318 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) { 2319 constructor_fence->SetLocations(nullptr); 2320 } 2321 2322 void InstructionCodeGeneratorX86_64::VisitConstructorFence( 2323 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) { 2324 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); 2325 } 2326 2327 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2328 memory_barrier->SetLocations(nullptr); 2329 } 2330 2331 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) { 2332 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind()); 2333 } 2334 2335 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) { 2336 ret->SetLocations(nullptr); 2337 } 2338 2339 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) { 2340 codegen_->GenerateFrameExit(); 2341 } 2342 2343 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { 2344 LocationSummary* locations = 2345 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); 2346 switch (ret->InputAt(0)->GetType()) { 2347 case DataType::Type::kReference: 2348 case DataType::Type::kBool: 2349 case DataType::Type::kUint8: 2350 case DataType::Type::kInt8: 2351 case DataType::Type::kUint16: 2352 case DataType::Type::kInt16: 2353 case DataType::Type::kInt32: 2354 case DataType::Type::kInt64: 2355 locations->SetInAt(0, Location::RegisterLocation(RAX)); 2356 break; 2357 2358 case DataType::Type::kFloat32: 2359 case DataType::Type::kFloat64: 2360 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); 2361 break; 2362 2363 default: 2364 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); 2365 } 2366 } 2367 2368 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { 2369 switch (ret->InputAt(0)->GetType()) { 2370 case DataType::Type::kReference: 2371 case DataType::Type::kBool: 2372 case DataType::Type::kUint8: 2373 case DataType::Type::kInt8: 2374 case DataType::Type::kUint16: 2375 case DataType::Type::kInt16: 2376 case DataType::Type::kInt32: 2377 case DataType::Type::kInt64: 2378 
DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX); 2379 break; 2380 2381 case DataType::Type::kFloat32: { 2382 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), 2383 XMM0); 2384 // To simplify callers of an OSR method, we put the return value in both 2385 // floating point and core register. 2386 if (GetGraph()->IsCompilingOsr()) { 2387 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false); 2388 } 2389 break; 2390 } 2391 case DataType::Type::kFloat64: { 2392 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), 2393 XMM0); 2394 // To simplify callers of an OSR method, we put the return value in both 2395 // floating point and core register. 2396 if (GetGraph()->IsCompilingOsr()) { 2397 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true); 2398 } 2399 break; 2400 } 2401 2402 default: 2403 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType(); 2404 } 2405 codegen_->GenerateFrameExit(); 2406 } 2407 2408 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const { 2409 switch (type) { 2410 case DataType::Type::kReference: 2411 case DataType::Type::kBool: 2412 case DataType::Type::kUint8: 2413 case DataType::Type::kInt8: 2414 case DataType::Type::kUint16: 2415 case DataType::Type::kInt16: 2416 case DataType::Type::kUint32: 2417 case DataType::Type::kInt32: 2418 case DataType::Type::kUint64: 2419 case DataType::Type::kInt64: 2420 return Location::RegisterLocation(RAX); 2421 2422 case DataType::Type::kVoid: 2423 return Location::NoLocation(); 2424 2425 case DataType::Type::kFloat64: 2426 case DataType::Type::kFloat32: 2427 return Location::FpuRegisterLocation(XMM0); 2428 } 2429 2430 UNREACHABLE(); 2431 } 2432 2433 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const { 2434 return Location::RegisterLocation(kMethodRegisterArgument); 2435 } 2436 2437 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) { 2438 switch (type) { 2439 case DataType::Type::kReference: 2440 case DataType::Type::kBool: 2441 case DataType::Type::kUint8: 2442 case DataType::Type::kInt8: 2443 case DataType::Type::kUint16: 2444 case DataType::Type::kInt16: 2445 case DataType::Type::kInt32: { 2446 uint32_t index = gp_index_++; 2447 stack_index_++; 2448 if (index < calling_convention.GetNumberOfRegisters()) { 2449 return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); 2450 } else { 2451 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 2452 } 2453 } 2454 2455 case DataType::Type::kInt64: { 2456 uint32_t index = gp_index_; 2457 stack_index_ += 2; 2458 if (index < calling_convention.GetNumberOfRegisters()) { 2459 gp_index_ += 1; 2460 return Location::RegisterLocation(calling_convention.GetRegisterAt(index)); 2461 } else { 2462 gp_index_ += 2; 2463 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 2464 } 2465 } 2466 2467 case DataType::Type::kFloat32: { 2468 uint32_t index = float_index_++; 2469 stack_index_++; 2470 if (index < calling_convention.GetNumberOfFpuRegisters()) { 2471 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 2472 } else { 2473 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1)); 2474 } 2475 } 2476 2477 case DataType::Type::kFloat64: { 2478 uint32_t index = float_index_++; 2479 stack_index_ += 2; 2480 if (index < 
calling_convention.GetNumberOfFpuRegisters()) { 2481 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index)); 2482 } else { 2483 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2)); 2484 } 2485 } 2486 2487 case DataType::Type::kUint32: 2488 case DataType::Type::kUint64: 2489 case DataType::Type::kVoid: 2490 LOG(FATAL) << "Unexpected parameter type " << type; 2491 UNREACHABLE(); 2492 } 2493 return Location::NoLocation(); 2494 } 2495 2496 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2497 // The trampoline uses the same calling convention as dex calling conventions, 2498 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain 2499 // the method_idx. 2500 HandleInvoke(invoke); 2501 } 2502 2503 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { 2504 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); 2505 } 2506 2507 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 2508 // Explicit clinit checks triggered by static invokes must have been pruned by 2509 // art::PrepareForRegisterAllocation. 2510 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 2511 2512 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); 2513 if (intrinsic.TryDispatch(invoke)) { 2514 return; 2515 } 2516 2517 HandleInvoke(invoke); 2518 } 2519 2520 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) { 2521 if (invoke->GetLocations()->Intrinsified()) { 2522 IntrinsicCodeGeneratorX86_64 intrinsic(codegen); 2523 intrinsic.Dispatch(invoke); 2524 return true; 2525 } 2526 return false; 2527 } 2528 2529 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { 2530 // Explicit clinit checks triggered by static invokes must have been pruned by 2531 // art::PrepareForRegisterAllocation. 2532 DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); 2533 2534 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 2535 return; 2536 } 2537 2538 LocationSummary* locations = invoke->GetLocations(); 2539 codegen_->GenerateStaticOrDirectCall( 2540 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); 2541 } 2542 2543 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { 2544 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor; 2545 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor); 2546 } 2547 2548 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 2549 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_); 2550 if (intrinsic.TryDispatch(invoke)) { 2551 return; 2552 } 2553 2554 HandleInvoke(invoke); 2555 } 2556 2557 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) { 2558 if (TryGenerateIntrinsicCode(invoke, codegen_)) { 2559 return; 2560 } 2561 2562 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); 2563 DCHECK(!codegen_->IsLeafMethod()); 2564 } 2565 2566 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { 2567 HandleInvoke(invoke); 2568 // Add the hidden argument. 2569 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX)); 2570 } 2571 2572 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction, 2573 CpuRegister klass) { 2574 DCHECK_EQ(RDI, klass.AsRegister()); 2575 // We know the destination of an intrinsic, so no need to record inline 2576 // caches. 
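// Rough shape of the check emitted below for baseline (JIT) compiles:
//   movq TMP, #<inline cache address>
//   cmpl [TMP + <classes offset>], klass
//   je   done                             (monomorphic hit, nothing to update)
//   call <pUpdateInlineCache entrypoint>  (otherwise let the runtime update the cache)
// done: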
2577 if (!instruction->GetLocations()->Intrinsified() &&
2578 GetGraph()->IsCompilingBaseline() &&
2579 !Runtime::Current()->IsAotCompiler()) {
2580 ScopedObjectAccess soa(Thread::Current());
2581 ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
2582 if (info != nullptr) {
2583 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2584 uint64_t address = reinterpret_cast64<uint64_t>(cache);
2585 NearLabel done;
2586 __ movq(CpuRegister(TMP), Immediate(address));
2587 // Fast path for a monomorphic cache.
2588 __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
2589 __ j(kEqual, &done);
2590 GenerateInvokeRuntime(
2591 GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
2592 __ Bind(&done);
2593 }
2594 }
2595 }
2596
2597 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2598 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2599 LocationSummary* locations = invoke->GetLocations();
2600 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2601 CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2602 Location receiver = locations->InAt(0);
2603 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2604
2605 if (receiver.IsStackSlot()) {
2606 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2607 // /* HeapReference<Class> */ temp = temp->klass_
2608 __ movl(temp, Address(temp, class_offset));
2609 } else {
2610 // /* HeapReference<Class> */ temp = receiver->klass_
2611 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2612 }
2613 codegen_->MaybeRecordImplicitNullCheck(invoke);
2614 // Instead of simply (possibly) unpoisoning `temp` here, we should
2615 // emit a read barrier for the previous class reference load.
2616 // However this is not required in practice, as this is an
2617 // intermediate/temporary reference and because the current
2618 // concurrent copying collector keeps the from-space memory
2619 // intact/accessible until the end of the marking phase (the
2620 // concurrent copying collector may not do so in the future).
2621 __ MaybeUnpoisonHeapReference(temp);
2622
2623 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2624
2625 // Set the hidden argument. This is safe to do here, as RAX
2626 // won't be modified thereafter, before the `call` instruction.
2627 // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
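// Note: here the hidden argument carries the interface method's dex method index,
// which the IMT conflict resolution stub relies on to find the right target when
// several interface methods share the same IMT slot.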
2628 DCHECK_EQ(RAX, hidden_reg.AsRegister());
2629 codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2630
2631 // temp = temp->GetAddressOfIMT()
2632 __ movq(temp,
2633 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2634 // Compute the offset of the IMT entry for this interface method.
2635 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2636 invoke->GetImtIndex(), kX86_64PointerSize));
2637 // temp = temp->GetImtEntryAt(method_offset);
2638 __ movq(temp, Address(temp, method_offset));
2639 // call temp->GetEntryPoint();
2640 __ call(Address(
2641 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2642
2643 DCHECK(!codegen_->IsLeafMethod());
2644 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2645 }
2646
2647 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2648 HandleInvoke(invoke);
2649 }
2650
2651 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2652 codegen_->GenerateInvokePolymorphicCall(invoke);
2653 }
2654
2655 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2656 HandleInvoke(invoke);
2657 }
2658
2659 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2660 codegen_->GenerateInvokeCustomCall(invoke);
2661 }
2662
2663 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2664 LocationSummary* locations =
2665 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2666 switch (neg->GetResultType()) {
2667 case DataType::Type::kInt32:
2668 case DataType::Type::kInt64:
2669 locations->SetInAt(0, Location::RequiresRegister());
2670 locations->SetOut(Location::SameAsFirstInput());
2671 break;
2672
2673 case DataType::Type::kFloat32:
2674 case DataType::Type::kFloat64:
2675 locations->SetInAt(0, Location::RequiresFpuRegister());
2676 locations->SetOut(Location::SameAsFirstInput());
2677 locations->AddTemp(Location::RequiresFpuRegister());
2678 break;
2679
2680 default:
2681 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2682 }
2683 }
2684
2685 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2686 LocationSummary* locations = neg->GetLocations();
2687 Location out = locations->Out();
2688 Location in = locations->InAt(0);
2689 switch (neg->GetResultType()) {
2690 case DataType::Type::kInt32:
2691 DCHECK(in.IsRegister());
2692 DCHECK(in.Equals(out));
2693 __ negl(out.AsRegister<CpuRegister>());
2694 break;
2695
2696 case DataType::Type::kInt64:
2697 DCHECK(in.IsRegister());
2698 DCHECK(in.Equals(out));
2699 __ negq(out.AsRegister<CpuRegister>());
2700 break;
2701
2702 case DataType::Type::kFloat32: {
2703 DCHECK(in.Equals(out));
2704 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2705 // Implement float negation with an exclusive or with value
2706 // 0x80000000 (mask for bit 31, representing the sign of a
2707 // single-precision floating-point number).
2708 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2709 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2710 break;
2711 }
2712
2713 case DataType::Type::kFloat64: {
2714 DCHECK(in.Equals(out));
2715 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2716 // Implement double negation with an exclusive or with value
2717 // 0x8000000000000000 (mask for bit 63, representing the sign of
2718 // a double-precision floating-point number).
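// xorpd with this mask flips only the sign bit, turning 0.0 into -0.0 and
// negating NaNs without otherwise altering their payload.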
2719 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000))); 2720 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask); 2721 break; 2722 } 2723 2724 default: 2725 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType(); 2726 } 2727 } 2728 2729 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { 2730 LocationSummary* locations = 2731 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall); 2732 DataType::Type result_type = conversion->GetResultType(); 2733 DataType::Type input_type = conversion->GetInputType(); 2734 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 2735 << input_type << " -> " << result_type; 2736 2737 switch (result_type) { 2738 case DataType::Type::kUint8: 2739 case DataType::Type::kInt8: 2740 case DataType::Type::kUint16: 2741 case DataType::Type::kInt16: 2742 DCHECK(DataType::IsIntegralType(input_type)) << input_type; 2743 locations->SetInAt(0, Location::Any()); 2744 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2745 break; 2746 2747 case DataType::Type::kInt32: 2748 switch (input_type) { 2749 case DataType::Type::kInt64: 2750 locations->SetInAt(0, Location::Any()); 2751 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 2752 break; 2753 2754 case DataType::Type::kFloat32: 2755 locations->SetInAt(0, Location::RequiresFpuRegister()); 2756 locations->SetOut(Location::RequiresRegister()); 2757 break; 2758 2759 case DataType::Type::kFloat64: 2760 locations->SetInAt(0, Location::RequiresFpuRegister()); 2761 locations->SetOut(Location::RequiresRegister()); 2762 break; 2763 2764 default: 2765 LOG(FATAL) << "Unexpected type conversion from " << input_type 2766 << " to " << result_type; 2767 } 2768 break; 2769 2770 case DataType::Type::kInt64: 2771 switch (input_type) { 2772 case DataType::Type::kBool: 2773 case DataType::Type::kUint8: 2774 case DataType::Type::kInt8: 2775 case DataType::Type::kUint16: 2776 case DataType::Type::kInt16: 2777 case DataType::Type::kInt32: 2778 // TODO: We would benefit from a (to-be-implemented) 2779 // Location::RegisterOrStackSlot requirement for this input. 
2780 locations->SetInAt(0, Location::RequiresRegister()); 2781 locations->SetOut(Location::RequiresRegister()); 2782 break; 2783 2784 case DataType::Type::kFloat32: 2785 locations->SetInAt(0, Location::RequiresFpuRegister()); 2786 locations->SetOut(Location::RequiresRegister()); 2787 break; 2788 2789 case DataType::Type::kFloat64: 2790 locations->SetInAt(0, Location::RequiresFpuRegister()); 2791 locations->SetOut(Location::RequiresRegister()); 2792 break; 2793 2794 default: 2795 LOG(FATAL) << "Unexpected type conversion from " << input_type 2796 << " to " << result_type; 2797 } 2798 break; 2799 2800 case DataType::Type::kFloat32: 2801 switch (input_type) { 2802 case DataType::Type::kBool: 2803 case DataType::Type::kUint8: 2804 case DataType::Type::kInt8: 2805 case DataType::Type::kUint16: 2806 case DataType::Type::kInt16: 2807 case DataType::Type::kInt32: 2808 locations->SetInAt(0, Location::Any()); 2809 locations->SetOut(Location::RequiresFpuRegister()); 2810 break; 2811 2812 case DataType::Type::kInt64: 2813 locations->SetInAt(0, Location::Any()); 2814 locations->SetOut(Location::RequiresFpuRegister()); 2815 break; 2816 2817 case DataType::Type::kFloat64: 2818 locations->SetInAt(0, Location::Any()); 2819 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2820 break; 2821 2822 default: 2823 LOG(FATAL) << "Unexpected type conversion from " << input_type 2824 << " to " << result_type; 2825 } 2826 break; 2827 2828 case DataType::Type::kFloat64: 2829 switch (input_type) { 2830 case DataType::Type::kBool: 2831 case DataType::Type::kUint8: 2832 case DataType::Type::kInt8: 2833 case DataType::Type::kUint16: 2834 case DataType::Type::kInt16: 2835 case DataType::Type::kInt32: 2836 locations->SetInAt(0, Location::Any()); 2837 locations->SetOut(Location::RequiresFpuRegister()); 2838 break; 2839 2840 case DataType::Type::kInt64: 2841 locations->SetInAt(0, Location::Any()); 2842 locations->SetOut(Location::RequiresFpuRegister()); 2843 break; 2844 2845 case DataType::Type::kFloat32: 2846 locations->SetInAt(0, Location::Any()); 2847 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 2848 break; 2849 2850 default: 2851 LOG(FATAL) << "Unexpected type conversion from " << input_type 2852 << " to " << result_type; 2853 } 2854 break; 2855 2856 default: 2857 LOG(FATAL) << "Unexpected type conversion from " << input_type 2858 << " to " << result_type; 2859 } 2860 } 2861 2862 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) { 2863 LocationSummary* locations = conversion->GetLocations(); 2864 Location out = locations->Out(); 2865 Location in = locations->InAt(0); 2866 DataType::Type result_type = conversion->GetResultType(); 2867 DataType::Type input_type = conversion->GetInputType(); 2868 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) 2869 << input_type << " -> " << result_type; 2870 switch (result_type) { 2871 case DataType::Type::kUint8: 2872 switch (input_type) { 2873 case DataType::Type::kInt8: 2874 case DataType::Type::kUint16: 2875 case DataType::Type::kInt16: 2876 case DataType::Type::kInt32: 2877 case DataType::Type::kInt64: 2878 if (in.IsRegister()) { 2879 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2880 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2881 __ movzxb(out.AsRegister<CpuRegister>(), 2882 Address(CpuRegister(RSP), in.GetStackIndex())); 2883 } else { 2884 __ movl(out.AsRegister<CpuRegister>(), 2885 
Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant())))); 2886 } 2887 break; 2888 2889 default: 2890 LOG(FATAL) << "Unexpected type conversion from " << input_type 2891 << " to " << result_type; 2892 } 2893 break; 2894 2895 case DataType::Type::kInt8: 2896 switch (input_type) { 2897 case DataType::Type::kUint8: 2898 case DataType::Type::kUint16: 2899 case DataType::Type::kInt16: 2900 case DataType::Type::kInt32: 2901 case DataType::Type::kInt64: 2902 if (in.IsRegister()) { 2903 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2904 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2905 __ movsxb(out.AsRegister<CpuRegister>(), 2906 Address(CpuRegister(RSP), in.GetStackIndex())); 2907 } else { 2908 __ movl(out.AsRegister<CpuRegister>(), 2909 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant())))); 2910 } 2911 break; 2912 2913 default: 2914 LOG(FATAL) << "Unexpected type conversion from " << input_type 2915 << " to " << result_type; 2916 } 2917 break; 2918 2919 case DataType::Type::kUint16: 2920 switch (input_type) { 2921 case DataType::Type::kInt8: 2922 case DataType::Type::kInt16: 2923 case DataType::Type::kInt32: 2924 case DataType::Type::kInt64: 2925 if (in.IsRegister()) { 2926 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2927 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2928 __ movzxw(out.AsRegister<CpuRegister>(), 2929 Address(CpuRegister(RSP), in.GetStackIndex())); 2930 } else { 2931 __ movl(out.AsRegister<CpuRegister>(), 2932 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant())))); 2933 } 2934 break; 2935 2936 default: 2937 LOG(FATAL) << "Unexpected type conversion from " << input_type 2938 << " to " << result_type; 2939 } 2940 break; 2941 2942 case DataType::Type::kInt16: 2943 switch (input_type) { 2944 case DataType::Type::kUint16: 2945 case DataType::Type::kInt32: 2946 case DataType::Type::kInt64: 2947 if (in.IsRegister()) { 2948 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2949 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { 2950 __ movsxw(out.AsRegister<CpuRegister>(), 2951 Address(CpuRegister(RSP), in.GetStackIndex())); 2952 } else { 2953 __ movl(out.AsRegister<CpuRegister>(), 2954 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant())))); 2955 } 2956 break; 2957 2958 default: 2959 LOG(FATAL) << "Unexpected type conversion from " << input_type 2960 << " to " << result_type; 2961 } 2962 break; 2963 2964 case DataType::Type::kInt32: 2965 switch (input_type) { 2966 case DataType::Type::kInt64: 2967 if (in.IsRegister()) { 2968 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 2969 } else if (in.IsDoubleStackSlot()) { 2970 __ movl(out.AsRegister<CpuRegister>(), 2971 Address(CpuRegister(RSP), in.GetStackIndex())); 2972 } else { 2973 DCHECK(in.IsConstant()); 2974 DCHECK(in.GetConstant()->IsLongConstant()); 2975 int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); 2976 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); 2977 } 2978 break; 2979 2980 case DataType::Type::kFloat32: { 2981 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 2982 CpuRegister output = out.AsRegister<CpuRegister>(); 2983 NearLabel done, nan; 2984 2985 __ movl(output, Immediate(kPrimIntMax)); 2986 // if input >= (float)INT_MAX goto done 2987 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax))); 2988 __ j(kAboveEqual, &done); 2989 // if input == NaN goto nan 2990 __ 
j(kUnordered, &nan); 2991 // output = float-to-int-truncate(input) 2992 __ cvttss2si(output, input, false); 2993 __ jmp(&done); 2994 __ Bind(&nan); 2995 // output = 0 2996 __ xorl(output, output); 2997 __ Bind(&done); 2998 break; 2999 } 3000 3001 case DataType::Type::kFloat64: { 3002 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 3003 CpuRegister output = out.AsRegister<CpuRegister>(); 3004 NearLabel done, nan; 3005 3006 __ movl(output, Immediate(kPrimIntMax)); 3007 // if input >= (double)INT_MAX goto done 3008 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax)); 3009 __ j(kAboveEqual, &done); 3010 // if input == NaN goto nan 3011 __ j(kUnordered, &nan); 3012 // output = double-to-int-truncate(input) 3013 __ cvttsd2si(output, input); 3014 __ jmp(&done); 3015 __ Bind(&nan); 3016 // output = 0 3017 __ xorl(output, output); 3018 __ Bind(&done); 3019 break; 3020 } 3021 3022 default: 3023 LOG(FATAL) << "Unexpected type conversion from " << input_type 3024 << " to " << result_type; 3025 } 3026 break; 3027 3028 case DataType::Type::kInt64: 3029 switch (input_type) { 3030 DCHECK(out.IsRegister()); 3031 case DataType::Type::kBool: 3032 case DataType::Type::kUint8: 3033 case DataType::Type::kInt8: 3034 case DataType::Type::kUint16: 3035 case DataType::Type::kInt16: 3036 case DataType::Type::kInt32: 3037 DCHECK(in.IsRegister()); 3038 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); 3039 break; 3040 3041 case DataType::Type::kFloat32: { 3042 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 3043 CpuRegister output = out.AsRegister<CpuRegister>(); 3044 NearLabel done, nan; 3045 3046 codegen_->Load64BitValue(output, kPrimLongMax); 3047 // if input >= (float)LONG_MAX goto done 3048 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax))); 3049 __ j(kAboveEqual, &done); 3050 // if input == NaN goto nan 3051 __ j(kUnordered, &nan); 3052 // output = float-to-long-truncate(input) 3053 __ cvttss2si(output, input, true); 3054 __ jmp(&done); 3055 __ Bind(&nan); 3056 // output = 0 3057 __ xorl(output, output); 3058 __ Bind(&done); 3059 break; 3060 } 3061 3062 case DataType::Type::kFloat64: { 3063 XmmRegister input = in.AsFpuRegister<XmmRegister>(); 3064 CpuRegister output = out.AsRegister<CpuRegister>(); 3065 NearLabel done, nan; 3066 3067 codegen_->Load64BitValue(output, kPrimLongMax); 3068 // if input >= (double)LONG_MAX goto done 3069 __ comisd(input, codegen_->LiteralDoubleAddress( 3070 static_cast<double>(kPrimLongMax))); 3071 __ j(kAboveEqual, &done); 3072 // if input == NaN goto nan 3073 __ j(kUnordered, &nan); 3074 // output = double-to-long-truncate(input) 3075 __ cvttsd2si(output, input, true); 3076 __ jmp(&done); 3077 __ Bind(&nan); 3078 // output = 0 3079 __ xorl(output, output); 3080 __ Bind(&done); 3081 break; 3082 } 3083 3084 default: 3085 LOG(FATAL) << "Unexpected type conversion from " << input_type 3086 << " to " << result_type; 3087 } 3088 break; 3089 3090 case DataType::Type::kFloat32: 3091 switch (input_type) { 3092 case DataType::Type::kBool: 3093 case DataType::Type::kUint8: 3094 case DataType::Type::kInt8: 3095 case DataType::Type::kUint16: 3096 case DataType::Type::kInt16: 3097 case DataType::Type::kInt32: 3098 if (in.IsRegister()) { 3099 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); 3100 } else if (in.IsConstant()) { 3101 int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); 3102 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3103 codegen_->Load32BitValue(dest, 
static_cast<float>(v)); 3104 } else { 3105 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), 3106 Address(CpuRegister(RSP), in.GetStackIndex()), false); 3107 } 3108 break; 3109 3110 case DataType::Type::kInt64: 3111 if (in.IsRegister()) { 3112 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); 3113 } else if (in.IsConstant()) { 3114 int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); 3115 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3116 codegen_->Load32BitValue(dest, static_cast<float>(v)); 3117 } else { 3118 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), 3119 Address(CpuRegister(RSP), in.GetStackIndex()), true); 3120 } 3121 break; 3122 3123 case DataType::Type::kFloat64: 3124 if (in.IsFpuRegister()) { 3125 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); 3126 } else if (in.IsConstant()) { 3127 double v = in.GetConstant()->AsDoubleConstant()->GetValue(); 3128 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3129 codegen_->Load32BitValue(dest, static_cast<float>(v)); 3130 } else { 3131 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), 3132 Address(CpuRegister(RSP), in.GetStackIndex())); 3133 } 3134 break; 3135 3136 default: 3137 LOG(FATAL) << "Unexpected type conversion from " << input_type 3138 << " to " << result_type; 3139 } 3140 break; 3141 3142 case DataType::Type::kFloat64: 3143 switch (input_type) { 3144 case DataType::Type::kBool: 3145 case DataType::Type::kUint8: 3146 case DataType::Type::kInt8: 3147 case DataType::Type::kUint16: 3148 case DataType::Type::kInt16: 3149 case DataType::Type::kInt32: 3150 if (in.IsRegister()) { 3151 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); 3152 } else if (in.IsConstant()) { 3153 int32_t v = in.GetConstant()->AsIntConstant()->GetValue(); 3154 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3155 codegen_->Load64BitValue(dest, static_cast<double>(v)); 3156 } else { 3157 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), 3158 Address(CpuRegister(RSP), in.GetStackIndex()), false); 3159 } 3160 break; 3161 3162 case DataType::Type::kInt64: 3163 if (in.IsRegister()) { 3164 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); 3165 } else if (in.IsConstant()) { 3166 int64_t v = in.GetConstant()->AsLongConstant()->GetValue(); 3167 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3168 codegen_->Load64BitValue(dest, static_cast<double>(v)); 3169 } else { 3170 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), 3171 Address(CpuRegister(RSP), in.GetStackIndex()), true); 3172 } 3173 break; 3174 3175 case DataType::Type::kFloat32: 3176 if (in.IsFpuRegister()) { 3177 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); 3178 } else if (in.IsConstant()) { 3179 float v = in.GetConstant()->AsFloatConstant()->GetValue(); 3180 XmmRegister dest = out.AsFpuRegister<XmmRegister>(); 3181 codegen_->Load64BitValue(dest, static_cast<double>(v)); 3182 } else { 3183 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), 3184 Address(CpuRegister(RSP), in.GetStackIndex())); 3185 } 3186 break; 3187 3188 default: 3189 LOG(FATAL) << "Unexpected type conversion from " << input_type 3190 << " to " << result_type; 3191 } 3192 break; 3193 3194 default: 3195 LOG(FATAL) << "Unexpected type conversion from " << input_type 3196 << " to " << result_type; 3197 } 3198 } 3199 3200 void LocationsBuilderX86_64::VisitAdd(HAdd* add) { 3201 LocationSummary* locations = 3202 new (GetGraph()->GetAllocator()) LocationSummary(add, 
LocationSummary::kNoCall); 3203 switch (add->GetResultType()) { 3204 case DataType::Type::kInt32: { 3205 locations->SetInAt(0, Location::RequiresRegister()); 3206 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); 3207 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3208 break; 3209 } 3210 3211 case DataType::Type::kInt64: { 3212 locations->SetInAt(0, Location::RequiresRegister()); 3213 // We can use a leaq or addq if the constant can fit in an immediate. 3214 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1))); 3215 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3216 break; 3217 } 3218 3219 case DataType::Type::kFloat64: 3220 case DataType::Type::kFloat32: { 3221 locations->SetInAt(0, Location::RequiresFpuRegister()); 3222 locations->SetInAt(1, Location::Any()); 3223 locations->SetOut(Location::SameAsFirstInput()); 3224 break; 3225 } 3226 3227 default: 3228 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 3229 } 3230 } 3231 3232 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { 3233 LocationSummary* locations = add->GetLocations(); 3234 Location first = locations->InAt(0); 3235 Location second = locations->InAt(1); 3236 Location out = locations->Out(); 3237 3238 switch (add->GetResultType()) { 3239 case DataType::Type::kInt32: { 3240 if (second.IsRegister()) { 3241 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3242 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3243 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) { 3244 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>()); 3245 } else { 3246 __ leal(out.AsRegister<CpuRegister>(), Address( 3247 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); 3248 } 3249 } else if (second.IsConstant()) { 3250 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3251 __ addl(out.AsRegister<CpuRegister>(), 3252 Immediate(second.GetConstant()->AsIntConstant()->GetValue())); 3253 } else { 3254 __ leal(out.AsRegister<CpuRegister>(), Address( 3255 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue())); 3256 } 3257 } else { 3258 DCHECK(first.Equals(locations->Out())); 3259 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); 3260 } 3261 break; 3262 } 3263 3264 case DataType::Type::kInt64: { 3265 if (second.IsRegister()) { 3266 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3267 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3268 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) { 3269 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>()); 3270 } else { 3271 __ leaq(out.AsRegister<CpuRegister>(), Address( 3272 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0)); 3273 } 3274 } else { 3275 DCHECK(second.IsConstant()); 3276 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 3277 int32_t int32_value = Low32Bits(value); 3278 DCHECK_EQ(int32_value, value); 3279 if (out.AsRegister<Register>() == first.AsRegister<Register>()) { 3280 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value)); 3281 } else { 3282 __ leaq(out.AsRegister<CpuRegister>(), Address( 3283 first.AsRegister<CpuRegister>(), int32_value)); 3284 } 3285 } 3286 break; 3287 } 3288 3289 case DataType::Type::kFloat32: { 3290 if (second.IsFpuRegister()) { 
3291 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3292 } else if (second.IsConstant()) { 3293 __ addss(first.AsFpuRegister<XmmRegister>(), 3294 codegen_->LiteralFloatAddress( 3295 second.GetConstant()->AsFloatConstant()->GetValue())); 3296 } else { 3297 DCHECK(second.IsStackSlot()); 3298 __ addss(first.AsFpuRegister<XmmRegister>(), 3299 Address(CpuRegister(RSP), second.GetStackIndex())); 3300 } 3301 break; 3302 } 3303 3304 case DataType::Type::kFloat64: { 3305 if (second.IsFpuRegister()) { 3306 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3307 } else if (second.IsConstant()) { 3308 __ addsd(first.AsFpuRegister<XmmRegister>(), 3309 codegen_->LiteralDoubleAddress( 3310 second.GetConstant()->AsDoubleConstant()->GetValue())); 3311 } else { 3312 DCHECK(second.IsDoubleStackSlot()); 3313 __ addsd(first.AsFpuRegister<XmmRegister>(), 3314 Address(CpuRegister(RSP), second.GetStackIndex())); 3315 } 3316 break; 3317 } 3318 3319 default: 3320 LOG(FATAL) << "Unexpected add type " << add->GetResultType(); 3321 } 3322 } 3323 3324 void LocationsBuilderX86_64::VisitSub(HSub* sub) { 3325 LocationSummary* locations = 3326 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall); 3327 switch (sub->GetResultType()) { 3328 case DataType::Type::kInt32: { 3329 locations->SetInAt(0, Location::RequiresRegister()); 3330 locations->SetInAt(1, Location::Any()); 3331 locations->SetOut(Location::SameAsFirstInput()); 3332 break; 3333 } 3334 case DataType::Type::kInt64: { 3335 locations->SetInAt(0, Location::RequiresRegister()); 3336 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1))); 3337 locations->SetOut(Location::SameAsFirstInput()); 3338 break; 3339 } 3340 case DataType::Type::kFloat32: 3341 case DataType::Type::kFloat64: { 3342 locations->SetInAt(0, Location::RequiresFpuRegister()); 3343 locations->SetInAt(1, Location::Any()); 3344 locations->SetOut(Location::SameAsFirstInput()); 3345 break; 3346 } 3347 default: 3348 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); 3349 } 3350 } 3351 3352 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { 3353 LocationSummary* locations = sub->GetLocations(); 3354 Location first = locations->InAt(0); 3355 Location second = locations->InAt(1); 3356 DCHECK(first.Equals(locations->Out())); 3357 switch (sub->GetResultType()) { 3358 case DataType::Type::kInt32: { 3359 if (second.IsRegister()) { 3360 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3361 } else if (second.IsConstant()) { 3362 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); 3363 __ subl(first.AsRegister<CpuRegister>(), imm); 3364 } else { 3365 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex())); 3366 } 3367 break; 3368 } 3369 case DataType::Type::kInt64: { 3370 if (second.IsConstant()) { 3371 int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); 3372 DCHECK(IsInt<32>(value)); 3373 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value))); 3374 } else { 3375 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3376 } 3377 break; 3378 } 3379 3380 case DataType::Type::kFloat32: { 3381 if (second.IsFpuRegister()) { 3382 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3383 } else if (second.IsConstant()) { 3384 __ subss(first.AsFpuRegister<XmmRegister>(), 3385 codegen_->LiteralFloatAddress( 3386 
second.GetConstant()->AsFloatConstant()->GetValue())); 3387 } else { 3388 DCHECK(second.IsStackSlot()); 3389 __ subss(first.AsFpuRegister<XmmRegister>(), 3390 Address(CpuRegister(RSP), second.GetStackIndex())); 3391 } 3392 break; 3393 } 3394 3395 case DataType::Type::kFloat64: { 3396 if (second.IsFpuRegister()) { 3397 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3398 } else if (second.IsConstant()) { 3399 __ subsd(first.AsFpuRegister<XmmRegister>(), 3400 codegen_->LiteralDoubleAddress( 3401 second.GetConstant()->AsDoubleConstant()->GetValue())); 3402 } else { 3403 DCHECK(second.IsDoubleStackSlot()); 3404 __ subsd(first.AsFpuRegister<XmmRegister>(), 3405 Address(CpuRegister(RSP), second.GetStackIndex())); 3406 } 3407 break; 3408 } 3409 3410 default: 3411 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType(); 3412 } 3413 } 3414 3415 void LocationsBuilderX86_64::VisitMul(HMul* mul) { 3416 LocationSummary* locations = 3417 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); 3418 switch (mul->GetResultType()) { 3419 case DataType::Type::kInt32: { 3420 locations->SetInAt(0, Location::RequiresRegister()); 3421 locations->SetInAt(1, Location::Any()); 3422 if (mul->InputAt(1)->IsIntConstant()) { 3423 // Can use 3 operand multiply. 3424 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3425 } else { 3426 locations->SetOut(Location::SameAsFirstInput()); 3427 } 3428 break; 3429 } 3430 case DataType::Type::kInt64: { 3431 locations->SetInAt(0, Location::RequiresRegister()); 3432 locations->SetInAt(1, Location::Any()); 3433 if (mul->InputAt(1)->IsLongConstant() && 3434 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) { 3435 // Can use 3 operand multiply. 3436 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 3437 } else { 3438 locations->SetOut(Location::SameAsFirstInput()); 3439 } 3440 break; 3441 } 3442 case DataType::Type::kFloat32: 3443 case DataType::Type::kFloat64: { 3444 locations->SetInAt(0, Location::RequiresFpuRegister()); 3445 locations->SetInAt(1, Location::Any()); 3446 locations->SetOut(Location::SameAsFirstInput()); 3447 break; 3448 } 3449 3450 default: 3451 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 3452 } 3453 } 3454 3455 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { 3456 LocationSummary* locations = mul->GetLocations(); 3457 Location first = locations->InAt(0); 3458 Location second = locations->InAt(1); 3459 Location out = locations->Out(); 3460 switch (mul->GetResultType()) { 3461 case DataType::Type::kInt32: 3462 // The constant may have ended up in a register, so test explicitly to avoid 3463 // problems where the output may not be the same as the first operand. 3464 if (mul->InputAt(1)->IsIntConstant()) { 3465 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue()); 3466 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm); 3467 } else if (second.IsRegister()) { 3468 DCHECK(first.Equals(out)); 3469 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3470 } else { 3471 DCHECK(first.Equals(out)); 3472 DCHECK(second.IsStackSlot()); 3473 __ imull(first.AsRegister<CpuRegister>(), 3474 Address(CpuRegister(RSP), second.GetStackIndex())); 3475 } 3476 break; 3477 case DataType::Type::kInt64: { 3478 // The constant may have ended up in a register, so test explicitly to avoid 3479 // problems where the output may not be the same as the first operand. 
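// Note that the immediate form of imulq (imul r64, r/m64, imm32) only encodes a sign-extended
// 32-bit immediate, so 64-bit constants outside the int32 range are read from the in-memory
// constant area instead. The three-operand immediate form also allows the destination to differ
// from the first source, which is why the locations above do not force the output to share a
// register with the first input when the constant fits in 32 bits.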
3480 if (mul->InputAt(1)->IsLongConstant()) { 3481 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue(); 3482 if (IsInt<32>(value)) { 3483 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), 3484 Immediate(static_cast<int32_t>(value))); 3485 } else { 3486 // Have to use the constant area. 3487 DCHECK(first.Equals(out)); 3488 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value)); 3489 } 3490 } else if (second.IsRegister()) { 3491 DCHECK(first.Equals(out)); 3492 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 3493 } else { 3494 DCHECK(second.IsDoubleStackSlot()); 3495 DCHECK(first.Equals(out)); 3496 __ imulq(first.AsRegister<CpuRegister>(), 3497 Address(CpuRegister(RSP), second.GetStackIndex())); 3498 } 3499 break; 3500 } 3501 3502 case DataType::Type::kFloat32: { 3503 DCHECK(first.Equals(out)); 3504 if (second.IsFpuRegister()) { 3505 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3506 } else if (second.IsConstant()) { 3507 __ mulss(first.AsFpuRegister<XmmRegister>(), 3508 codegen_->LiteralFloatAddress( 3509 second.GetConstant()->AsFloatConstant()->GetValue())); 3510 } else { 3511 DCHECK(second.IsStackSlot()); 3512 __ mulss(first.AsFpuRegister<XmmRegister>(), 3513 Address(CpuRegister(RSP), second.GetStackIndex())); 3514 } 3515 break; 3516 } 3517 3518 case DataType::Type::kFloat64: { 3519 DCHECK(first.Equals(out)); 3520 if (second.IsFpuRegister()) { 3521 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3522 } else if (second.IsConstant()) { 3523 __ mulsd(first.AsFpuRegister<XmmRegister>(), 3524 codegen_->LiteralDoubleAddress( 3525 second.GetConstant()->AsDoubleConstant()->GetValue())); 3526 } else { 3527 DCHECK(second.IsDoubleStackSlot()); 3528 __ mulsd(first.AsFpuRegister<XmmRegister>(), 3529 Address(CpuRegister(RSP), second.GetStackIndex())); 3530 } 3531 break; 3532 } 3533 3534 default: 3535 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType(); 3536 } 3537 } 3538 3539 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset, 3540 uint32_t stack_adjustment, bool is_float) { 3541 if (source.IsStackSlot()) { 3542 DCHECK(is_float); 3543 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); 3544 } else if (source.IsDoubleStackSlot()) { 3545 DCHECK(!is_float); 3546 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment)); 3547 } else { 3548 // Write the value to the temporary location on the stack and load to FP stack. 3549 if (is_float) { 3550 Location stack_temp = Location::StackSlot(temp_offset); 3551 codegen_->Move(stack_temp, source); 3552 __ flds(Address(CpuRegister(RSP), temp_offset)); 3553 } else { 3554 Location stack_temp = Location::DoubleStackSlot(temp_offset); 3555 codegen_->Move(stack_temp, source); 3556 __ fldl(Address(CpuRegister(RSP), temp_offset)); 3557 } 3558 } 3559 } 3560 3561 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { 3562 DataType::Type type = rem->GetResultType(); 3563 bool is_float = type == DataType::Type::kFloat32; 3564 size_t elem_size = DataType::Size(type); 3565 LocationSummary* locations = rem->GetLocations(); 3566 Location first = locations->InAt(0); 3567 Location second = locations->InAt(1); 3568 Location out = locations->Out(); 3569 3570 // Create stack space for 2 elements. 3571 // TODO: enhance register allocator to ask for stack temporaries. 
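// SSE2 has no floating-point remainder instruction, so the inputs are moved to the x87 stack
// (through the two stack slots reserved below when they are not already in memory) and reduced
// with FPREM. FPREM only performs a partial reduction when the exponents are far apart, which is
// why it is retried in a loop until the C2 status flag reports that the reduction is complete.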
3572 __ subq(CpuRegister(RSP), Immediate(2 * elem_size)); 3573 3574 // Load the values to the FP stack in reverse order, using temporaries if needed. 3575 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float); 3576 PushOntoFPStack(first, 0, 2 * elem_size, is_float); 3577 3578 // Loop doing FPREM until we stabilize. 3579 NearLabel retry; 3580 __ Bind(&retry); 3581 __ fprem(); 3582 3583 // Move FP status to AX. 3584 __ fstsw(); 3585 3586 // And see if the argument reduction is complete. This is signaled by the 3587 // C2 FPU flag bit set to 0. 3588 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask)); 3589 __ j(kNotEqual, &retry); 3590 3591 // We have settled on the final value. Retrieve it into an XMM register. 3592 // Store FP top of stack to real stack. 3593 if (is_float) { 3594 __ fsts(Address(CpuRegister(RSP), 0)); 3595 } else { 3596 __ fstl(Address(CpuRegister(RSP), 0)); 3597 } 3598 3599 // Pop the 2 items from the FP stack. 3600 __ fucompp(); 3601 3602 // Load the value from the stack into an XMM register. 3603 DCHECK(out.IsFpuRegister()) << out; 3604 if (is_float) { 3605 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); 3606 } else { 3607 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0)); 3608 } 3609 3610 // And remove the temporary stack space we allocated. 3611 __ addq(CpuRegister(RSP), Immediate(2 * elem_size)); 3612 } 3613 3614 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { 3615 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3616 3617 LocationSummary* locations = instruction->GetLocations(); 3618 Location second = locations->InAt(1); 3619 DCHECK(second.IsConstant()); 3620 3621 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>(); 3622 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>(); 3623 int64_t imm = Int64FromConstant(second.GetConstant()); 3624 3625 DCHECK(imm == 1 || imm == -1); 3626 3627 switch (instruction->GetResultType()) { 3628 case DataType::Type::kInt32: { 3629 if (instruction->IsRem()) { 3630 __ xorl(output_register, output_register); 3631 } else { 3632 __ movl(output_register, input_register); 3633 if (imm == -1) { 3634 __ negl(output_register); 3635 } 3636 } 3637 break; 3638 } 3639 3640 case DataType::Type::kInt64: { 3641 if (instruction->IsRem()) { 3642 __ xorl(output_register, output_register); 3643 } else { 3644 __ movq(output_register, input_register); 3645 if (imm == -1) { 3646 __ negq(output_register); 3647 } 3648 } 3649 break; 3650 } 3651 3652 default: 3653 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType(); 3654 } 3655 } 3656 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) { 3657 LocationSummary* locations = instruction->GetLocations(); 3658 Location second = locations->InAt(1); 3659 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 3660 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); 3661 int64_t imm = Int64FromConstant(second.GetConstant()); 3662 DCHECK(IsPowerOfTwo(AbsOrMin(imm))); 3663 uint64_t abs_imm = AbsOrMin(imm); 3664 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); 3665 if (instruction->GetResultType() == DataType::Type::kInt32) { 3666 NearLabel done; 3667 __ movl(out, numerator); 3668 __ andl(out, Immediate(abs_imm-1)); 3669 __ j(Condition::kZero, &done); 3670 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1)))); 3671 __ testl(numerator, numerator); 3672 __ cmov(Condition::kLess, out, 
tmp, false);
3673 __ Bind(&done);
3674
3675 } else {
3676 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3677 codegen_->Load64BitValue(tmp, abs_imm - 1);
3678 NearLabel done;
3679
3680 __ movq(out, numerator);
3681 __ andq(out, tmp);
3682 __ j(Condition::kZero, &done);
3683 __ movq(tmp, numerator);
3684 __ sarq(tmp, Immediate(63));
3685 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
3686 __ orq(out, tmp);
3687 __ Bind(&done);
3688 }
3689 }
3690 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3691 LocationSummary* locations = instruction->GetLocations();
3692 Location second = locations->InAt(1);
3693
3694 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3695 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3696
3697 int64_t imm = Int64FromConstant(second.GetConstant());
3698 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3699 uint64_t abs_imm = AbsOrMin(imm);
3700
3701 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3702
3703 if (instruction->GetResultType() == DataType::Type::kInt32) {
3704 // When the absolute value of the denominator is 2, we can simply add the sign bit to the numerator in tmp.
3705 // Below we use an addl instruction instead of a cmov, which gives us a 1 cycle benefit.
3706 if (abs_imm == 2) {
3707 __ leal(tmp, Address(numerator, 0));
3708 __ shrl(tmp, Immediate(31));
3709 __ addl(tmp, numerator);
3710 } else {
3711 __ leal(tmp, Address(numerator, abs_imm - 1));
3712 __ testl(numerator, numerator);
3713 __ cmov(kGreaterEqual, tmp, numerator);
3714 }
3715 int shift = CTZ(imm);
3716 __ sarl(tmp, Immediate(shift));
3717
3718 if (imm < 0) {
3719 __ negl(tmp);
3720 }
3721
3722 __ movl(output_register, tmp);
3723 } else {
3724 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3725 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3726 if (abs_imm == 2) {
3727 __ movq(rdx, numerator);
3728 __ shrq(rdx, Immediate(63));
3729 __ addq(rdx, numerator);
3730 } else {
3731 codegen_->Load64BitValue(rdx, abs_imm - 1);
3732 __ addq(rdx, numerator);
3733 __ testq(numerator, numerator);
3734 __ cmov(kGreaterEqual, rdx, numerator);
3735 }
3736 int shift = CTZ(imm);
3737 __ sarq(rdx, Immediate(shift));
3738
3739 if (imm < 0) {
3740 __ negq(rdx);
3741 }
3742
3743 __ movq(output_register, rdx);
3744 }
3745 }
3746
3747 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3748 DCHECK(instruction->IsDiv() || instruction->IsRem());
3749
3750 LocationSummary* locations = instruction->GetLocations();
3751 Location second = locations->InAt(1);
3752
3753 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3754 : locations->GetTemp(0).AsRegister<CpuRegister>();
3755 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3756 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3757 : locations->Out().AsRegister<CpuRegister>();
3758 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3759
3760 DCHECK_EQ(RAX, eax.AsRegister());
3761 DCHECK_EQ(RDX, edx.AsRegister());
3762 if (instruction->IsDiv()) {
3763 DCHECK_EQ(RAX, out.AsRegister());
3764 } else {
3765 DCHECK_EQ(RDX, out.AsRegister());
3766 }
3767
3768 int64_t magic;
3769 int shift;
3770
3771 // TODO: can these branches be written as one?
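// In outline (Hacker's Delight style; the constants come from CalculateMagicAndShiftForDivRem):
// the quotient n / d is the high half of magic * n, corrected by +/- n when the signs of magic
// and d differ, arithmetically shifted right by `shift`, and incremented by one when that
// intermediate value is negative so the result rounds towards zero. For example, for d = 7 in
// 32-bit code the usual constants are magic = 0x92492493 and shift = 2, i.e.
// n / 7 == ((high32(magic * n) + n) >> 2), plus 1 if that shifted value is negative.
// The remainder path then reconstructs n - (n / d) * d with an extra multiply and subtraction.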
3772 if (instruction->GetResultType() == DataType::Type::kInt32) { 3773 int imm = second.GetConstant()->AsIntConstant()->GetValue(); 3774 3775 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift); 3776 3777 __ movl(numerator, eax); 3778 3779 __ movl(eax, Immediate(magic)); 3780 __ imull(numerator); 3781 3782 if (imm > 0 && magic < 0) { 3783 __ addl(edx, numerator); 3784 } else if (imm < 0 && magic > 0) { 3785 __ subl(edx, numerator); 3786 } 3787 3788 if (shift != 0) { 3789 __ sarl(edx, Immediate(shift)); 3790 } 3791 3792 __ movl(eax, edx); 3793 __ shrl(edx, Immediate(31)); 3794 __ addl(edx, eax); 3795 3796 if (instruction->IsRem()) { 3797 __ movl(eax, numerator); 3798 __ imull(edx, Immediate(imm)); 3799 __ subl(eax, edx); 3800 __ movl(edx, eax); 3801 } else { 3802 __ movl(eax, edx); 3803 } 3804 } else { 3805 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue(); 3806 3807 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 3808 3809 CpuRegister rax = eax; 3810 CpuRegister rdx = edx; 3811 3812 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift); 3813 3814 // Save the numerator. 3815 __ movq(numerator, rax); 3816 3817 // RAX = magic 3818 codegen_->Load64BitValue(rax, magic); 3819 3820 // RDX:RAX = magic * numerator 3821 __ imulq(numerator); 3822 3823 if (imm > 0 && magic < 0) { 3824 // RDX += numerator 3825 __ addq(rdx, numerator); 3826 } else if (imm < 0 && magic > 0) { 3827 // RDX -= numerator 3828 __ subq(rdx, numerator); 3829 } 3830 3831 // Shift if needed. 3832 if (shift != 0) { 3833 __ sarq(rdx, Immediate(shift)); 3834 } 3835 3836 // RDX += 1 if RDX < 0 3837 __ movq(rax, rdx); 3838 __ shrq(rdx, Immediate(63)); 3839 __ addq(rdx, rax); 3840 3841 if (instruction->IsRem()) { 3842 __ movq(rax, numerator); 3843 3844 if (IsInt<32>(imm)) { 3845 __ imulq(rdx, Immediate(static_cast<int32_t>(imm))); 3846 } else { 3847 __ imulq(rdx, codegen_->LiteralInt64Address(imm)); 3848 } 3849 3850 __ subq(rax, rdx); 3851 __ movq(rdx, rax); 3852 } else { 3853 __ movq(rax, rdx); 3854 } 3855 } 3856 } 3857 3858 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { 3859 DCHECK(instruction->IsDiv() || instruction->IsRem()); 3860 DataType::Type type = instruction->GetResultType(); 3861 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 3862 3863 bool is_div = instruction->IsDiv(); 3864 LocationSummary* locations = instruction->GetLocations(); 3865 3866 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 3867 Location second = locations->InAt(1); 3868 3869 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister()); 3870 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister()); 3871 3872 if (second.IsConstant()) { 3873 int64_t imm = Int64FromConstant(second.GetConstant()); 3874 3875 if (imm == 0) { 3876 // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
3877 } else if (imm == 1 || imm == -1) {
3878 DivRemOneOrMinusOne(instruction);
3879 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3880 if (is_div) {
3881 DivByPowerOfTwo(instruction->AsDiv());
3882 } else {
3883 RemByPowerOfTwo(instruction->AsRem());
3884 }
3885 } else {
3886 DCHECK(imm <= -2 || imm >= 2);
3887 GenerateDivRemWithAnyConstant(instruction);
3888 }
3889 } else {
3890 SlowPathCode* slow_path =
3891 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
3892 instruction, out.AsRegister(), type, is_div);
3893 codegen_->AddSlowPath(slow_path);
3894
3895 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3896 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3897 // Dividing by -1 is actually negation, and -0x80000000(00000000) == 0x80000000(00000000),
3898 // so it's safe to just use negl/negq instead of more complex comparisons.
3899 if (type == DataType::Type::kInt32) {
3900 __ cmpl(second_reg, Immediate(-1));
3901 __ j(kEqual, slow_path->GetEntryLabel());
3902 // edx:eax <- sign extension of eax
3903 __ cdq();
3904 // eax = quotient, edx = remainder
3905 __ idivl(second_reg);
3906 } else {
3907 __ cmpq(second_reg, Immediate(-1));
3908 __ j(kEqual, slow_path->GetEntryLabel());
3909 // rdx:rax <- sign extension of rax
3910 __ cqo();
3911 // rax = quotient, rdx = remainder
3912 __ idivq(second_reg);
3913 }
3914 __ Bind(slow_path->GetExitLabel());
3915 }
3916 }
3917
3918 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3919 LocationSummary* locations =
3920 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3921 switch (div->GetResultType()) {
3922 case DataType::Type::kInt32:
3923 case DataType::Type::kInt64: {
3924 locations->SetInAt(0, Location::RegisterLocation(RAX));
3925 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3926 locations->SetOut(Location::SameAsFirstInput());
3927 // Intel uses edx:eax (rdx:rax for 64-bit values) as the dividend.
3928 locations->AddTemp(Location::RegisterLocation(RDX));
3929 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3930 // that forces its results into RAX and RDX, things are simpler if we also claim RDX as a
3931 // fixed temporary and request another temp for the numerator.
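// The extra temp requested below is consumed only by GenerateDivRemWithAnyConstant, which has
// to preserve the numerator while RAX and RDX are clobbered by the magic-number multiply; the
// power-of-two and +/-1 constant paths need at most the RDX temp added above.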
3932 if (div->InputAt(1)->IsConstant()) { 3933 locations->AddTemp(Location::RequiresRegister()); 3934 } 3935 break; 3936 } 3937 3938 case DataType::Type::kFloat32: 3939 case DataType::Type::kFloat64: { 3940 locations->SetInAt(0, Location::RequiresFpuRegister()); 3941 locations->SetInAt(1, Location::Any()); 3942 locations->SetOut(Location::SameAsFirstInput()); 3943 break; 3944 } 3945 3946 default: 3947 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3948 } 3949 } 3950 3951 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { 3952 LocationSummary* locations = div->GetLocations(); 3953 Location first = locations->InAt(0); 3954 Location second = locations->InAt(1); 3955 DCHECK(first.Equals(locations->Out())); 3956 3957 DataType::Type type = div->GetResultType(); 3958 switch (type) { 3959 case DataType::Type::kInt32: 3960 case DataType::Type::kInt64: { 3961 GenerateDivRemIntegral(div); 3962 break; 3963 } 3964 3965 case DataType::Type::kFloat32: { 3966 if (second.IsFpuRegister()) { 3967 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3968 } else if (second.IsConstant()) { 3969 __ divss(first.AsFpuRegister<XmmRegister>(), 3970 codegen_->LiteralFloatAddress( 3971 second.GetConstant()->AsFloatConstant()->GetValue())); 3972 } else { 3973 DCHECK(second.IsStackSlot()); 3974 __ divss(first.AsFpuRegister<XmmRegister>(), 3975 Address(CpuRegister(RSP), second.GetStackIndex())); 3976 } 3977 break; 3978 } 3979 3980 case DataType::Type::kFloat64: { 3981 if (second.IsFpuRegister()) { 3982 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); 3983 } else if (second.IsConstant()) { 3984 __ divsd(first.AsFpuRegister<XmmRegister>(), 3985 codegen_->LiteralDoubleAddress( 3986 second.GetConstant()->AsDoubleConstant()->GetValue())); 3987 } else { 3988 DCHECK(second.IsDoubleStackSlot()); 3989 __ divsd(first.AsFpuRegister<XmmRegister>(), 3990 Address(CpuRegister(RSP), second.GetStackIndex())); 3991 } 3992 break; 3993 } 3994 3995 default: 3996 LOG(FATAL) << "Unexpected div type " << div->GetResultType(); 3997 } 3998 } 3999 4000 void LocationsBuilderX86_64::VisitRem(HRem* rem) { 4001 DataType::Type type = rem->GetResultType(); 4002 LocationSummary* locations = 4003 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall); 4004 4005 switch (type) { 4006 case DataType::Type::kInt32: 4007 case DataType::Type::kInt64: { 4008 locations->SetInAt(0, Location::RegisterLocation(RAX)); 4009 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); 4010 // Intel uses rdx:rax as the dividend and puts the remainder in rdx 4011 locations->SetOut(Location::RegisterLocation(RDX)); 4012 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way 4013 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as 4014 // output and request another temp. 
4015 if (rem->InputAt(1)->IsConstant()) { 4016 locations->AddTemp(Location::RequiresRegister()); 4017 } 4018 break; 4019 } 4020 4021 case DataType::Type::kFloat32: 4022 case DataType::Type::kFloat64: { 4023 locations->SetInAt(0, Location::Any()); 4024 locations->SetInAt(1, Location::Any()); 4025 locations->SetOut(Location::RequiresFpuRegister()); 4026 locations->AddTemp(Location::RegisterLocation(RAX)); 4027 break; 4028 } 4029 4030 default: 4031 LOG(FATAL) << "Unexpected rem type " << type; 4032 } 4033 } 4034 4035 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { 4036 DataType::Type type = rem->GetResultType(); 4037 switch (type) { 4038 case DataType::Type::kInt32: 4039 case DataType::Type::kInt64: { 4040 GenerateDivRemIntegral(rem); 4041 break; 4042 } 4043 case DataType::Type::kFloat32: 4044 case DataType::Type::kFloat64: { 4045 GenerateRemFP(rem); 4046 break; 4047 } 4048 default: 4049 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType(); 4050 } 4051 } 4052 4053 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) { 4054 LocationSummary* locations = new (allocator) LocationSummary(minmax); 4055 switch (minmax->GetResultType()) { 4056 case DataType::Type::kInt32: 4057 case DataType::Type::kInt64: 4058 locations->SetInAt(0, Location::RequiresRegister()); 4059 locations->SetInAt(1, Location::RequiresRegister()); 4060 locations->SetOut(Location::SameAsFirstInput()); 4061 break; 4062 case DataType::Type::kFloat32: 4063 case DataType::Type::kFloat64: 4064 locations->SetInAt(0, Location::RequiresFpuRegister()); 4065 locations->SetInAt(1, Location::RequiresFpuRegister()); 4066 // The following is sub-optimal, but all we can do for now. It would be fine to also accept 4067 // the second input to be the output (we can simply swap inputs). 4068 locations->SetOut(Location::SameAsFirstInput()); 4069 break; 4070 default: 4071 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType(); 4072 } 4073 } 4074 4075 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations, 4076 bool is_min, 4077 DataType::Type type) { 4078 Location op1_loc = locations->InAt(0); 4079 Location op2_loc = locations->InAt(1); 4080 4081 // Shortcut for same input locations. 4082 if (op1_loc.Equals(op2_loc)) { 4083 // Can return immediately, as op1_loc == out_loc. 4084 // Note: if we ever support separate registers, e.g., output into memory, we need to check for 4085 // a copy here. 4086 DCHECK(locations->Out().Equals(op1_loc)); 4087 return; 4088 } 4089 4090 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 4091 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>(); 4092 4093 // (out := op1) 4094 // out <=? op2 4095 // if out is min jmp done 4096 // out := op2 4097 // done: 4098 4099 if (type == DataType::Type::kInt64) { 4100 __ cmpq(out, op2); 4101 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true); 4102 } else { 4103 DCHECK_EQ(type, DataType::Type::kInt32); 4104 __ cmpl(out, op2); 4105 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false); 4106 } 4107 } 4108 4109 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations, 4110 bool is_min, 4111 DataType::Type type) { 4112 Location op1_loc = locations->InAt(0); 4113 Location op2_loc = locations->InAt(1); 4114 Location out_loc = locations->Out(); 4115 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 4116 4117 // Shortcut for same input locations. 
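// (If both inputs share a location, out already holds that value, and min(x, x) == max(x, x) == x
// for any bit pattern, including NaNs and signed zeros, so no code needs to be emitted.)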
4118 if (op1_loc.Equals(op2_loc)) { 4119 DCHECK(out_loc.Equals(op1_loc)); 4120 return; 4121 } 4122 4123 // (out := op1) 4124 // out <=? op2 4125 // if Nan jmp Nan_label 4126 // if out is min jmp done 4127 // if op2 is min jmp op2_label 4128 // handle -0/+0 4129 // jmp done 4130 // Nan_label: 4131 // out := NaN 4132 // op2_label: 4133 // out := op2 4134 // done: 4135 // 4136 // This removes one jmp, but needs to copy one input (op1) to out. 4137 // 4138 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath? 4139 4140 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>(); 4141 4142 NearLabel nan, done, op2_label; 4143 if (type == DataType::Type::kFloat64) { 4144 __ ucomisd(out, op2); 4145 } else { 4146 DCHECK_EQ(type, DataType::Type::kFloat32); 4147 __ ucomiss(out, op2); 4148 } 4149 4150 __ j(Condition::kParityEven, &nan); 4151 4152 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label); 4153 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done); 4154 4155 // Handle 0.0/-0.0. 4156 if (is_min) { 4157 if (type == DataType::Type::kFloat64) { 4158 __ orpd(out, op2); 4159 } else { 4160 __ orps(out, op2); 4161 } 4162 } else { 4163 if (type == DataType::Type::kFloat64) { 4164 __ andpd(out, op2); 4165 } else { 4166 __ andps(out, op2); 4167 } 4168 } 4169 __ jmp(&done); 4170 4171 // NaN handling. 4172 __ Bind(&nan); 4173 if (type == DataType::Type::kFloat64) { 4174 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000))); 4175 } else { 4176 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000))); 4177 } 4178 __ jmp(&done); 4179 4180 // out := op2; 4181 __ Bind(&op2_label); 4182 if (type == DataType::Type::kFloat64) { 4183 __ movsd(out, op2); 4184 } else { 4185 __ movss(out, op2); 4186 } 4187 4188 // Done. 
4189 __ Bind(&done); 4190 } 4191 4192 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) { 4193 DataType::Type type = minmax->GetResultType(); 4194 switch (type) { 4195 case DataType::Type::kInt32: 4196 case DataType::Type::kInt64: 4197 GenerateMinMaxInt(minmax->GetLocations(), is_min, type); 4198 break; 4199 case DataType::Type::kFloat32: 4200 case DataType::Type::kFloat64: 4201 GenerateMinMaxFP(minmax->GetLocations(), is_min, type); 4202 break; 4203 default: 4204 LOG(FATAL) << "Unexpected type for HMinMax " << type; 4205 } 4206 } 4207 4208 void LocationsBuilderX86_64::VisitMin(HMin* min) { 4209 CreateMinMaxLocations(GetGraph()->GetAllocator(), min); 4210 } 4211 4212 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) { 4213 GenerateMinMax(min, /*is_min*/ true); 4214 } 4215 4216 void LocationsBuilderX86_64::VisitMax(HMax* max) { 4217 CreateMinMaxLocations(GetGraph()->GetAllocator(), max); 4218 } 4219 4220 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) { 4221 GenerateMinMax(max, /*is_min*/ false); 4222 } 4223 4224 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) { 4225 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs); 4226 switch (abs->GetResultType()) { 4227 case DataType::Type::kInt32: 4228 case DataType::Type::kInt64: 4229 locations->SetInAt(0, Location::RequiresRegister()); 4230 locations->SetOut(Location::SameAsFirstInput()); 4231 locations->AddTemp(Location::RequiresRegister()); 4232 break; 4233 case DataType::Type::kFloat32: 4234 case DataType::Type::kFloat64: 4235 locations->SetInAt(0, Location::RequiresFpuRegister()); 4236 locations->SetOut(Location::SameAsFirstInput()); 4237 locations->AddTemp(Location::RequiresFpuRegister()); 4238 break; 4239 default: 4240 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); 4241 } 4242 } 4243 4244 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) { 4245 LocationSummary* locations = abs->GetLocations(); 4246 switch (abs->GetResultType()) { 4247 case DataType::Type::kInt32: { 4248 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 4249 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); 4250 // Create mask. 4251 __ movl(mask, out); 4252 __ sarl(mask, Immediate(31)); 4253 // Add mask. 4254 __ addl(out, mask); 4255 __ xorl(out, mask); 4256 break; 4257 } 4258 case DataType::Type::kInt64: { 4259 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); 4260 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>(); 4261 // Create mask. 4262 __ movq(mask, out); 4263 __ sarq(mask, Immediate(63)); 4264 // Add mask. 
4265 __ addq(out, mask); 4266 __ xorq(out, mask); 4267 break; 4268 } 4269 case DataType::Type::kFloat32: { 4270 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 4271 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 4272 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF))); 4273 __ andps(out, mask); 4274 break; 4275 } 4276 case DataType::Type::kFloat64: { 4277 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>(); 4278 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); 4279 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF))); 4280 __ andpd(out, mask); 4281 break; 4282 } 4283 default: 4284 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType(); 4285 } 4286 } 4287 4288 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 4289 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 4290 locations->SetInAt(0, Location::Any()); 4291 } 4292 4293 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { 4294 SlowPathCode* slow_path = 4295 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction); 4296 codegen_->AddSlowPath(slow_path); 4297 4298 LocationSummary* locations = instruction->GetLocations(); 4299 Location value = locations->InAt(0); 4300 4301 switch (instruction->GetType()) { 4302 case DataType::Type::kBool: 4303 case DataType::Type::kUint8: 4304 case DataType::Type::kInt8: 4305 case DataType::Type::kUint16: 4306 case DataType::Type::kInt16: 4307 case DataType::Type::kInt32: { 4308 if (value.IsRegister()) { 4309 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); 4310 __ j(kEqual, slow_path->GetEntryLabel()); 4311 } else if (value.IsStackSlot()) { 4312 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0)); 4313 __ j(kEqual, slow_path->GetEntryLabel()); 4314 } else { 4315 DCHECK(value.IsConstant()) << value; 4316 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) { 4317 __ jmp(slow_path->GetEntryLabel()); 4318 } 4319 } 4320 break; 4321 } 4322 case DataType::Type::kInt64: { 4323 if (value.IsRegister()) { 4324 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); 4325 __ j(kEqual, slow_path->GetEntryLabel()); 4326 } else if (value.IsDoubleStackSlot()) { 4327 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0)); 4328 __ j(kEqual, slow_path->GetEntryLabel()); 4329 } else { 4330 DCHECK(value.IsConstant()) << value; 4331 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) { 4332 __ jmp(slow_path->GetEntryLabel()); 4333 } 4334 } 4335 break; 4336 } 4337 default: 4338 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType(); 4339 } 4340 } 4341 4342 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) { 4343 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 4344 4345 LocationSummary* locations = 4346 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); 4347 4348 switch (op->GetResultType()) { 4349 case DataType::Type::kInt32: 4350 case DataType::Type::kInt64: { 4351 locations->SetInAt(0, Location::RequiresRegister()); 4352 // The shift count needs to be in CL. 
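// (The variable-count shift instructions only accept the count in CL, which is why a
// non-constant count is pinned to RCX; a constant count is encoded as an immediate instead.)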
4353 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1))); 4354 locations->SetOut(Location::SameAsFirstInput()); 4355 break; 4356 } 4357 default: 4358 LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); 4359 } 4360 } 4361 4362 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) { 4363 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); 4364 4365 LocationSummary* locations = op->GetLocations(); 4366 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>(); 4367 Location second = locations->InAt(1); 4368 4369 switch (op->GetResultType()) { 4370 case DataType::Type::kInt32: { 4371 if (second.IsRegister()) { 4372 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 4373 if (op->IsShl()) { 4374 __ shll(first_reg, second_reg); 4375 } else if (op->IsShr()) { 4376 __ sarl(first_reg, second_reg); 4377 } else { 4378 __ shrl(first_reg, second_reg); 4379 } 4380 } else { 4381 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance); 4382 if (op->IsShl()) { 4383 __ shll(first_reg, imm); 4384 } else if (op->IsShr()) { 4385 __ sarl(first_reg, imm); 4386 } else { 4387 __ shrl(first_reg, imm); 4388 } 4389 } 4390 break; 4391 } 4392 case DataType::Type::kInt64: { 4393 if (second.IsRegister()) { 4394 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 4395 if (op->IsShl()) { 4396 __ shlq(first_reg, second_reg); 4397 } else if (op->IsShr()) { 4398 __ sarq(first_reg, second_reg); 4399 } else { 4400 __ shrq(first_reg, second_reg); 4401 } 4402 } else { 4403 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance); 4404 if (op->IsShl()) { 4405 __ shlq(first_reg, imm); 4406 } else if (op->IsShr()) { 4407 __ sarq(first_reg, imm); 4408 } else { 4409 __ shrq(first_reg, imm); 4410 } 4411 } 4412 break; 4413 } 4414 default: 4415 LOG(FATAL) << "Unexpected operation type " << op->GetResultType(); 4416 UNREACHABLE(); 4417 } 4418 } 4419 4420 void LocationsBuilderX86_64::VisitRor(HRor* ror) { 4421 LocationSummary* locations = 4422 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); 4423 4424 switch (ror->GetResultType()) { 4425 case DataType::Type::kInt32: 4426 case DataType::Type::kInt64: { 4427 locations->SetInAt(0, Location::RequiresRegister()); 4428 // The shift count needs to be in CL (unless it is a constant). 
4429 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1))); 4430 locations->SetOut(Location::SameAsFirstInput()); 4431 break; 4432 } 4433 default: 4434 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); 4435 UNREACHABLE(); 4436 } 4437 } 4438 4439 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) { 4440 LocationSummary* locations = ror->GetLocations(); 4441 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>(); 4442 Location second = locations->InAt(1); 4443 4444 switch (ror->GetResultType()) { 4445 case DataType::Type::kInt32: 4446 if (second.IsRegister()) { 4447 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 4448 __ rorl(first_reg, second_reg); 4449 } else { 4450 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance); 4451 __ rorl(first_reg, imm); 4452 } 4453 break; 4454 case DataType::Type::kInt64: 4455 if (second.IsRegister()) { 4456 CpuRegister second_reg = second.AsRegister<CpuRegister>(); 4457 __ rorq(first_reg, second_reg); 4458 } else { 4459 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance); 4460 __ rorq(first_reg, imm); 4461 } 4462 break; 4463 default: 4464 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType(); 4465 UNREACHABLE(); 4466 } 4467 } 4468 4469 void LocationsBuilderX86_64::VisitShl(HShl* shl) { 4470 HandleShift(shl); 4471 } 4472 4473 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) { 4474 HandleShift(shl); 4475 } 4476 4477 void LocationsBuilderX86_64::VisitShr(HShr* shr) { 4478 HandleShift(shr); 4479 } 4480 4481 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) { 4482 HandleShift(shr); 4483 } 4484 4485 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) { 4486 HandleShift(ushr); 4487 } 4488 4489 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) { 4490 HandleShift(ushr); 4491 } 4492 4493 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { 4494 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 4495 instruction, LocationSummary::kCallOnMainOnly); 4496 InvokeRuntimeCallingConvention calling_convention; 4497 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 4498 locations->SetOut(Location::RegisterLocation(RAX)); 4499 } 4500 4501 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) { 4502 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); 4503 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); 4504 DCHECK(!codegen_->IsLeafMethod()); 4505 } 4506 4507 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { 4508 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 4509 instruction, LocationSummary::kCallOnMainOnly); 4510 InvokeRuntimeCallingConvention calling_convention; 4511 locations->SetOut(Location::RegisterLocation(RAX)); 4512 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 4513 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); 4514 } 4515 4516 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { 4517 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference. 
4518 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction); 4519 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); 4520 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); 4521 DCHECK(!codegen_->IsLeafMethod()); 4522 } 4523 4524 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) { 4525 LocationSummary* locations = 4526 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4527 Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); 4528 if (location.IsStackSlot()) { 4529 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 4530 } else if (location.IsDoubleStackSlot()) { 4531 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); 4532 } 4533 locations->SetOut(location); 4534 } 4535 4536 void InstructionCodeGeneratorX86_64::VisitParameterValue( 4537 HParameterValue* instruction ATTRIBUTE_UNUSED) { 4538 // Nothing to do, the parameter is already at its location. 4539 } 4540 4541 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) { 4542 LocationSummary* locations = 4543 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4544 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); 4545 } 4546 4547 void InstructionCodeGeneratorX86_64::VisitCurrentMethod( 4548 HCurrentMethod* instruction ATTRIBUTE_UNUSED) { 4549 // Nothing to do, the method is already at its location. 4550 } 4551 4552 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) { 4553 LocationSummary* locations = 4554 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4555 locations->SetInAt(0, Location::RequiresRegister()); 4556 locations->SetOut(Location::RequiresRegister()); 4557 } 4558 4559 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) { 4560 LocationSummary* locations = instruction->GetLocations(); 4561 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) { 4562 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( 4563 instruction->GetIndex(), kX86_64PointerSize).SizeValue(); 4564 __ movq(locations->Out().AsRegister<CpuRegister>(), 4565 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset)); 4566 } else { 4567 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement( 4568 instruction->GetIndex(), kX86_64PointerSize)); 4569 __ movq(locations->Out().AsRegister<CpuRegister>(), 4570 Address(locations->InAt(0).AsRegister<CpuRegister>(), 4571 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value())); 4572 __ movq(locations->Out().AsRegister<CpuRegister>(), 4573 Address(locations->Out().AsRegister<CpuRegister>(), method_offset)); 4574 } 4575 } 4576 4577 void LocationsBuilderX86_64::VisitNot(HNot* not_) { 4578 LocationSummary* locations = 4579 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall); 4580 locations->SetInAt(0, Location::RequiresRegister()); 4581 locations->SetOut(Location::SameAsFirstInput()); 4582 } 4583 4584 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) { 4585 LocationSummary* locations = not_->GetLocations(); 4586 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(), 4587 locations->Out().AsRegister<CpuRegister>().AsRegister()); 4588 Location out = locations->Out(); 4589 switch 
(not_->GetResultType()) { 4590 case DataType::Type::kInt32: 4591 __ notl(out.AsRegister<CpuRegister>()); 4592 break; 4593 4594 case DataType::Type::kInt64: 4595 __ notq(out.AsRegister<CpuRegister>()); 4596 break; 4597 4598 default: 4599 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType(); 4600 } 4601 } 4602 4603 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) { 4604 LocationSummary* locations = 4605 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall); 4606 locations->SetInAt(0, Location::RequiresRegister()); 4607 locations->SetOut(Location::SameAsFirstInput()); 4608 } 4609 4610 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) { 4611 LocationSummary* locations = bool_not->GetLocations(); 4612 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(), 4613 locations->Out().AsRegister<CpuRegister>().AsRegister()); 4614 Location out = locations->Out(); 4615 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1)); 4616 } 4617 4618 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { 4619 LocationSummary* locations = 4620 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4621 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { 4622 locations->SetInAt(i, Location::Any()); 4623 } 4624 locations->SetOut(Location::Any()); 4625 } 4626 4627 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) { 4628 LOG(FATAL) << "Unimplemented"; 4629 } 4630 4631 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) { 4632 /* 4633 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need memory fence. 4634 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model. 4635 * For those cases, all we need to ensure is that there is a scheduling barrier in place. 4636 */ 4637 switch (kind) { 4638 case MemBarrierKind::kAnyAny: { 4639 MemoryFence(); 4640 break; 4641 } 4642 case MemBarrierKind::kAnyStore: 4643 case MemBarrierKind::kLoadAny: 4644 case MemBarrierKind::kStoreStore: { 4645 // nop 4646 break; 4647 } 4648 case MemBarrierKind::kNTStoreStore: 4649 // Non-Temporal Store/Store needs an explicit fence. 4650 MemoryFence(/* non-temporal= */ true); 4651 break; 4652 } 4653 } 4654 4655 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { 4656 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 4657 4658 bool object_field_get_with_read_barrier = 4659 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 4660 LocationSummary* locations = 4661 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 4662 object_field_get_with_read_barrier 4663 ? LocationSummary::kCallOnSlowPath 4664 : LocationSummary::kNoCall); 4665 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { 4666 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 4667 } 4668 locations->SetInAt(0, Location::RequiresRegister()); 4669 if (DataType::IsFloatingPointType(instruction->GetType())) { 4670 locations->SetOut(Location::RequiresFpuRegister()); 4671 } else { 4672 // The output overlaps for an object field get when read barriers 4673 // are enabled: we do not want the move to overwrite the object's 4674 // location, as we need it to emit the read barrier. 4675 locations->SetOut( 4676 Location::RequiresRegister(), 4677 object_field_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 4678 } 4679 } 4680 4681 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, 4682 const FieldInfo& field_info) { 4683 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); 4684 4685 LocationSummary* locations = instruction->GetLocations(); 4686 Location base_loc = locations->InAt(0); 4687 CpuRegister base = base_loc.AsRegister<CpuRegister>(); 4688 Location out = locations->Out(); 4689 bool is_volatile = field_info.IsVolatile(); 4690 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); 4691 DataType::Type load_type = instruction->GetType(); 4692 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 4693 4694 switch (load_type) { 4695 case DataType::Type::kBool: 4696 case DataType::Type::kUint8: { 4697 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset)); 4698 break; 4699 } 4700 4701 case DataType::Type::kInt8: { 4702 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset)); 4703 break; 4704 } 4705 4706 case DataType::Type::kUint16: { 4707 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset)); 4708 break; 4709 } 4710 4711 case DataType::Type::kInt16: { 4712 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset)); 4713 break; 4714 } 4715 4716 case DataType::Type::kInt32: { 4717 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); 4718 break; 4719 } 4720 4721 case DataType::Type::kReference: { 4722 // /* HeapReference<Object> */ out = *(base + offset) 4723 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 4724 // Note that a potential implicit null check is handled in this 4725 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call. 4726 codegen_->GenerateFieldLoadWithBakerReadBarrier( 4727 instruction, out, base, offset, /* needs_null_check= */ true); 4728 if (is_volatile) { 4729 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 4730 } 4731 } else { 4732 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); 4733 codegen_->MaybeRecordImplicitNullCheck(instruction); 4734 if (is_volatile) { 4735 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 4736 } 4737 // If read barriers are enabled, emit read barriers other than 4738 // Baker's using a slow path (and also unpoison the loaded 4739 // reference, if heap poisoning is enabled). 4740 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset); 4741 } 4742 break; 4743 } 4744 4745 case DataType::Type::kInt64: { 4746 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); 4747 break; 4748 } 4749 4750 case DataType::Type::kFloat32: { 4751 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); 4752 break; 4753 } 4754 4755 case DataType::Type::kFloat64: { 4756 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); 4757 break; 4758 } 4759 4760 case DataType::Type::kUint32: 4761 case DataType::Type::kUint64: 4762 case DataType::Type::kVoid: 4763 LOG(FATAL) << "Unreachable type " << load_type; 4764 UNREACHABLE(); 4765 } 4766 4767 if (load_type == DataType::Type::kReference) { 4768 // Potential implicit null checks, in the case of reference 4769 // fields, are handled in the previous switch statement. 
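// Both the GenerateFieldLoadWithBakerReadBarrier call and the plain movl path above
// record the check themselves.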
4770 } else { 4771 codegen_->MaybeRecordImplicitNullCheck(instruction); 4772 } 4773 4774 if (is_volatile) { 4775 if (load_type == DataType::Type::kReference) { 4776 // Memory barriers, in the case of references, are also handled 4777 // in the previous switch statement. 4778 } else { 4779 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 4780 } 4781 } 4782 } 4783 4784 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, 4785 const FieldInfo& field_info) { 4786 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 4787 4788 LocationSummary* locations = 4789 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 4790 DataType::Type field_type = field_info.GetFieldType(); 4791 bool is_volatile = field_info.IsVolatile(); 4792 bool needs_write_barrier = 4793 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); 4794 4795 locations->SetInAt(0, Location::RequiresRegister()); 4796 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { 4797 if (is_volatile) { 4798 // In order to satisfy the semantics of volatile, this must be a single instruction store. 4799 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1))); 4800 } else { 4801 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); 4802 } 4803 } else { 4804 if (is_volatile) { 4805 // In order to satisfy the semantics of volatile, this must be a single instruction store. 4806 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1))); 4807 } else { 4808 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 4809 } 4810 } 4811 if (needs_write_barrier) { 4812 // Temporary registers for the write barrier. 4813 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. 4814 locations->AddTemp(Location::RequiresRegister()); 4815 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { 4816 // Temporary register for the reference poisoning. 
4817 locations->AddTemp(Location::RequiresRegister()); 4818 } 4819 } 4820 4821 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, 4822 const FieldInfo& field_info, 4823 bool value_can_be_null) { 4824 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); 4825 4826 LocationSummary* locations = instruction->GetLocations(); 4827 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); 4828 Location value = locations->InAt(1); 4829 bool is_volatile = field_info.IsVolatile(); 4830 DataType::Type field_type = field_info.GetFieldType(); 4831 uint32_t offset = field_info.GetFieldOffset().Uint32Value(); 4832 4833 if (is_volatile) { 4834 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore); 4835 } 4836 4837 bool maybe_record_implicit_null_check_done = false; 4838 4839 switch (field_type) { 4840 case DataType::Type::kBool: 4841 case DataType::Type::kUint8: 4842 case DataType::Type::kInt8: { 4843 if (value.IsConstant()) { 4844 __ movb(Address(base, offset), 4845 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); 4846 } else { 4847 __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); 4848 } 4849 break; 4850 } 4851 4852 case DataType::Type::kUint16: 4853 case DataType::Type::kInt16: { 4854 if (value.IsConstant()) { 4855 __ movw(Address(base, offset), 4856 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); 4857 } else { 4858 __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); 4859 } 4860 break; 4861 } 4862 4863 case DataType::Type::kInt32: 4864 case DataType::Type::kReference: { 4865 if (value.IsConstant()) { 4866 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); 4867 // `field_type == DataType::Type::kReference` implies `v == 0`. 4868 DCHECK((field_type != DataType::Type::kReference) || (v == 0)); 4869 // Note: if heap poisoning is enabled, no need to poison 4870 // (negate) `v` if it is a reference, as it would be null. 
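// A single 32-bit immediate store therefore covers both int constants and the null reference.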
4871 __ movl(Address(base, offset), Immediate(v)); 4872 } else { 4873 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { 4874 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 4875 __ movl(temp, value.AsRegister<CpuRegister>()); 4876 __ PoisonHeapReference(temp); 4877 __ movl(Address(base, offset), temp); 4878 } else { 4879 __ movl(Address(base, offset), value.AsRegister<CpuRegister>()); 4880 } 4881 } 4882 break; 4883 } 4884 4885 case DataType::Type::kInt64: { 4886 if (value.IsConstant()) { 4887 int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); 4888 codegen_->MoveInt64ToAddress(Address(base, offset), 4889 Address(base, offset + sizeof(int32_t)), 4890 v, 4891 instruction); 4892 maybe_record_implicit_null_check_done = true; 4893 } else { 4894 __ movq(Address(base, offset), value.AsRegister<CpuRegister>()); 4895 } 4896 break; 4897 } 4898 4899 case DataType::Type::kFloat32: { 4900 if (value.IsConstant()) { 4901 int32_t v = 4902 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); 4903 __ movl(Address(base, offset), Immediate(v)); 4904 } else { 4905 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>()); 4906 } 4907 break; 4908 } 4909 4910 case DataType::Type::kFloat64: { 4911 if (value.IsConstant()) { 4912 int64_t v = 4913 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); 4914 codegen_->MoveInt64ToAddress(Address(base, offset), 4915 Address(base, offset + sizeof(int32_t)), 4916 v, 4917 instruction); 4918 maybe_record_implicit_null_check_done = true; 4919 } else { 4920 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>()); 4921 } 4922 break; 4923 } 4924 4925 case DataType::Type::kUint32: 4926 case DataType::Type::kUint64: 4927 case DataType::Type::kVoid: 4928 LOG(FATAL) << "Unreachable type " << field_type; 4929 UNREACHABLE(); 4930 } 4931 4932 if (!maybe_record_implicit_null_check_done) { 4933 codegen_->MaybeRecordImplicitNullCheck(instruction); 4934 } 4935 4936 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) { 4937 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); 4938 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); 4939 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null); 4940 } 4941 4942 if (is_volatile) { 4943 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny); 4944 } 4945 } 4946 4947 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 4948 HandleFieldSet(instruction, instruction->GetFieldInfo()); 4949 } 4950 4951 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { 4952 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 4953 } 4954 4955 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 4956 HandleFieldGet(instruction); 4957 } 4958 4959 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { 4960 HandleFieldGet(instruction, instruction->GetFieldInfo()); 4961 } 4962 4963 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 4964 HandleFieldGet(instruction); 4965 } 4966 4967 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) { 4968 HandleFieldGet(instruction, instruction->GetFieldInfo()); 4969 } 4970 4971 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 4972 
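// Static and instance field stores share HandleFieldSet; for a static field the base input is
// the declaring Class object rather than an instance.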
HandleFieldSet(instruction, instruction->GetFieldInfo()); 4973 } 4974 4975 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) { 4976 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); 4977 } 4978 4979 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { 4980 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX)); 4981 } 4982 4983 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) { 4984 __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue())); 4985 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc()); 4986 } 4987 4988 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet( 4989 HUnresolvedInstanceFieldGet* instruction) { 4990 FieldAccessCallingConventionX86_64 calling_convention; 4991 codegen_->CreateUnresolvedFieldLocationSummary( 4992 instruction, instruction->GetFieldType(), calling_convention); 4993 } 4994 4995 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet( 4996 HUnresolvedInstanceFieldGet* instruction) { 4997 FieldAccessCallingConventionX86_64 calling_convention; 4998 codegen_->GenerateUnresolvedFieldAccess(instruction, 4999 instruction->GetFieldType(), 5000 instruction->GetFieldIndex(), 5001 instruction->GetDexPc(), 5002 calling_convention); 5003 } 5004 5005 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet( 5006 HUnresolvedInstanceFieldSet* instruction) { 5007 FieldAccessCallingConventionX86_64 calling_convention; 5008 codegen_->CreateUnresolvedFieldLocationSummary( 5009 instruction, instruction->GetFieldType(), calling_convention); 5010 } 5011 5012 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet( 5013 HUnresolvedInstanceFieldSet* instruction) { 5014 FieldAccessCallingConventionX86_64 calling_convention; 5015 codegen_->GenerateUnresolvedFieldAccess(instruction, 5016 instruction->GetFieldType(), 5017 instruction->GetFieldIndex(), 5018 instruction->GetDexPc(), 5019 calling_convention); 5020 } 5021 5022 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet( 5023 HUnresolvedStaticFieldGet* instruction) { 5024 FieldAccessCallingConventionX86_64 calling_convention; 5025 codegen_->CreateUnresolvedFieldLocationSummary( 5026 instruction, instruction->GetFieldType(), calling_convention); 5027 } 5028 5029 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet( 5030 HUnresolvedStaticFieldGet* instruction) { 5031 FieldAccessCallingConventionX86_64 calling_convention; 5032 codegen_->GenerateUnresolvedFieldAccess(instruction, 5033 instruction->GetFieldType(), 5034 instruction->GetFieldIndex(), 5035 instruction->GetDexPc(), 5036 calling_convention); 5037 } 5038 5039 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet( 5040 HUnresolvedStaticFieldSet* instruction) { 5041 FieldAccessCallingConventionX86_64 calling_convention; 5042 codegen_->CreateUnresolvedFieldLocationSummary( 5043 instruction, instruction->GetFieldType(), calling_convention); 5044 } 5045 5046 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet( 5047 HUnresolvedStaticFieldSet* instruction) { 5048 FieldAccessCallingConventionX86_64 calling_convention; 5049 codegen_->GenerateUnresolvedFieldAccess(instruction, 5050 instruction->GetFieldType(), 5051 instruction->GetFieldIndex(), 5052 instruction->GetDexPc(), 5053 calling_convention); 5054 } 5055 5056 void 
LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) { 5057 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); 5058 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks() 5059 ? Location::RequiresRegister() 5060 : Location::Any(); 5061 locations->SetInAt(0, loc); 5062 } 5063 5064 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) { 5065 if (CanMoveNullCheckToUser(instruction)) { 5066 return; 5067 } 5068 LocationSummary* locations = instruction->GetLocations(); 5069 Location obj = locations->InAt(0); 5070 5071 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0)); 5072 RecordPcInfo(instruction, instruction->GetDexPc()); 5073 } 5074 5075 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) { 5076 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction); 5077 AddSlowPath(slow_path); 5078 5079 LocationSummary* locations = instruction->GetLocations(); 5080 Location obj = locations->InAt(0); 5081 5082 if (obj.IsRegister()) { 5083 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>()); 5084 } else if (obj.IsStackSlot()) { 5085 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0)); 5086 } else { 5087 DCHECK(obj.IsConstant()) << obj; 5088 DCHECK(obj.GetConstant()->IsNullConstant()); 5089 __ jmp(slow_path->GetEntryLabel()); 5090 return; 5091 } 5092 __ j(kEqual, slow_path->GetEntryLabel()); 5093 } 5094 5095 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { 5096 codegen_->GenerateNullCheck(instruction); 5097 } 5098 5099 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { 5100 bool object_array_get_with_read_barrier = 5101 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); 5102 LocationSummary* locations = 5103 new (GetGraph()->GetAllocator()) LocationSummary(instruction, 5104 object_array_get_with_read_barrier 5105 ? LocationSummary::kCallOnSlowPath 5106 : LocationSummary::kNoCall); 5107 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { 5108 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 5109 } 5110 locations->SetInAt(0, Location::RequiresRegister()); 5111 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 5112 if (DataType::IsFloatingPointType(instruction->GetType())) { 5113 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 5114 } else { 5115 // The output overlaps for an object array get when read barriers 5116 // are enabled: we do not want the move to overwrite the array's 5117 // location, as we need it to emit the read barrier. 5118 locations->SetOut( 5119 Location::RequiresRegister(), 5120 object_array_get_with_read_barrier ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); 5121 } 5122 } 5123 5124 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { 5125 LocationSummary* locations = instruction->GetLocations(); 5126 Location obj_loc = locations->InAt(0); 5127 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 5128 Location index = locations->InAt(1); 5129 Location out_loc = locations->Out(); 5130 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); 5131 5132 DataType::Type type = instruction->GetType(); 5133 switch (type) { 5134 case DataType::Type::kBool: 5135 case DataType::Type::kUint8: { 5136 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5137 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); 5138 break; 5139 } 5140 5141 case DataType::Type::kInt8: { 5142 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5143 __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); 5144 break; 5145 } 5146 5147 case DataType::Type::kUint16: { 5148 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5149 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { 5150 // Branch cases into compressed and uncompressed for each index's type. 5151 uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 5152 NearLabel done, not_compressed; 5153 __ testb(Address(obj, count_offset), Immediate(1)); 5154 codegen_->MaybeRecordImplicitNullCheck(instruction); 5155 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 5156 "Expecting 0=compressed, 1=uncompressed"); 5157 __ j(kNotZero, ¬_compressed); 5158 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); 5159 __ jmp(&done); 5160 __ Bind(¬_compressed); 5161 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); 5162 __ Bind(&done); 5163 } else { 5164 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); 5165 } 5166 break; 5167 } 5168 5169 case DataType::Type::kInt16: { 5170 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5171 __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); 5172 break; 5173 } 5174 5175 case DataType::Type::kInt32: { 5176 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5177 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); 5178 break; 5179 } 5180 5181 case DataType::Type::kReference: { 5182 static_assert( 5183 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 5184 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 5185 // /* HeapReference<Object> */ out = 5186 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 5187 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 5188 // Note that a potential implicit null check is handled in this 5189 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call. 5190 codegen_->GenerateArrayLoadWithBakerReadBarrier( 5191 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true); 5192 } else { 5193 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5194 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); 5195 codegen_->MaybeRecordImplicitNullCheck(instruction); 5196 // If read barriers are enabled, emit read barriers other than 5197 // Baker's using a slow path (and also unpoison the loaded 5198 // reference, if heap poisoning is enabled). 
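// With a constant index the element address folds into a single offset
// (index << TIMES_4 plus data_offset), so only that offset is passed to the read
// barrier slow path; otherwise the index location is forwarded as well.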
5199 if (index.IsConstant()) { 5200 uint32_t offset = 5201 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; 5202 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); 5203 } else { 5204 codegen_->MaybeGenerateReadBarrierSlow( 5205 instruction, out_loc, out_loc, obj_loc, data_offset, index); 5206 } 5207 } 5208 break; 5209 } 5210 5211 case DataType::Type::kInt64: { 5212 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 5213 __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); 5214 break; 5215 } 5216 5217 case DataType::Type::kFloat32: { 5218 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 5219 __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); 5220 break; 5221 } 5222 5223 case DataType::Type::kFloat64: { 5224 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); 5225 __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); 5226 break; 5227 } 5228 5229 case DataType::Type::kUint32: 5230 case DataType::Type::kUint64: 5231 case DataType::Type::kVoid: 5232 LOG(FATAL) << "Unreachable type " << type; 5233 UNREACHABLE(); 5234 } 5235 5236 if (type == DataType::Type::kReference) { 5237 // Potential implicit null checks, in the case of reference 5238 // arrays, are handled in the previous switch statement. 5239 } else { 5240 codegen_->MaybeRecordImplicitNullCheck(instruction); 5241 } 5242 } 5243 5244 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { 5245 DataType::Type value_type = instruction->GetComponentType(); 5246 5247 bool needs_write_barrier = 5248 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 5249 bool needs_type_check = instruction->NeedsTypeCheck(); 5250 5251 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5252 instruction, 5253 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); 5254 5255 locations->SetInAt(0, Location::RequiresRegister()); 5256 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); 5257 if (DataType::IsFloatingPointType(value_type)) { 5258 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); 5259 } else { 5260 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); 5261 } 5262 5263 if (needs_write_barrier) { 5264 // Temporary registers for the write barrier. 5265 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. 
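// The second temporary receives the card table address in MarkGCCard.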
5266 locations->AddTemp(Location::RequiresRegister()); 5267 } 5268 } 5269 5270 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { 5271 LocationSummary* locations = instruction->GetLocations(); 5272 Location array_loc = locations->InAt(0); 5273 CpuRegister array = array_loc.AsRegister<CpuRegister>(); 5274 Location index = locations->InAt(1); 5275 Location value = locations->InAt(2); 5276 DataType::Type value_type = instruction->GetComponentType(); 5277 bool needs_type_check = instruction->NeedsTypeCheck(); 5278 bool needs_write_barrier = 5279 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); 5280 5281 switch (value_type) { 5282 case DataType::Type::kBool: 5283 case DataType::Type::kUint8: 5284 case DataType::Type::kInt8: { 5285 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); 5286 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset); 5287 if (value.IsRegister()) { 5288 __ movb(address, value.AsRegister<CpuRegister>()); 5289 } else { 5290 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); 5291 } 5292 codegen_->MaybeRecordImplicitNullCheck(instruction); 5293 break; 5294 } 5295 5296 case DataType::Type::kUint16: 5297 case DataType::Type::kInt16: { 5298 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); 5299 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset); 5300 if (value.IsRegister()) { 5301 __ movw(address, value.AsRegister<CpuRegister>()); 5302 } else { 5303 DCHECK(value.IsConstant()) << value; 5304 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); 5305 } 5306 codegen_->MaybeRecordImplicitNullCheck(instruction); 5307 break; 5308 } 5309 5310 case DataType::Type::kReference: { 5311 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); 5312 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); 5313 5314 if (!value.IsRegister()) { 5315 // Just setting null. 5316 DCHECK(instruction->InputAt(2)->IsNullConstant()); 5317 DCHECK(value.IsConstant()) << value; 5318 __ movl(address, Immediate(0)); 5319 codegen_->MaybeRecordImplicitNullCheck(instruction); 5320 DCHECK(!needs_write_barrier); 5321 DCHECK(!needs_type_check); 5322 break; 5323 } 5324 5325 DCHECK(needs_write_barrier); 5326 CpuRegister register_value = value.AsRegister<CpuRegister>(); 5327 Location temp_loc = locations->GetTemp(0); 5328 CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); 5329 5330 bool can_value_be_null = instruction->GetValueCanBeNull(); 5331 NearLabel do_store; 5332 if (can_value_be_null) { 5333 __ testl(register_value, register_value); 5334 __ j(kEqual, &do_store); 5335 } 5336 5337 SlowPathCode* slow_path = nullptr; 5338 if (needs_type_check) { 5339 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction); 5340 codegen_->AddSlowPath(slow_path); 5341 5342 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 5343 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 5344 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 5345 5346 // Note that when Baker read barriers are enabled, the type 5347 // checks are performed without read barriers. 
This is fine, 5348 // even in the case where a class object is in the from-space 5349 // after the flip, as a comparison involving such a type would 5350 // not produce a false positive; it may of course produce a 5351 // false negative, in which case we would take the ArraySet 5352 // slow path. 5353 5354 // /* HeapReference<Class> */ temp = array->klass_ 5355 __ movl(temp, Address(array, class_offset)); 5356 codegen_->MaybeRecordImplicitNullCheck(instruction); 5357 __ MaybeUnpoisonHeapReference(temp); 5358 5359 // /* HeapReference<Class> */ temp = temp->component_type_ 5360 __ movl(temp, Address(temp, component_offset)); 5361 // If heap poisoning is enabled, no need to unpoison `temp` 5362 // nor the object reference in `register_value->klass`, as 5363 // we are comparing two poisoned references. 5364 __ cmpl(temp, Address(register_value, class_offset)); 5365 5366 if (instruction->StaticTypeOfArrayIsObjectArray()) { 5367 NearLabel do_put; 5368 __ j(kEqual, &do_put); 5369 // If heap poisoning is enabled, the `temp` reference has 5370 // not been unpoisoned yet; unpoison it now. 5371 __ MaybeUnpoisonHeapReference(temp); 5372 5373 // If heap poisoning is enabled, no need to unpoison the 5374 // heap reference loaded below, as it is only used for a 5375 // comparison with null. 5376 __ cmpl(Address(temp, super_offset), Immediate(0)); 5377 __ j(kNotEqual, slow_path->GetEntryLabel()); 5378 __ Bind(&do_put); 5379 } else { 5380 __ j(kNotEqual, slow_path->GetEntryLabel()); 5381 } 5382 } 5383 5384 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>(); 5385 codegen_->MarkGCCard( 5386 temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false); 5387 5388 if (can_value_be_null) { 5389 DCHECK(do_store.IsLinked()); 5390 __ Bind(&do_store); 5391 } 5392 5393 Location source = value; 5394 if (kPoisonHeapReferences) { 5395 __ movl(temp, register_value); 5396 __ PoisonHeapReference(temp); 5397 source = temp_loc; 5398 } 5399 5400 __ movl(address, source.AsRegister<CpuRegister>()); 5401 5402 if (can_value_be_null || !needs_type_check) { 5403 codegen_->MaybeRecordImplicitNullCheck(instruction); 5404 } 5405 5406 if (slow_path != nullptr) { 5407 __ Bind(slow_path->GetExitLabel()); 5408 } 5409 5410 break; 5411 } 5412 5413 case DataType::Type::kInt32: { 5414 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); 5415 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); 5416 if (value.IsRegister()) { 5417 __ movl(address, value.AsRegister<CpuRegister>()); 5418 } else { 5419 DCHECK(value.IsConstant()) << value; 5420 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); 5421 __ movl(address, Immediate(v)); 5422 } 5423 codegen_->MaybeRecordImplicitNullCheck(instruction); 5424 break; 5425 } 5426 5427 case DataType::Type::kInt64: { 5428 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); 5429 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset); 5430 if (value.IsRegister()) { 5431 __ movq(address, value.AsRegister<CpuRegister>()); 5432 codegen_->MaybeRecordImplicitNullCheck(instruction); 5433 } else { 5434 int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); 5435 Address address_high = 5436 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t)); 5437 codegen_->MoveInt64ToAddress(address, address_high, v, instruction); 5438 } 5439 break; 5440 } 5441 5442 case DataType::Type::kFloat32: { 5443 uint32_t offset = 
mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5444 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5445 if (value.IsFpuRegister()) {
5446 __ movss(address, value.AsFpuRegister<XmmRegister>());
5447 } else {
5448 DCHECK(value.IsConstant());
5449 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5450 __ movl(address, Immediate(v));
5451 }
5452 codegen_->MaybeRecordImplicitNullCheck(instruction);
5453 break;
5454 }
5455
5456 case DataType::Type::kFloat64: {
5457 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5458 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5459 if (value.IsFpuRegister()) {
5460 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5461 codegen_->MaybeRecordImplicitNullCheck(instruction);
5462 } else {
5463 int64_t v =
5464 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5465 Address address_high =
5466 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5467 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5468 }
5469 break;
5470 }
5471
5472 case DataType::Type::kUint32:
5473 case DataType::Type::kUint64:
5474 case DataType::Type::kVoid:
5475 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5476 UNREACHABLE();
5477 }
5478 }
5479
5480 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5481 LocationSummary* locations =
5482 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5483 locations->SetInAt(0, Location::RequiresRegister());
5484 if (!instruction->IsEmittedAtUseSite()) {
5485 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5486 }
5487 }
5488
5489 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5490 if (instruction->IsEmittedAtUseSite()) {
5491 return;
5492 }
5493
5494 LocationSummary* locations = instruction->GetLocations();
5495 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5496 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5497 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5498 __ movl(out, Address(obj, offset));
5499 codegen_->MaybeRecordImplicitNullCheck(instruction);
5500 // If this is a String's length, shift out the compression flag kept in the least significant bit.
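// The count field stores (length << 1) | compression_flag, with 0 meaning compressed (see the
// static_assert in VisitArrayGet), so a single logical right shift yields the character count
// for either encoding.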
5501 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5502 __ shrl(out, Immediate(1));
5503 }
5504 }
5505
5506 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5507 RegisterSet caller_saves = RegisterSet::Empty();
5508 InvokeRuntimeCallingConvention calling_convention;
5509 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5510 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5511 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5512 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5513 HInstruction* length = instruction->InputAt(1);
5514 if (!length->IsEmittedAtUseSite()) {
5515 locations->SetInAt(1, Location::RegisterOrConstant(length));
5516 }
5517 }
5518
5519 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5520 LocationSummary* locations = instruction->GetLocations();
5521 Location index_loc = locations->InAt(0);
5522 Location length_loc = locations->InAt(1);
5523 SlowPathCode* slow_path =
5524 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5525
5526 if (length_loc.IsConstant()) {
5527 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5528 if (index_loc.IsConstant()) {
5529 // BCE will remove the bounds check if we are guaranteed to pass.
5530 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5531 if (index < 0 || index >= length) {
5532 codegen_->AddSlowPath(slow_path);
5533 __ jmp(slow_path->GetEntryLabel());
5534 } else {
5535 // Some optimization after BCE may have generated this, and we should not
5536 // generate a bounds check if it is a valid range.
5537 }
5538 return;
5539 }
5540
5541 // We have to reverse the jump condition because the length, not the index, is the constant.
5542 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5543 __ cmpl(index_reg, Immediate(length));
5544 codegen_->AddSlowPath(slow_path);
5545 __ j(kAboveEqual, slow_path->GetEntryLabel());
5546 } else {
5547 HInstruction* array_length = instruction->InputAt(1);
5548 if (array_length->IsEmittedAtUseSite()) {
5549 // Address the length field in the array.
5550 DCHECK(array_length->IsArrayLength());
5551 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5552 Location array_loc = array_length->GetLocations()->InAt(0);
5553 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5554 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5555 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5556 // the string compression flag) with the in-memory length and avoid the temporary.
5557 CpuRegister length_reg = CpuRegister(TMP);
5558 __ movl(length_reg, array_len);
5559 codegen_->MaybeRecordImplicitNullCheck(array_length);
5560 __ shrl(length_reg, Immediate(1));
5561 codegen_->GenerateIntCompare(length_reg, index_loc);
5562 } else {
5563 // Checking the bound for the general case:
5564 // an array of char, or a String's value array when the compression feature is off.
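// Note that the comparisons below use the length as the first operand, so the unsigned
// length <= index condition (the kBelowEqual branch further down) is what sends us to
// the slow path.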
5565 if (index_loc.IsConstant()) { 5566 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant()); 5567 __ cmpl(array_len, Immediate(value)); 5568 } else { 5569 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>()); 5570 } 5571 codegen_->MaybeRecordImplicitNullCheck(array_length); 5572 } 5573 } else { 5574 codegen_->GenerateIntCompare(length_loc, index_loc); 5575 } 5576 codegen_->AddSlowPath(slow_path); 5577 __ j(kBelowEqual, slow_path->GetEntryLabel()); 5578 } 5579 } 5580 5581 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, 5582 CpuRegister card, 5583 CpuRegister object, 5584 CpuRegister value, 5585 bool value_can_be_null) { 5586 NearLabel is_null; 5587 if (value_can_be_null) { 5588 __ testl(value, value); 5589 __ j(kEqual, &is_null); 5590 } 5591 // Load the address of the card table into `card`. 5592 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(), 5593 /* no_rip= */ true)); 5594 // Calculate the offset (in the card table) of the card corresponding to 5595 // `object`. 5596 __ movq(temp, object); 5597 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift)); 5598 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the 5599 // `object`'s card. 5600 // 5601 // Register `card` contains the address of the card table. Note that the card 5602 // table's base is biased during its creation so that it always starts at an 5603 // address whose least-significant byte is equal to `kCardDirty` (see 5604 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction 5605 // below writes the `kCardDirty` (byte) value into the `object`'s card 5606 // (located at `card + object >> kCardShift`). 5607 // 5608 // This dual use of the value in register `card` (1. to calculate the location 5609 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load 5610 // (no need to explicitly load `kCardDirty` as an immediate value). 5611 __ movb(Address(temp, card, TIMES_1, 0), card); 5612 if (value_can_be_null) { 5613 __ Bind(&is_null); 5614 } 5615 } 5616 5617 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) { 5618 LOG(FATAL) << "Unimplemented"; 5619 } 5620 5621 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) { 5622 if (instruction->GetNext()->IsSuspendCheck() && 5623 instruction->GetBlock()->GetLoopInformation() != nullptr) { 5624 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); 5625 // The back edge will generate the suspend check. 5626 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); 5627 } 5628 5629 codegen_->GetMoveResolver()->EmitNativeCode(instruction); 5630 } 5631 5632 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { 5633 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 5634 instruction, LocationSummary::kCallOnSlowPath); 5635 // In suspend check slow path, usually there are no caller-save registers at all. 5636 // If SIMD instructions are present, however, we force spilling all live SIMD 5637 // registers in full width (since the runtime only saves/restores lower part). 5638 locations->SetCustomSlowPathCallerSaves( 5639 GetGraph()->HasSIMD() ? 
RegisterSet::AllFpu() : RegisterSet::Empty()); 5640 } 5641 5642 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { 5643 HBasicBlock* block = instruction->GetBlock(); 5644 if (block->GetLoopInformation() != nullptr) { 5645 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction); 5646 // The back edge will generate the suspend check. 5647 return; 5648 } 5649 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) { 5650 // The goto will generate the suspend check. 5651 return; 5652 } 5653 GenerateSuspendCheck(instruction, nullptr); 5654 } 5655 5656 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction, 5657 HBasicBlock* successor) { 5658 SuspendCheckSlowPathX86_64* slow_path = 5659 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath()); 5660 if (slow_path == nullptr) { 5661 slow_path = 5662 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor); 5663 instruction->SetSlowPath(slow_path); 5664 codegen_->AddSlowPath(slow_path); 5665 if (successor != nullptr) { 5666 DCHECK(successor->IsLoopHeader()); 5667 } 5668 } else { 5669 DCHECK_EQ(slow_path->GetSuccessor(), successor); 5670 } 5671 5672 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(), 5673 /* no_rip= */ true), 5674 Immediate(0)); 5675 if (successor == nullptr) { 5676 __ j(kNotEqual, slow_path->GetEntryLabel()); 5677 __ Bind(slow_path->GetReturnLabel()); 5678 } else { 5679 __ j(kEqual, codegen_->GetLabelOf(successor)); 5680 __ jmp(slow_path->GetEntryLabel()); 5681 } 5682 } 5683 5684 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { 5685 return codegen_->GetAssembler(); 5686 } 5687 5688 void ParallelMoveResolverX86_64::EmitMove(size_t index) { 5689 MoveOperands* move = moves_[index]; 5690 Location source = move->GetSource(); 5691 Location destination = move->GetDestination(); 5692 5693 if (source.IsRegister()) { 5694 if (destination.IsRegister()) { 5695 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>()); 5696 } else if (destination.IsStackSlot()) { 5697 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), 5698 source.AsRegister<CpuRegister>()); 5699 } else { 5700 DCHECK(destination.IsDoubleStackSlot()); 5701 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), 5702 source.AsRegister<CpuRegister>()); 5703 } 5704 } else if (source.IsStackSlot()) { 5705 if (destination.IsRegister()) { 5706 __ movl(destination.AsRegister<CpuRegister>(), 5707 Address(CpuRegister(RSP), source.GetStackIndex())); 5708 } else if (destination.IsFpuRegister()) { 5709 __ movss(destination.AsFpuRegister<XmmRegister>(), 5710 Address(CpuRegister(RSP), source.GetStackIndex())); 5711 } else { 5712 DCHECK(destination.IsStackSlot()); 5713 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 5714 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 5715 } 5716 } else if (source.IsDoubleStackSlot()) { 5717 if (destination.IsRegister()) { 5718 __ movq(destination.AsRegister<CpuRegister>(), 5719 Address(CpuRegister(RSP), source.GetStackIndex())); 5720 } else if (destination.IsFpuRegister()) { 5721 __ movsd(destination.AsFpuRegister<XmmRegister>(), 5722 Address(CpuRegister(RSP), source.GetStackIndex())); 5723 } else { 5724 DCHECK(destination.IsDoubleStackSlot()) << destination; 5725 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 5726 __ 
movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 5727 } 5728 } else if (source.IsSIMDStackSlot()) { 5729 if (destination.IsFpuRegister()) { 5730 __ movups(destination.AsFpuRegister<XmmRegister>(), 5731 Address(CpuRegister(RSP), source.GetStackIndex())); 5732 } else { 5733 DCHECK(destination.IsSIMDStackSlot()); 5734 size_t high = kX86_64WordSize; 5735 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); 5736 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); 5737 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high)); 5738 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP)); 5739 } 5740 } else if (source.IsConstant()) { 5741 HConstant* constant = source.GetConstant(); 5742 if (constant->IsIntConstant() || constant->IsNullConstant()) { 5743 int32_t value = CodeGenerator::GetInt32ValueOf(constant); 5744 if (destination.IsRegister()) { 5745 if (value == 0) { 5746 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); 5747 } else { 5748 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value)); 5749 } 5750 } else { 5751 DCHECK(destination.IsStackSlot()) << destination; 5752 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value)); 5753 } 5754 } else if (constant->IsLongConstant()) { 5755 int64_t value = constant->AsLongConstant()->GetValue(); 5756 if (destination.IsRegister()) { 5757 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value); 5758 } else { 5759 DCHECK(destination.IsDoubleStackSlot()) << destination; 5760 codegen_->Store64BitValueToStack(destination, value); 5761 } 5762 } else if (constant->IsFloatConstant()) { 5763 float fp_value = constant->AsFloatConstant()->GetValue(); 5764 if (destination.IsFpuRegister()) { 5765 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 5766 codegen_->Load32BitValue(dest, fp_value); 5767 } else { 5768 DCHECK(destination.IsStackSlot()) << destination; 5769 Immediate imm(bit_cast<int32_t, float>(fp_value)); 5770 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm); 5771 } 5772 } else { 5773 DCHECK(constant->IsDoubleConstant()) << constant->DebugName(); 5774 double fp_value = constant->AsDoubleConstant()->GetValue(); 5775 int64_t value = bit_cast<int64_t, double>(fp_value); 5776 if (destination.IsFpuRegister()) { 5777 XmmRegister dest = destination.AsFpuRegister<XmmRegister>(); 5778 codegen_->Load64BitValue(dest, fp_value); 5779 } else { 5780 DCHECK(destination.IsDoubleStackSlot()) << destination; 5781 codegen_->Store64BitValueToStack(destination, value); 5782 } 5783 } 5784 } else if (source.IsFpuRegister()) { 5785 if (destination.IsFpuRegister()) { 5786 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>()); 5787 } else if (destination.IsStackSlot()) { 5788 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()), 5789 source.AsFpuRegister<XmmRegister>()); 5790 } else if (destination.IsDoubleStackSlot()) { 5791 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()), 5792 source.AsFpuRegister<XmmRegister>()); 5793 } else { 5794 DCHECK(destination.IsSIMDStackSlot()); 5795 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()), 5796 source.AsFpuRegister<XmmRegister>()); 5797 } 5798 } 5799 } 5800 5801 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) { 5802 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5803 __ 
movl(Address(CpuRegister(RSP), mem), reg); 5804 __ movl(reg, CpuRegister(TMP)); 5805 } 5806 5807 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { 5808 __ movq(CpuRegister(TMP), reg1); 5809 __ movq(reg1, reg2); 5810 __ movq(reg2, CpuRegister(TMP)); 5811 } 5812 5813 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { 5814 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5815 __ movq(Address(CpuRegister(RSP), mem), reg); 5816 __ movq(reg, CpuRegister(TMP)); 5817 } 5818 5819 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { 5820 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5821 __ movss(Address(CpuRegister(RSP), mem), reg); 5822 __ movd(reg, CpuRegister(TMP)); 5823 } 5824 5825 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { 5826 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); 5827 __ movsd(Address(CpuRegister(RSP), mem), reg); 5828 __ movd(reg, CpuRegister(TMP)); 5829 } 5830 5831 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) { 5832 size_t extra_slot = 2 * kX86_64WordSize; 5833 __ subq(CpuRegister(RSP), Immediate(extra_slot)); 5834 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg)); 5835 ExchangeMemory64(0, mem + extra_slot, 2); 5836 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0)); 5837 __ addq(CpuRegister(RSP), Immediate(extra_slot)); 5838 } 5839 5840 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) { 5841 ScratchRegisterScope ensure_scratch( 5842 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); 5843 5844 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; 5845 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); 5846 __ movl(CpuRegister(ensure_scratch.GetRegister()), 5847 Address(CpuRegister(RSP), mem2 + stack_offset)); 5848 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); 5849 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset), 5850 CpuRegister(ensure_scratch.GetRegister())); 5851 } 5852 5853 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) { 5854 ScratchRegisterScope ensure_scratch( 5855 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); 5856 5857 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; 5858 5859 // Now that temp registers are available (possibly spilled), exchange blocks of memory. 
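// Each loop iteration below effectively emits:
//   movq TMP,     [RSP + mem1 + stack_offset]
//   movq scratch, [RSP + mem2 + stack_offset]
//   movq [RSP + mem2 + stack_offset], TMP
//   movq [RSP + mem1 + stack_offset], scratch
// with stack_offset advancing by one qword each time.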
5860 for (int i = 0; i < num_of_qwords; i++) { 5861 __ movq(CpuRegister(TMP), 5862 Address(CpuRegister(RSP), mem1 + stack_offset)); 5863 __ movq(CpuRegister(ensure_scratch.GetRegister()), 5864 Address(CpuRegister(RSP), mem2 + stack_offset)); 5865 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), 5866 CpuRegister(TMP)); 5867 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), 5868 CpuRegister(ensure_scratch.GetRegister())); 5869 stack_offset += kX86_64WordSize; 5870 } 5871 } 5872 5873 void ParallelMoveResolverX86_64::EmitSwap(size_t index) { 5874 MoveOperands* move = moves_[index]; 5875 Location source = move->GetSource(); 5876 Location destination = move->GetDestination(); 5877 5878 if (source.IsRegister() && destination.IsRegister()) { 5879 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>()); 5880 } else if (source.IsRegister() && destination.IsStackSlot()) { 5881 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); 5882 } else if (source.IsStackSlot() && destination.IsRegister()) { 5883 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); 5884 } else if (source.IsStackSlot() && destination.IsStackSlot()) { 5885 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex()); 5886 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) { 5887 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); 5888 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) { 5889 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); 5890 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { 5891 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1); 5892 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { 5893 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>()); 5894 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>()); 5895 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP)); 5896 } else if (source.IsFpuRegister() && destination.IsStackSlot()) { 5897 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 5898 } else if (source.IsStackSlot() && destination.IsFpuRegister()) { 5899 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 5900 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) { 5901 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 5902 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) { 5903 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 5904 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) { 5905 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2); 5906 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) { 5907 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); 5908 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) { 5909 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); 5910 } else { 5911 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination; 5912 } 5913 } 5914 5915 5916 void ParallelMoveResolverX86_64::SpillScratch(int reg) { 5917 __ pushq(CpuRegister(reg)); 5918 } 5919 5920 5921 void ParallelMoveResolverX86_64::RestoreScratch(int reg) { 5922 __ popq(CpuRegister(reg)); 5923 } 5924 5925 void 
InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( 5926 SlowPathCode* slow_path, CpuRegister class_reg) { 5927 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); 5928 const size_t status_byte_offset = 5929 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); 5930 constexpr uint32_t shifted_visibly_initialized_value = 5931 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte); 5932 5933 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value)); 5934 __ j(kBelow, slow_path->GetEntryLabel()); 5935 __ Bind(slow_path->GetExitLabel()); 5936 } 5937 5938 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, 5939 CpuRegister temp) { 5940 uint32_t path_to_root = check->GetBitstringPathToRoot(); 5941 uint32_t mask = check->GetBitstringMask(); 5942 DCHECK(IsPowerOfTwo(mask + 1)); 5943 size_t mask_bits = WhichPowerOf2(mask + 1); 5944 5945 if (mask_bits == 16u) { 5946 // Compare the bitstring in memory. 5947 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root)); 5948 } else { 5949 // /* uint32_t */ temp = temp->status_ 5950 __ movl(temp, Address(temp, mirror::Class::StatusOffset())); 5951 // Compare the bitstring bits using SUB. 5952 __ subl(temp, Immediate(path_to_root)); 5953 // Shift out bits that do not contribute to the comparison. 5954 __ shll(temp, Immediate(32u - mask_bits)); 5955 } 5956 } 5957 5958 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( 5959 HLoadClass::LoadKind desired_class_load_kind) { 5960 switch (desired_class_load_kind) { 5961 case HLoadClass::LoadKind::kInvalid: 5962 LOG(FATAL) << "UNREACHABLE"; 5963 UNREACHABLE(); 5964 case HLoadClass::LoadKind::kReferrersClass: 5965 break; 5966 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 5967 case HLoadClass::LoadKind::kBootImageRelRo: 5968 case HLoadClass::LoadKind::kBssEntry: 5969 DCHECK(!Runtime::Current()->UseJitCompilation()); 5970 break; 5971 case HLoadClass::LoadKind::kJitBootImageAddress: 5972 case HLoadClass::LoadKind::kJitTableAddress: 5973 DCHECK(Runtime::Current()->UseJitCompilation()); 5974 break; 5975 case HLoadClass::LoadKind::kRuntimeCall: 5976 break; 5977 } 5978 return desired_class_load_kind; 5979 } 5980 5981 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { 5982 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 5983 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 5984 // Custom calling convention: RAX serves as both input and output. 5985 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( 5986 cls, 5987 Location::RegisterLocation(RAX), 5988 Location::RegisterLocation(RAX)); 5989 return; 5990 } 5991 DCHECK(!cls->NeedsAccessCheck()); 5992 5993 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); 5994 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) 5995 ? LocationSummary::kCallOnSlowPath 5996 : LocationSummary::kNoCall; 5997 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); 5998 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { 5999 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
6000 } 6001 6002 if (load_kind == HLoadClass::LoadKind::kReferrersClass) { 6003 locations->SetInAt(0, Location::RequiresRegister()); 6004 } 6005 locations->SetOut(Location::RequiresRegister()); 6006 if (load_kind == HLoadClass::LoadKind::kBssEntry) { 6007 if (!kUseReadBarrier || kUseBakerReadBarrier) { 6008 // Rely on the type resolution and/or initialization to save everything. 6009 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 6010 } else { 6011 // For non-Baker read barrier we have a temp-clobbering call. 6012 } 6013 } 6014 } 6015 6016 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file, 6017 dex::TypeIndex type_index, 6018 Handle<mirror::Class> handle) { 6019 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); 6020 // Add a patch entry and return the label. 6021 jit_class_patches_.emplace_back(&dex_file, type_index.index_); 6022 PatchInfo<Label>* info = &jit_class_patches_.back(); 6023 return &info->label; 6024 } 6025 6026 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 6027 // move. 6028 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { 6029 HLoadClass::LoadKind load_kind = cls->GetLoadKind(); 6030 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { 6031 codegen_->GenerateLoadClassRuntimeCall(cls); 6032 return; 6033 } 6034 DCHECK(!cls->NeedsAccessCheck()); 6035 6036 LocationSummary* locations = cls->GetLocations(); 6037 Location out_loc = locations->Out(); 6038 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 6039 6040 const ReadBarrierOption read_barrier_option = cls->IsInBootImage() 6041 ? kWithoutReadBarrier 6042 : kCompilerReadBarrierOption; 6043 bool generate_null_check = false; 6044 switch (load_kind) { 6045 case HLoadClass::LoadKind::kReferrersClass: { 6046 DCHECK(!cls->CanCallRuntime()); 6047 DCHECK(!cls->MustGenerateClinitCheck()); 6048 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_ 6049 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>(); 6050 GenerateGcRootFieldLoad( 6051 cls, 6052 out_loc, 6053 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()), 6054 /* fixup_label= */ nullptr, 6055 read_barrier_option); 6056 break; 6057 } 6058 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: 6059 DCHECK(codegen_->GetCompilerOptions().IsBootImage() || 6060 codegen_->GetCompilerOptions().IsBootImageExtension()); 6061 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 6062 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); 6063 codegen_->RecordBootImageTypePatch(cls); 6064 break; 6065 case HLoadClass::LoadKind::kBootImageRelRo: { 6066 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 6067 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); 6068 codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls)); 6069 break; 6070 } 6071 case HLoadClass::LoadKind::kBssEntry: { 6072 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 6073 /* no_rip= */ false); 6074 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls); 6075 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ 6076 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); 6077 // No need for memory fence, thanks to the x86-64 memory model. 
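// The .bss entry starts out null and is only filled once the class has been resolved, so
// request the explicit null check that branches to LoadClassSlowPathX86_64 below.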
6078 generate_null_check = true; 6079 break; 6080 } 6081 case HLoadClass::LoadKind::kJitBootImageAddress: { 6082 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); 6083 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get()); 6084 DCHECK_NE(address, 0u); 6085 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 6086 break; 6087 } 6088 case HLoadClass::LoadKind::kJitTableAddress: { 6089 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 6090 /* no_rip= */ true); 6091 Label* fixup_label = 6092 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass()); 6093 // /* GcRoot<mirror::Class> */ out = *address 6094 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); 6095 break; 6096 } 6097 default: 6098 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind(); 6099 UNREACHABLE(); 6100 } 6101 6102 if (generate_null_check || cls->MustGenerateClinitCheck()) { 6103 DCHECK(cls->CanCallRuntime()); 6104 SlowPathCode* slow_path = 6105 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls); 6106 codegen_->AddSlowPath(slow_path); 6107 if (generate_null_check) { 6108 __ testl(out, out); 6109 __ j(kEqual, slow_path->GetEntryLabel()); 6110 } 6111 if (cls->MustGenerateClinitCheck()) { 6112 GenerateClassInitializationCheck(slow_path, out); 6113 } else { 6114 __ Bind(slow_path->GetExitLabel()); 6115 } 6116 } 6117 } 6118 6119 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) { 6120 LocationSummary* locations = 6121 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); 6122 locations->SetInAt(0, Location::RequiresRegister()); 6123 if (check->HasUses()) { 6124 locations->SetOut(Location::SameAsFirstInput()); 6125 } 6126 // Rely on the type initialization to save everything we need. 6127 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 6128 } 6129 6130 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) { 6131 // Custom calling convention: RAX serves as both input and output. 6132 Location location = Location::RegisterLocation(RAX); 6133 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location); 6134 } 6135 6136 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) { 6137 codegen_->GenerateLoadMethodHandleRuntimeCall(load); 6138 } 6139 6140 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) { 6141 // Custom calling convention: RAX serves as both input and output. 6142 Location location = Location::RegisterLocation(RAX); 6143 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location); 6144 } 6145 6146 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) { 6147 codegen_->GenerateLoadMethodTypeRuntimeCall(load); 6148 } 6149 6150 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { 6151 // We assume the class is not null.
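// GenerateClassInitializationCheck() compares the class status byte against the shifted kVisiblyInitialized value and branches to the slow path when the class is not yet visibly initialized.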
6152 SlowPathCode* slow_path = 6153 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check); 6154 codegen_->AddSlowPath(slow_path); 6155 GenerateClassInitializationCheck(slow_path, 6156 check->GetLocations()->InAt(0).AsRegister<CpuRegister>()); 6157 } 6158 6159 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( 6160 HLoadString::LoadKind desired_string_load_kind) { 6161 switch (desired_string_load_kind) { 6162 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: 6163 case HLoadString::LoadKind::kBootImageRelRo: 6164 case HLoadString::LoadKind::kBssEntry: 6165 DCHECK(!Runtime::Current()->UseJitCompilation()); 6166 break; 6167 case HLoadString::LoadKind::kJitBootImageAddress: 6168 case HLoadString::LoadKind::kJitTableAddress: 6169 DCHECK(Runtime::Current()->UseJitCompilation()); 6170 break; 6171 case HLoadString::LoadKind::kRuntimeCall: 6172 break; 6173 } 6174 return desired_string_load_kind; 6175 } 6176 6177 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { 6178 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); 6179 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); 6180 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { 6181 locations->SetOut(Location::RegisterLocation(RAX)); 6182 } else { 6183 locations->SetOut(Location::RequiresRegister()); 6184 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { 6185 if (!kUseReadBarrier || kUseBakerReadBarrier) { 6186 // Rely on the pResolveString to save everything. 6187 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves()); 6188 } else { 6189 // For non-Baker read barrier we have a temp-clobbering call. 6190 } 6191 } 6192 } 6193 } 6194 6195 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, 6196 dex::StringIndex string_index, 6197 Handle<mirror::String> handle) { 6198 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); 6199 // Add a patch entry and return the label. 6200 jit_string_patches_.emplace_back(&dex_file, string_index.index_); 6201 PatchInfo<Label>* info = &jit_string_patches_.back(); 6202 return &info->label; 6203 } 6204 6205 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not 6206 // move. 
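// Note: this mirrors VisitLoadClass above: boot-image kinds emit a PC-relative address with a linker patch, kBssEntry loads through a .bss slot backed by a null-check slow path, and the JIT kinds either embed the address or go through the JIT root table.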
6207 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS { 6208 LocationSummary* locations = load->GetLocations(); 6209 Location out_loc = locations->Out(); 6210 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 6211 6212 switch (load->GetLoadKind()) { 6213 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { 6214 DCHECK(codegen_->GetCompilerOptions().IsBootImage() || 6215 codegen_->GetCompilerOptions().IsBootImageExtension()); 6216 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); 6217 codegen_->RecordBootImageStringPatch(load); 6218 return; 6219 } 6220 case HLoadString::LoadKind::kBootImageRelRo: { 6221 DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); 6222 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false)); 6223 codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load)); 6224 return; 6225 } 6226 case HLoadString::LoadKind::kBssEntry: { 6227 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 6228 /* no_rip= */ false); 6229 Label* fixup_label = codegen_->NewStringBssEntryPatch(load); 6230 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ 6231 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); 6232 // No need for memory fence, thanks to the x86-64 memory model. 6233 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); 6234 codegen_->AddSlowPath(slow_path); 6235 __ testl(out, out); 6236 __ j(kEqual, slow_path->GetEntryLabel()); 6237 __ Bind(slow_path->GetExitLabel()); 6238 return; 6239 } 6240 case HLoadString::LoadKind::kJitBootImageAddress: { 6241 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get()); 6242 DCHECK_NE(address, 0u); 6243 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. 6244 return; 6245 } 6246 case HLoadString::LoadKind::kJitTableAddress: { 6247 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, 6248 /* no_rip= */ true); 6249 Label* fixup_label = codegen_->NewJitRootStringPatch( 6250 load->GetDexFile(), load->GetStringIndex(), load->GetString()); 6251 // /* GcRoot<mirror::String> */ out = *address 6252 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); 6253 return; 6254 } 6255 default: 6256 break; 6257 } 6258 6259 // TODO: Re-add the compiler code to do string dex cache lookup again. 6260 // Custom calling convention: RAX serves as both input and output.
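// Only HLoadString::LoadKind::kRuntimeCall reaches this point; every other supported load kind returned above.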
6261 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_)); 6262 codegen_->InvokeRuntime(kQuickResolveString, 6263 load, 6264 load->GetDexPc()); 6265 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); 6266 } 6267 6268 static Address GetExceptionTlsAddress() { 6269 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(), 6270 /* no_rip= */ true); 6271 } 6272 6273 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { 6274 LocationSummary* locations = 6275 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); 6276 locations->SetOut(Location::RequiresRegister()); 6277 } 6278 6279 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) { 6280 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress()); 6281 } 6282 6283 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) { 6284 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); 6285 } 6286 6287 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { 6288 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0)); 6289 } 6290 6291 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) { 6292 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 6293 instruction, LocationSummary::kCallOnMainOnly); 6294 InvokeRuntimeCallingConvention calling_convention; 6295 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 6296 } 6297 6298 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { 6299 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc()); 6300 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>(); 6301 } 6302 6303 // Temp is used for read barrier. 6304 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { 6305 if (kEmitCompilerReadBarrier && 6306 !kUseBakerReadBarrier && 6307 (type_check_kind == TypeCheckKind::kAbstractClassCheck || 6308 type_check_kind == TypeCheckKind::kClassHierarchyCheck || 6309 type_check_kind == TypeCheckKind::kArrayObjectCheck)) { 6310 return 1; 6311 } 6312 return 0; 6313 } 6314 6315 // Interface case has 2 temps, one for holding the number of interfaces, one for the current 6316 // interface pointer, the current interface is compared in memory. 6317 // The other checks have one temp for loading the object's class. 6318 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { 6319 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 6320 return 2; 6321 } 6322 return 1 + NumberOfInstanceOfTemps(type_check_kind); 6323 } 6324 6325 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { 6326 LocationSummary::CallKind call_kind = LocationSummary::kNoCall; 6327 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6328 bool baker_read_barrier_slow_path = false; 6329 switch (type_check_kind) { 6330 case TypeCheckKind::kExactCheck: 6331 case TypeCheckKind::kAbstractClassCheck: 6332 case TypeCheckKind::kClassHierarchyCheck: 6333 case TypeCheckKind::kArrayObjectCheck: { 6334 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); 6335 call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; 6336 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; 6337 break; 6338 } 6339 case TypeCheckKind::kArrayCheck: 6340 case TypeCheckKind::kUnresolvedCheck: 6341 case TypeCheckKind::kInterfaceCheck: 6342 call_kind = LocationSummary::kCallOnSlowPath; 6343 break; 6344 case TypeCheckKind::kBitstringCheck: 6345 break; 6346 } 6347 6348 LocationSummary* locations = 6349 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 6350 if (baker_read_barrier_slow_path) { 6351 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 6352 } 6353 locations->SetInAt(0, Location::RequiresRegister()); 6354 if (type_check_kind == TypeCheckKind::kBitstringCheck) { 6355 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 6356 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 6357 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 6358 } else { 6359 locations->SetInAt(1, Location::Any()); 6360 } 6361 // Note that TypeCheckSlowPathX86_64 uses this "out" register too. 6362 locations->SetOut(Location::RequiresRegister()); 6363 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); 6364 } 6365 6366 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { 6367 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6368 LocationSummary* locations = instruction->GetLocations(); 6369 Location obj_loc = locations->InAt(0); 6370 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 6371 Location cls = locations->InAt(1); 6372 Location out_loc = locations->Out(); 6373 CpuRegister out = out_loc.AsRegister<CpuRegister>(); 6374 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); 6375 DCHECK_LE(num_temps, 1u); 6376 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation(); 6377 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 6378 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 6379 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 6380 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 6381 SlowPathCode* slow_path = nullptr; 6382 NearLabel done, zero; 6383 6384 // Return 0 if `obj` is null. 6385 // Avoid null check if we know obj is not null. 6386 if (instruction->MustDoNullCheck()) { 6387 __ testl(obj, obj); 6388 __ j(kEqual, &zero); 6389 } 6390 6391 switch (type_check_kind) { 6392 case TypeCheckKind::kExactCheck: { 6393 ReadBarrierOption read_barrier_option = 6394 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6395 // /* HeapReference<Class> */ out = obj->klass_ 6396 GenerateReferenceLoadTwoRegisters(instruction, 6397 out_loc, 6398 obj_loc, 6399 class_offset, 6400 read_barrier_option); 6401 if (cls.IsRegister()) { 6402 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6403 } else { 6404 DCHECK(cls.IsStackSlot()) << cls; 6405 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6406 } 6407 if (zero.IsLinked()) { 6408 // Classes must be equal for the instanceof to succeed. 6409 __ j(kNotEqual, &zero); 6410 __ movl(out, Immediate(1)); 6411 __ jmp(&done); 6412 } else { 6413 __ setcc(kEqual, out); 6414 // setcc only sets the low byte. 
6415 __ andl(out, Immediate(1)); 6416 } 6417 break; 6418 } 6419 6420 case TypeCheckKind::kAbstractClassCheck: { 6421 ReadBarrierOption read_barrier_option = 6422 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6423 // /* HeapReference<Class> */ out = obj->klass_ 6424 GenerateReferenceLoadTwoRegisters(instruction, 6425 out_loc, 6426 obj_loc, 6427 class_offset, 6428 read_barrier_option); 6429 // If the class is abstract, we eagerly fetch the super class of the 6430 // object to avoid doing a comparison we know will fail. 6431 NearLabel loop, success; 6432 __ Bind(&loop); 6433 // /* HeapReference<Class> */ out = out->super_class_ 6434 GenerateReferenceLoadOneRegister(instruction, 6435 out_loc, 6436 super_offset, 6437 maybe_temp_loc, 6438 read_barrier_option); 6439 __ testl(out, out); 6440 // If `out` is null, we use it for the result, and jump to `done`. 6441 __ j(kEqual, &done); 6442 if (cls.IsRegister()) { 6443 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6444 } else { 6445 DCHECK(cls.IsStackSlot()) << cls; 6446 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6447 } 6448 __ j(kNotEqual, &loop); 6449 __ movl(out, Immediate(1)); 6450 if (zero.IsLinked()) { 6451 __ jmp(&done); 6452 } 6453 break; 6454 } 6455 6456 case TypeCheckKind::kClassHierarchyCheck: { 6457 ReadBarrierOption read_barrier_option = 6458 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6459 // /* HeapReference<Class> */ out = obj->klass_ 6460 GenerateReferenceLoadTwoRegisters(instruction, 6461 out_loc, 6462 obj_loc, 6463 class_offset, 6464 read_barrier_option); 6465 // Walk over the class hierarchy to find a match. 6466 NearLabel loop, success; 6467 __ Bind(&loop); 6468 if (cls.IsRegister()) { 6469 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6470 } else { 6471 DCHECK(cls.IsStackSlot()) << cls; 6472 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6473 } 6474 __ j(kEqual, &success); 6475 // /* HeapReference<Class> */ out = out->super_class_ 6476 GenerateReferenceLoadOneRegister(instruction, 6477 out_loc, 6478 super_offset, 6479 maybe_temp_loc, 6480 read_barrier_option); 6481 __ testl(out, out); 6482 __ j(kNotEqual, &loop); 6483 // If `out` is null, we use it for the result, and jump to `done`. 6484 __ jmp(&done); 6485 __ Bind(&success); 6486 __ movl(out, Immediate(1)); 6487 if (zero.IsLinked()) { 6488 __ jmp(&done); 6489 } 6490 break; 6491 } 6492 6493 case TypeCheckKind::kArrayObjectCheck: { 6494 ReadBarrierOption read_barrier_option = 6495 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); 6496 // /* HeapReference<Class> */ out = obj->klass_ 6497 GenerateReferenceLoadTwoRegisters(instruction, 6498 out_loc, 6499 obj_loc, 6500 class_offset, 6501 read_barrier_option); 6502 // Do an exact check. 6503 NearLabel exact_check; 6504 if (cls.IsRegister()) { 6505 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6506 } else { 6507 DCHECK(cls.IsStackSlot()) << cls; 6508 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6509 } 6510 __ j(kEqual, &exact_check); 6511 // Otherwise, we need to check that the object's class is a non-primitive array. 6512 // /* HeapReference<Class> */ out = out->component_type_ 6513 GenerateReferenceLoadOneRegister(instruction, 6514 out_loc, 6515 component_offset, 6516 maybe_temp_loc, 6517 read_barrier_option); 6518 __ testl(out, out); 6519 // If `out` is null, we use it for the result, and jump to `done`. 
6520 __ j(kEqual, &done); 6521 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot)); 6522 __ j(kNotEqual, &zero); 6523 __ Bind(&exact_check); 6524 __ movl(out, Immediate(1)); 6525 __ jmp(&done); 6526 break; 6527 } 6528 6529 case TypeCheckKind::kArrayCheck: { 6530 // No read barrier since the slow path will retry upon failure. 6531 // /* HeapReference<Class> */ out = obj->klass_ 6532 GenerateReferenceLoadTwoRegisters(instruction, 6533 out_loc, 6534 obj_loc, 6535 class_offset, 6536 kWithoutReadBarrier); 6537 if (cls.IsRegister()) { 6538 __ cmpl(out, cls.AsRegister<CpuRegister>()); 6539 } else { 6540 DCHECK(cls.IsStackSlot()) << cls; 6541 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); 6542 } 6543 DCHECK(locations->OnlyCallsOnSlowPath()); 6544 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( 6545 instruction, /* is_fatal= */ false); 6546 codegen_->AddSlowPath(slow_path); 6547 __ j(kNotEqual, slow_path->GetEntryLabel()); 6548 __ movl(out, Immediate(1)); 6549 if (zero.IsLinked()) { 6550 __ jmp(&done); 6551 } 6552 break; 6553 } 6554 6555 case TypeCheckKind::kUnresolvedCheck: 6556 case TypeCheckKind::kInterfaceCheck: { 6557 // Note that we indeed only call on slow path, but we always go 6558 // into the slow path for the unresolved and interface check 6559 // cases. 6560 // 6561 // We cannot directly call the InstanceofNonTrivial runtime 6562 // entry point without resorting to a type checking slow path 6563 // here (i.e. by calling InvokeRuntime directly), as it would 6564 // require to assign fixed registers for the inputs of this 6565 // HInstanceOf instruction (following the runtime calling 6566 // convention), which might be cluttered by the potential first 6567 // read barrier emission at the beginning of this method. 6568 // 6569 // TODO: Introduce a new runtime entry point taking the object 6570 // to test (instead of its class) as argument, and let it deal 6571 // with the read barrier issues. This will let us refactor this 6572 // case of the `switch` code as it was previously (with a direct 6573 // call to the runtime not using a type checking slow path). 6574 // This should also be beneficial for the other cases above. 6575 DCHECK(locations->OnlyCallsOnSlowPath()); 6576 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( 6577 instruction, /* is_fatal= */ false); 6578 codegen_->AddSlowPath(slow_path); 6579 __ jmp(slow_path->GetEntryLabel()); 6580 if (zero.IsLinked()) { 6581 __ jmp(&done); 6582 } 6583 break; 6584 } 6585 6586 case TypeCheckKind::kBitstringCheck: { 6587 // /* HeapReference<Class> */ temp = obj->klass_ 6588 GenerateReferenceLoadTwoRegisters(instruction, 6589 out_loc, 6590 obj_loc, 6591 class_offset, 6592 kWithoutReadBarrier); 6593 6594 GenerateBitstringTypeCheckCompare(instruction, out); 6595 if (zero.IsLinked()) { 6596 __ j(kNotEqual, &zero); 6597 __ movl(out, Immediate(1)); 6598 __ jmp(&done); 6599 } else { 6600 __ setcc(kEqual, out); 6601 // setcc only sets the low byte. 
6602 __ andl(out, Immediate(1)); 6603 } 6604 break; 6605 } 6606 } 6607 6608 if (zero.IsLinked()) { 6609 __ Bind(&zero); 6610 __ xorl(out, out); 6611 } 6612 6613 if (done.IsLinked()) { 6614 __ Bind(&done); 6615 } 6616 6617 if (slow_path != nullptr) { 6618 __ Bind(slow_path->GetExitLabel()); 6619 } 6620 } 6621 6622 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { 6623 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6624 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); 6625 LocationSummary* locations = 6626 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); 6627 locations->SetInAt(0, Location::RequiresRegister()); 6628 if (type_check_kind == TypeCheckKind::kInterfaceCheck) { 6629 // Require a register for the interface check since there is a loop that compares the class to 6630 // a memory address. 6631 locations->SetInAt(1, Location::RequiresRegister()); 6632 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) { 6633 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); 6634 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); 6635 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant())); 6636 } else { 6637 locations->SetInAt(1, Location::Any()); 6638 } 6639 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86. 6640 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); 6641 } 6642 6643 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { 6644 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); 6645 LocationSummary* locations = instruction->GetLocations(); 6646 Location obj_loc = locations->InAt(0); 6647 CpuRegister obj = obj_loc.AsRegister<CpuRegister>(); 6648 Location cls = locations->InAt(1); 6649 Location temp_loc = locations->GetTemp(0); 6650 CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); 6651 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); 6652 DCHECK_GE(num_temps, 1u); 6653 DCHECK_LE(num_temps, 2u); 6654 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation(); 6655 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 6656 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 6657 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 6658 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 6659 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value(); 6660 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value(); 6661 const uint32_t object_array_data_offset = 6662 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); 6663 6664 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); 6665 SlowPathCode* type_check_slow_path = 6666 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( 6667 instruction, is_type_check_slow_path_fatal); 6668 codegen_->AddSlowPath(type_check_slow_path); 6669 6670 6671 NearLabel done; 6672 // Avoid null check if we know obj is not null. 
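// A null reference passes any checkcast, so a null `obj` simply jumps to `done` and skips the type check.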
6673 if (instruction->MustDoNullCheck()) { 6674 __ testl(obj, obj); 6675 __ j(kEqual, &done); 6676 } 6677 6678 switch (type_check_kind) { 6679 case TypeCheckKind::kExactCheck: 6680 case TypeCheckKind::kArrayCheck: { 6681 // /* HeapReference<Class> */ temp = obj->klass_ 6682 GenerateReferenceLoadTwoRegisters(instruction, 6683 temp_loc, 6684 obj_loc, 6685 class_offset, 6686 kWithoutReadBarrier); 6687 if (cls.IsRegister()) { 6688 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6689 } else { 6690 DCHECK(cls.IsStackSlot()) << cls; 6691 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6692 } 6693 // Jump to slow path for throwing the exception or doing a 6694 // more involved array check. 6695 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 6696 break; 6697 } 6698 6699 case TypeCheckKind::kAbstractClassCheck: { 6700 // /* HeapReference<Class> */ temp = obj->klass_ 6701 GenerateReferenceLoadTwoRegisters(instruction, 6702 temp_loc, 6703 obj_loc, 6704 class_offset, 6705 kWithoutReadBarrier); 6706 // If the class is abstract, we eagerly fetch the super class of the 6707 // object to avoid doing a comparison we know will fail. 6708 NearLabel loop; 6709 __ Bind(&loop); 6710 // /* HeapReference<Class> */ temp = temp->super_class_ 6711 GenerateReferenceLoadOneRegister(instruction, 6712 temp_loc, 6713 super_offset, 6714 maybe_temp2_loc, 6715 kWithoutReadBarrier); 6716 6717 // If the class reference currently in `temp` is null, jump to the slow path to throw the 6718 // exception. 6719 __ testl(temp, temp); 6720 // Otherwise, compare the classes. 6721 __ j(kZero, type_check_slow_path->GetEntryLabel()); 6722 if (cls.IsRegister()) { 6723 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6724 } else { 6725 DCHECK(cls.IsStackSlot()) << cls; 6726 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6727 } 6728 __ j(kNotEqual, &loop); 6729 break; 6730 } 6731 6732 case TypeCheckKind::kClassHierarchyCheck: { 6733 // /* HeapReference<Class> */ temp = obj->klass_ 6734 GenerateReferenceLoadTwoRegisters(instruction, 6735 temp_loc, 6736 obj_loc, 6737 class_offset, 6738 kWithoutReadBarrier); 6739 // Walk over the class hierarchy to find a match. 6740 NearLabel loop; 6741 __ Bind(&loop); 6742 if (cls.IsRegister()) { 6743 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6744 } else { 6745 DCHECK(cls.IsStackSlot()) << cls; 6746 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6747 } 6748 __ j(kEqual, &done); 6749 6750 // /* HeapReference<Class> */ temp = temp->super_class_ 6751 GenerateReferenceLoadOneRegister(instruction, 6752 temp_loc, 6753 super_offset, 6754 maybe_temp2_loc, 6755 kWithoutReadBarrier); 6756 6757 // If the class reference currently in `temp` is not null, jump 6758 // back at the beginning of the loop. 6759 __ testl(temp, temp); 6760 __ j(kNotZero, &loop); 6761 // Otherwise, jump to the slow path to throw the exception. 6762 __ jmp(type_check_slow_path->GetEntryLabel()); 6763 break; 6764 } 6765 6766 case TypeCheckKind::kArrayObjectCheck: { 6767 // /* HeapReference<Class> */ temp = obj->klass_ 6768 GenerateReferenceLoadTwoRegisters(instruction, 6769 temp_loc, 6770 obj_loc, 6771 class_offset, 6772 kWithoutReadBarrier); 6773 // Do an exact check. 
6774 NearLabel check_non_primitive_component_type; 6775 if (cls.IsRegister()) { 6776 __ cmpl(temp, cls.AsRegister<CpuRegister>()); 6777 } else { 6778 DCHECK(cls.IsStackSlot()) << cls; 6779 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex())); 6780 } 6781 __ j(kEqual, &done); 6782 6783 // Otherwise, we need to check that the object's class is a non-primitive array. 6784 // /* HeapReference<Class> */ temp = temp->component_type_ 6785 GenerateReferenceLoadOneRegister(instruction, 6786 temp_loc, 6787 component_offset, 6788 maybe_temp2_loc, 6789 kWithoutReadBarrier); 6790 6791 // If the component type is not null (i.e. the object is indeed 6792 // an array), jump to label `check_non_primitive_component_type` 6793 // to further check that this component type is not a primitive 6794 // type. 6795 __ testl(temp, temp); 6796 // Otherwise, jump to the slow path to throw the exception. 6797 __ j(kZero, type_check_slow_path->GetEntryLabel()); 6798 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot)); 6799 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 6800 break; 6801 } 6802 6803 case TypeCheckKind::kUnresolvedCheck: { 6804 // We always go into the type check slow path for the unresolved case. 6805 // 6806 // We cannot directly call the CheckCast runtime entry point 6807 // without resorting to a type checking slow path here (i.e. by 6808 // calling InvokeRuntime directly), as it would require to 6809 // assign fixed registers for the inputs of this HInstanceOf 6810 // instruction (following the runtime calling convention), which 6811 // might be cluttered by the potential first read barrier 6812 // emission at the beginning of this method. 6813 __ jmp(type_check_slow_path->GetEntryLabel()); 6814 break; 6815 } 6816 6817 case TypeCheckKind::kInterfaceCheck: { 6818 // Fast path for the interface check. Try to avoid read barriers to improve the fast path. 6819 // We can not get false positives by doing this. 6820 // /* HeapReference<Class> */ temp = obj->klass_ 6821 GenerateReferenceLoadTwoRegisters(instruction, 6822 temp_loc, 6823 obj_loc, 6824 class_offset, 6825 kWithoutReadBarrier); 6826 6827 // /* HeapReference<Class> */ temp = temp->iftable_ 6828 GenerateReferenceLoadTwoRegisters(instruction, 6829 temp_loc, 6830 temp_loc, 6831 iftable_offset, 6832 kWithoutReadBarrier); 6833 // Iftable is never null. 6834 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); 6835 // Maybe poison the `cls` for direct comparison with memory. 6836 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>()); 6837 // Loop through the iftable and check if any class matches. 6838 NearLabel start_loop; 6839 __ Bind(&start_loop); 6840 // Need to subtract first to handle the empty array case. 6841 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2)); 6842 __ j(kNegative, type_check_slow_path->GetEntryLabel()); 6843 // Go to next interface if the classes do not match. 6844 __ cmpl(cls.AsRegister<CpuRegister>(), 6845 CodeGeneratorX86_64::ArrayAddress(temp, 6846 maybe_temp2_loc, 6847 TIMES_4, 6848 object_array_data_offset)); 6849 __ j(kNotEqual, &start_loop); // Return if same class. 6850 // If `cls` was poisoned above, unpoison it. 
6851 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); 6852 break; 6853 } 6854 6855 case TypeCheckKind::kBitstringCheck: { 6856 // /* HeapReference<Class> */ temp = obj->klass_ 6857 GenerateReferenceLoadTwoRegisters(instruction, 6858 temp_loc, 6859 obj_loc, 6860 class_offset, 6861 kWithoutReadBarrier); 6862 6863 GenerateBitstringTypeCheckCompare(instruction, temp); 6864 __ j(kNotEqual, type_check_slow_path->GetEntryLabel()); 6865 break; 6866 } 6867 } 6868 6869 if (done.IsLinked()) { 6870 __ Bind(&done); 6871 } 6872 6873 __ Bind(type_check_slow_path->GetExitLabel()); 6874 } 6875 6876 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { 6877 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( 6878 instruction, LocationSummary::kCallOnMainOnly); 6879 InvokeRuntimeCallingConvention calling_convention; 6880 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); 6881 } 6882 6883 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { 6884 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject, 6885 instruction, 6886 instruction->GetDexPc()); 6887 if (instruction->IsEnter()) { 6888 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>(); 6889 } else { 6890 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); 6891 } 6892 } 6893 6894 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) { 6895 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); 6896 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); 6897 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 6898 locations->SetInAt(0, Location::RequiresRegister()); 6899 // There is no immediate variant of negated bitwise and in X86. 
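// The code generator (VisitX86AndNot below) emits `andn dest, first, second` with all three operands in registers.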
6900 locations->SetInAt(1, Location::RequiresRegister()); 6901 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 6902 } 6903 6904 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { 6905 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2()); 6906 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType(); 6907 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); 6908 locations->SetInAt(0, Location::RequiresRegister()); 6909 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 6910 } 6911 6912 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) { 6913 LocationSummary* locations = instruction->GetLocations(); 6914 Location first = locations->InAt(0); 6915 Location second = locations->InAt(1); 6916 Location dest = locations->Out(); 6917 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6918 } 6919 6920 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) { 6921 LocationSummary* locations = instruction->GetLocations(); 6922 Location src = locations->InAt(0); 6923 Location dest = locations->Out(); 6924 switch (instruction->GetOpKind()) { 6925 case HInstruction::kAnd: 6926 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>()); 6927 break; 6928 case HInstruction::kXor: 6929 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>()); 6930 break; 6931 default: 6932 LOG(FATAL) << "Unreachable"; 6933 } 6934 } 6935 6936 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); } 6937 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); } 6938 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); } 6939 6940 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) { 6941 LocationSummary* locations = 6942 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); 6943 DCHECK(instruction->GetResultType() == DataType::Type::kInt32 6944 || instruction->GetResultType() == DataType::Type::kInt64); 6945 locations->SetInAt(0, Location::RequiresRegister()); 6946 locations->SetInAt(1, Location::Any()); 6947 locations->SetOut(Location::SameAsFirstInput()); 6948 } 6949 6950 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) { 6951 HandleBitwiseOperation(instruction); 6952 } 6953 6954 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) { 6955 HandleBitwiseOperation(instruction); 6956 } 6957 6958 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) { 6959 HandleBitwiseOperation(instruction); 6960 } 6961 6962 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) { 6963 LocationSummary* locations = instruction->GetLocations(); 6964 Location first = locations->InAt(0); 6965 Location second = locations->InAt(1); 6966 DCHECK(first.Equals(locations->Out())); 6967 6968 if (instruction->GetResultType() == DataType::Type::kInt32) { 6969 if (second.IsRegister()) { 6970 if (instruction->IsAnd()) { 6971 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6972 } else if (instruction->IsOr()) { 6973 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6974 } else { 6975 DCHECK(instruction->IsXor()); 6976 __ 
xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); 6977 } 6978 } else if (second.IsConstant()) { 6979 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue()); 6980 if (instruction->IsAnd()) { 6981 __ andl(first.AsRegister<CpuRegister>(), imm); 6982 } else if (instruction->IsOr()) { 6983 __ orl(first.AsRegister<CpuRegister>(), imm); 6984 } else { 6985 DCHECK(instruction->IsXor()); 6986 __ xorl(first.AsRegister<CpuRegister>(), imm); 6987 } 6988 } else { 6989 Address address(CpuRegister(RSP), second.GetStackIndex()); 6990 if (instruction->IsAnd()) { 6991 __ andl(first.AsRegister<CpuRegister>(), address); 6992 } else if (instruction->IsOr()) { 6993 __ orl(first.AsRegister<CpuRegister>(), address); 6994 } else { 6995 DCHECK(instruction->IsXor()); 6996 __ xorl(first.AsRegister<CpuRegister>(), address); 6997 } 6998 } 6999 } else { 7000 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 7001 CpuRegister first_reg = first.AsRegister<CpuRegister>(); 7002 bool second_is_constant = false; 7003 int64_t value = 0; 7004 if (second.IsConstant()) { 7005 second_is_constant = true; 7006 value = second.GetConstant()->AsLongConstant()->GetValue(); 7007 } 7008 bool is_int32_value = IsInt<32>(value); 7009 7010 if (instruction->IsAnd()) { 7011 if (second_is_constant) { 7012 if (is_int32_value) { 7013 __ andq(first_reg, Immediate(static_cast<int32_t>(value))); 7014 } else { 7015 __ andq(first_reg, codegen_->LiteralInt64Address(value)); 7016 } 7017 } else if (second.IsDoubleStackSlot()) { 7018 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 7019 } else { 7020 __ andq(first_reg, second.AsRegister<CpuRegister>()); 7021 } 7022 } else if (instruction->IsOr()) { 7023 if (second_is_constant) { 7024 if (is_int32_value) { 7025 __ orq(first_reg, Immediate(static_cast<int32_t>(value))); 7026 } else { 7027 __ orq(first_reg, codegen_->LiteralInt64Address(value)); 7028 } 7029 } else if (second.IsDoubleStackSlot()) { 7030 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 7031 } else { 7032 __ orq(first_reg, second.AsRegister<CpuRegister>()); 7033 } 7034 } else { 7035 DCHECK(instruction->IsXor()); 7036 if (second_is_constant) { 7037 if (is_int32_value) { 7038 __ xorq(first_reg, Immediate(static_cast<int32_t>(value))); 7039 } else { 7040 __ xorq(first_reg, codegen_->LiteralInt64Address(value)); 7041 } 7042 } else if (second.IsDoubleStackSlot()) { 7043 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex())); 7044 } else { 7045 __ xorq(first_reg, second.AsRegister<CpuRegister>()); 7046 } 7047 } 7048 } 7049 } 7050 7051 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister( 7052 HInstruction* instruction, 7053 Location out, 7054 uint32_t offset, 7055 Location maybe_temp, 7056 ReadBarrierOption read_barrier_option) { 7057 CpuRegister out_reg = out.AsRegister<CpuRegister>(); 7058 if (read_barrier_option == kWithReadBarrier) { 7059 CHECK(kEmitCompilerReadBarrier); 7060 if (kUseBakerReadBarrier) { 7061 // Load with fast path based Baker's read barrier. 7062 // /* HeapReference<Object> */ out = *(out + offset) 7063 codegen_->GenerateFieldLoadWithBakerReadBarrier( 7064 instruction, out, out_reg, offset, /* needs_null_check= */ false); 7065 } else { 7066 // Load with slow path based read barrier. 7067 // Save the value of `out` into `maybe_temp` before overwriting it 7068 // in the following move operation, as we will need it for the 7069 // read barrier below. 
7070 DCHECK(maybe_temp.IsRegister()) << maybe_temp; 7071 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg); 7072 // /* HeapReference<Object> */ out = *(out + offset) 7073 __ movl(out_reg, Address(out_reg, offset)); 7074 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); 7075 } 7076 } else { 7077 // Plain load with no read barrier. 7078 // /* HeapReference<Object> */ out = *(out + offset) 7079 __ movl(out_reg, Address(out_reg, offset)); 7080 __ MaybeUnpoisonHeapReference(out_reg); 7081 } 7082 } 7083 7084 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters( 7085 HInstruction* instruction, 7086 Location out, 7087 Location obj, 7088 uint32_t offset, 7089 ReadBarrierOption read_barrier_option) { 7090 CpuRegister out_reg = out.AsRegister<CpuRegister>(); 7091 CpuRegister obj_reg = obj.AsRegister<CpuRegister>(); 7092 if (read_barrier_option == kWithReadBarrier) { 7093 CHECK(kEmitCompilerReadBarrier); 7094 if (kUseBakerReadBarrier) { 7095 // Load with fast path based Baker's read barrier. 7096 // /* HeapReference<Object> */ out = *(obj + offset) 7097 codegen_->GenerateFieldLoadWithBakerReadBarrier( 7098 instruction, out, obj_reg, offset, /* needs_null_check= */ false); 7099 } else { 7100 // Load with slow path based read barrier. 7101 // /* HeapReference<Object> */ out = *(obj + offset) 7102 __ movl(out_reg, Address(obj_reg, offset)); 7103 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); 7104 } 7105 } else { 7106 // Plain load with no read barrier. 7107 // /* HeapReference<Object> */ out = *(obj + offset) 7108 __ movl(out_reg, Address(obj_reg, offset)); 7109 __ MaybeUnpoisonHeapReference(out_reg); 7110 } 7111 } 7112 7113 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( 7114 HInstruction* instruction, 7115 Location root, 7116 const Address& address, 7117 Label* fixup_label, 7118 ReadBarrierOption read_barrier_option) { 7119 CpuRegister root_reg = root.AsRegister<CpuRegister>(); 7120 if (read_barrier_option == kWithReadBarrier) { 7121 DCHECK(kEmitCompilerReadBarrier); 7122 if (kUseBakerReadBarrier) { 7123 // Fast path implementation of art::ReadBarrier::BarrierForRoot when 7124 // Baker's read barrier are used: 7125 // 7126 // root = obj.field; 7127 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() 7128 // if (temp != null) { 7129 // root = temp(root) 7130 // } 7131 7132 // /* GcRoot<mirror::Object> */ root = *address 7133 __ movl(root_reg, address); 7134 if (fixup_label != nullptr) { 7135 __ Bind(fixup_label); 7136 } 7137 static_assert( 7138 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), 7139 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " 7140 "have different sizes."); 7141 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), 7142 "art::mirror::CompressedReference<mirror::Object> and int32_t " 7143 "have different sizes."); 7144 7145 // Slow path marking the GC root `root`. 7146 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( 7147 instruction, root, /* unpoison_ref_before_marking= */ false); 7148 codegen_->AddSlowPath(slow_path); 7149 7150 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint. 
7151 const int32_t entry_point_offset = 7152 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg()); 7153 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0)); 7154 // The entrypoint is null when the GC is not marking. 7155 __ j(kNotEqual, slow_path->GetEntryLabel()); 7156 __ Bind(slow_path->GetExitLabel()); 7157 } else { 7158 // GC root loaded through a slow path for read barriers other 7159 // than Baker's. 7160 // /* GcRoot<mirror::Object>* */ root = address 7161 __ leaq(root_reg, address); 7162 if (fixup_label != nullptr) { 7163 __ Bind(fixup_label); 7164 } 7165 // /* mirror::Object* */ root = root->Read() 7166 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); 7167 } 7168 } else { 7169 // Plain GC root load with no read barrier. 7170 // /* GcRoot<mirror::Object> */ root = *address 7171 __ movl(root_reg, address); 7172 if (fixup_label != nullptr) { 7173 __ Bind(fixup_label); 7174 } 7175 // Note that GC roots are not affected by heap poisoning, thus we 7176 // do not have to unpoison `root_reg` here. 7177 } 7178 } 7179 7180 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, 7181 Location ref, 7182 CpuRegister obj, 7183 uint32_t offset, 7184 bool needs_null_check) { 7185 DCHECK(kEmitCompilerReadBarrier); 7186 DCHECK(kUseBakerReadBarrier); 7187 7188 // /* HeapReference<Object> */ ref = *(obj + offset) 7189 Address src(obj, offset); 7190 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 7191 } 7192 7193 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, 7194 Location ref, 7195 CpuRegister obj, 7196 uint32_t data_offset, 7197 Location index, 7198 bool needs_null_check) { 7199 DCHECK(kEmitCompilerReadBarrier); 7200 DCHECK(kUseBakerReadBarrier); 7201 7202 static_assert( 7203 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), 7204 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); 7205 // /* HeapReference<Object> */ ref = 7206 // *(obj + data_offset + index * sizeof(HeapReference<Object>)) 7207 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset); 7208 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check); 7209 } 7210 7211 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, 7212 Location ref, 7213 CpuRegister obj, 7214 const Address& src, 7215 bool needs_null_check, 7216 bool always_update_field, 7217 CpuRegister* temp1, 7218 CpuRegister* temp2) { 7219 DCHECK(kEmitCompilerReadBarrier); 7220 DCHECK(kUseBakerReadBarrier); 7221 7222 // In slow path based read barriers, the read barrier call is 7223 // inserted after the original load. However, in fast path based 7224 // Baker's read barriers, we need to perform the load of 7225 // mirror::Object::monitor_ *before* the original reference load. 7226 // This load-load ordering is required by the read barrier. 7227 // The fast path/slow path (for Baker's algorithm) should look like: 7228 // 7229 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); 7230 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 7231 // HeapReference<Object> ref = *src; // Original reference load. 7232 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 7233 // if (is_gray) { 7234 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. 
7235 // } 7236 // 7237 // Note: the original implementation in ReadBarrier::Barrier is 7238 // slightly more complex as: 7239 // - it implements the load-load fence using a data dependency on 7240 // the high-bits of rb_state, which are expected to be all zeroes 7241 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead 7242 // here, which is a no-op thanks to the x86-64 memory model); 7243 // - it performs additional checks that we do not do here for 7244 // performance reasons. 7245 7246 CpuRegister ref_reg = ref.AsRegister<CpuRegister>(); 7247 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 7248 7249 // Given the numeric representation, it's enough to check the low bit of the rb_state. 7250 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); 7251 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 7252 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte; 7253 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte; 7254 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position); 7255 7256 // if (rb_state == ReadBarrier::GrayState()) 7257 // ref = ReadBarrier::Mark(ref); 7258 // At this point, just do the "if" and make sure that flags are preserved until the branch. 7259 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value)); 7260 if (needs_null_check) { 7261 MaybeRecordImplicitNullCheck(instruction); 7262 } 7263 7264 // Load fence to prevent load-load reordering. 7265 // Note that this is a no-op, thanks to the x86-64 memory model. 7266 GenerateMemoryBarrier(MemBarrierKind::kLoadAny); 7267 7268 // The actual reference load. 7269 // /* HeapReference<Object> */ ref = *src 7270 __ movl(ref_reg, src); // Flags are unaffected. 7271 7272 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch. 7273 // Slow path marking the object `ref` when it is gray. 7274 SlowPathCode* slow_path; 7275 if (always_update_field) { 7276 DCHECK(temp1 != nullptr); 7277 DCHECK(temp2 != nullptr); 7278 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( 7279 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2); 7280 } else { 7281 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( 7282 instruction, ref, /* unpoison_ref_before_marking= */ true); 7283 } 7284 AddSlowPath(slow_path); 7285 7286 // We have done the "if" of the gray bit check above, now branch based on the flags. 7287 __ j(kNotZero, slow_path->GetEntryLabel()); 7288 7289 // Object* ref = ref_addr->AsMirrorPtr() 7290 __ MaybeUnpoisonHeapReference(ref_reg); 7291 7292 __ Bind(slow_path->GetExitLabel()); 7293 } 7294 7295 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, 7296 Location out, 7297 Location ref, 7298 Location obj, 7299 uint32_t offset, 7300 Location index) { 7301 DCHECK(kEmitCompilerReadBarrier); 7302 7303 // Insert a slow path based read barrier *after* the reference load. 7304 // 7305 // If heap poisoning is enabled, the unpoisoning of the loaded 7306 // reference will be carried out by the runtime within the slow 7307 // path. 7308 // 7309 // Note that `ref` currently does not get unpoisoned (when heap 7310 // poisoning is enabled), which is alright as the `ref` argument is 7311 // not used by the artReadBarrierSlow entry point. 
7312 // 7313 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 7314 SlowPathCode* slow_path = new (GetScopedAllocator()) 7315 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); 7316 AddSlowPath(slow_path); 7317 7318 __ jmp(slow_path->GetEntryLabel()); 7319 __ Bind(slow_path->GetExitLabel()); 7320 } 7321 7322 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, 7323 Location out, 7324 Location ref, 7325 Location obj, 7326 uint32_t offset, 7327 Location index) { 7328 if (kEmitCompilerReadBarrier) { 7329 // Baker's read barriers shall be handled by the fast path 7330 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier). 7331 DCHECK(!kUseBakerReadBarrier); 7332 // If heap poisoning is enabled, unpoisoning will be taken care of 7333 // by the runtime within the slow path. 7334 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); 7335 } else if (kPoisonHeapReferences) { 7336 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>()); 7337 } 7338 } 7339 7340 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction, 7341 Location out, 7342 Location root) { 7343 DCHECK(kEmitCompilerReadBarrier); 7344 7345 // Insert a slow path based read barrier *after* the GC root load. 7346 // 7347 // Note that GC roots are not affected by heap poisoning, so we do 7348 // not need to do anything special for this here. 7349 SlowPathCode* slow_path = 7350 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); 7351 AddSlowPath(slow_path); 7352 7353 __ jmp(slow_path->GetEntryLabel()); 7354 __ Bind(slow_path->GetExitLabel()); 7355 } 7356 7357 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 7358 // Nothing to do, this should be removed during prepare for register allocator. 7359 LOG(FATAL) << "Unreachable"; 7360 } 7361 7362 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) { 7363 // Nothing to do, this should be removed during prepare for register allocator. 7364 LOG(FATAL) << "Unreachable"; 7365 } 7366 7367 // Simple implementation of packed switch - generate cascaded compare/jumps. 7368 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 7369 LocationSummary* locations = 7370 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); 7371 locations->SetInAt(0, Location::RequiresRegister()); 7372 locations->AddTemp(Location::RequiresRegister()); 7373 locations->AddTemp(Location::RequiresRegister()); 7374 } 7375 7376 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { 7377 int32_t lower_bound = switch_instr->GetStartValue(); 7378 uint32_t num_entries = switch_instr->GetNumEntries(); 7379 LocationSummary* locations = switch_instr->GetLocations(); 7380 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>(); 7381 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>(); 7382 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>(); 7383 HBasicBlock* default_block = switch_instr->GetDefaultBlock(); 7384 7385 // Should we generate smaller inline compare/jumps? 7386 if (num_entries <= kPackedSwitchJumpTableThreshold) { 7387 // Figure out the correct compare values and jump conditions. 7388 // Handle the first compare/branch as a special case because it might 7389 // jump to the default case. 
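// Each cmpl in the loop below then covers two entries: a below/less branch to successors[index] and an equality branch to successors[index + 1].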
7390 DCHECK_GT(num_entries, 2u); 7391 Condition first_condition; 7392 uint32_t index; 7393 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors(); 7394 if (lower_bound != 0) { 7395 first_condition = kLess; 7396 __ cmpl(value_reg_in, Immediate(lower_bound)); 7397 __ j(first_condition, codegen_->GetLabelOf(default_block)); 7398 __ j(kEqual, codegen_->GetLabelOf(successors[0])); 7399 7400 index = 1; 7401 } else { 7402 // Handle all the compare/jumps below. 7403 first_condition = kBelow; 7404 index = 0; 7405 } 7406 7407 // Handle the rest of the compare/jumps. 7408 for (; index + 1 < num_entries; index += 2) { 7409 int32_t compare_to_value = lower_bound + index + 1; 7410 __ cmpl(value_reg_in, Immediate(compare_to_value)); 7411 // Jump to successors[index] if value < case_value[index]. 7412 __ j(first_condition, codegen_->GetLabelOf(successors[index])); 7413 // Jump to successors[index + 1] if value == case_value[index + 1]. 7414 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1])); 7415 } 7416 7417 if (index != num_entries) { 7418 // There are an odd number of entries. Handle the last one. 7419 DCHECK_EQ(index + 1, num_entries); 7420 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index))); 7421 __ j(kEqual, codegen_->GetLabelOf(successors[index])); 7422 } 7423 7424 // And the default for any other value. 7425 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) { 7426 __ jmp(codegen_->GetLabelOf(default_block)); 7427 } 7428 return; 7429 } 7430 7431 // Remove the bias, if needed. 7432 Register value_reg_out = value_reg_in.AsRegister(); 7433 if (lower_bound != 0) { 7434 __ leal(temp_reg, Address(value_reg_in, -lower_bound)); 7435 value_reg_out = temp_reg.AsRegister(); 7436 } 7437 CpuRegister value_reg(value_reg_out); 7438 7439 // Is the value in range? 7440 __ cmpl(value_reg, Immediate(num_entries - 1)); 7441 __ j(kAbove, codegen_->GetLabelOf(default_block)); 7442 7443 // We are in the range of the table. 7444 // Load the address of the jump table in the constant area. 7445 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr)); 7446 7447 // Load the (signed) offset from the jump table. 7448 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0)); 7449 7450 // Add the offset to the address of the table base. 7451 __ addq(temp_reg, base_reg); 7452 7453 // And jump. 7454 __ jmp(temp_reg); 7455 } 7456 7457 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction 7458 ATTRIBUTE_UNUSED) { 7459 LOG(FATAL) << "Unreachable"; 7460 } 7461 7462 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction 7463 ATTRIBUTE_UNUSED) { 7464 LOG(FATAL) << "Unreachable"; 7465 } 7466 7467 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) { 7468 if (value == 0) { 7469 __ xorl(dest, dest); 7470 } else { 7471 __ movl(dest, Immediate(value)); 7472 } 7473 } 7474 7475 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) { 7476 if (value == 0) { 7477 // Clears upper bits too. 7478 __ xorl(dest, dest); 7479 } else if (IsUint<32>(value)) { 7480 // We can use a 32 bit move, as it will zero-extend and is shorter. 
    __ movl(dest, Immediate(static_cast<int32_t>(value)));
  } else {
    __ movq(dest, Immediate(value));
  }
}

void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
  if (value == 0) {
    __ xorps(dest, dest);
  } else {
    __ movss(dest, LiteralInt32Address(value));
  }
}

void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
  if (value == 0) {
    __ xorpd(dest, dest);
  } else {
    __ movsd(dest, LiteralInt64Address(value));
  }
}

void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
  Load32BitValue(dest, bit_cast<int32_t, float>(value));
}

void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
  Load64BitValue(dest, bit_cast<int64_t, double>(value));
}

void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
  if (value == 0) {
    __ testl(dest, dest);
  } else {
    __ cmpl(dest, Immediate(value));
  }
}

void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
  if (IsInt<32>(value)) {
    if (value == 0) {
      __ testq(dest, dest);
    } else {
      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
    }
  } else {
    // Value won't fit in an int.
    __ cmpq(dest, LiteralInt64Address(value));
  }
}

void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
  }
}

void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  if (rhs.IsConstant()) {
    int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
    Compare64BitValue(lhs_reg, value);
  } else if (rhs.IsDoubleStackSlot()) {
    __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
  }
}

Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
                                          Location index,
                                          ScaleFactor scale,
                                          uint32_t data_offset) {
  return index.IsConstant() ?
      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
      Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
}

void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
  DCHECK(dest.IsDoubleStackSlot());
  if (IsInt<32>(value)) {
    // Can move directly as an int32 constant.
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
            Immediate(static_cast<int32_t>(value)));
  } else {
    Load64BitValue(CpuRegister(TMP), value);
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
  }
}

/**
 * Class to handle late fixup of offsets into constant area.
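 *
 * The RIP-relative displacement itself is only computed in Process(), once the
 * final instruction position and ConstantAreaStart() are known, i.e. after the
 * constant area has been placed at the end of the method code.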
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
      : codegen_(&codegen), offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86_64* codegen_;

 private:
  void Process(const MemoryRegion& region, int pos) override {
    // Patch the correct offset for the instruction. We use the address of the
    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position = constant_offset - pos;

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  size_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86_64Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // Compute the offset from the start of the function to this jump table.
    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;

    // Populate the jump table with the correct values for the jump table.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - current_table_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HPackedSwitch* switch_instr_;
};

void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86_64Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
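  // By this point constant_area_start_ is set and the constants/jump tables have
  // been appended, so the pending RIPFixups can patch their RIP-relative
  // displacements once the final code buffer is produced below.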
  CodeGenerator::Finalize(allocator);
}

Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
  return Address::RIP(fixup);
}

// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
  if (!trg.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
  if (trg.Equals(return_loc)) {
    return;
  }

  // Let the parallel move resolver take care of all of this.
  HParallelMove parallel_move(GetGraph()->GetAllocator());
  parallel_move.AddMove(return_loc, trg, type, nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);
}

Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // We have to populate the jump tables.
  fixups_to_jump_tables_.push_back(table_fixup);
  return Address::RIP(table_fixup);
}

void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
                                             const Address& addr_high,
                                             int64_t v,
                                             HInstruction* instruction) {
  if (IsInt<32>(v)) {
    int32_t v_32 = v;
    __ movq(addr_low, Immediate(v_32));
    MaybeRecordImplicitNullCheck(instruction);
  } else {
    // Didn't fit in a register. Do it in pieces.
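    // (x86-64 has no store of a full 64-bit immediate to memory; movq only takes
    // a sign-extended 32-bit immediate, so write the two 32-bit halves separately.
    // The implicit null check is recorded on the first store, which is the access
    // that can fault on a null base.)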
    int32_t low_v = Low32Bits(v);
    int32_t high_v = High32Bits(v);
    __ movl(addr_low, Immediate(low_v));
    MaybeRecordImplicitNullCheck(instruction);
    __ movl(addr_high, Immediate(high_v));
  }
}

void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
                                          const uint8_t* roots_data,
                                          const PatchInfo<Label>& info,
                                          uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX();
}

bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
  return codegen_->GetInstructionSetFeatures().HasAVX2();
}

#undef __

}  // namespace x86_64
}  // namespace art