1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "intrinsics_arm64.h" 18 19 #include "arch/arm64/instruction_set_features_arm64.h" 20 #include "art_method.h" 21 #include "code_generator_arm64.h" 22 #include "common_arm64.h" 23 #include "entrypoints/quick/quick_entrypoints.h" 24 #include "heap_poisoning.h" 25 #include "intrinsics.h" 26 #include "lock_word.h" 27 #include "mirror/array-inl.h" 28 #include "mirror/object_array-inl.h" 29 #include "mirror/reference.h" 30 #include "mirror/string-inl.h" 31 #include "scoped_thread_state_change-inl.h" 32 #include "thread-current-inl.h" 33 #include "utils/arm64/assembler_arm64.h" 34 35 using namespace vixl::aarch64; // NOLINT(build/namespaces) 36 37 // TODO(VIXL): Make VIXL compile with -Wshadow. 38 #pragma GCC diagnostic push 39 #pragma GCC diagnostic ignored "-Wshadow" 40 #include "aarch64/disasm-aarch64.h" 41 #include "aarch64/macro-assembler-aarch64.h" 42 #pragma GCC diagnostic pop 43 44 namespace art { 45 46 namespace arm64 { 47 48 using helpers::DRegisterFrom; 49 using helpers::FPRegisterFrom; 50 using helpers::HeapOperand; 51 using helpers::LocationFrom; 52 using helpers::OperandFrom; 53 using helpers::RegisterFrom; 54 using helpers::SRegisterFrom; 55 using helpers::WRegisterFrom; 56 using helpers::XRegisterFrom; 57 using helpers::HRegisterFrom; 58 using helpers::InputRegisterAt; 59 using helpers::OutputRegister; 60 61 namespace { 62 63 ALWAYS_INLINE inline MemOperand AbsoluteHeapOperandFrom(Location location, size_t offset = 0) { 64 return MemOperand(XRegisterFrom(location), offset); 65 } 66 67 } // namespace 68 69 MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() { 70 return codegen_->GetVIXLAssembler(); 71 } 72 73 ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() { 74 return codegen_->GetGraph()->GetAllocator(); 75 } 76 77 #define __ codegen->GetVIXLAssembler()-> 78 79 static void MoveFromReturnRegister(Location trg, 80 DataType::Type type, 81 CodeGeneratorARM64* codegen) { 82 if (!trg.IsValid()) { 83 DCHECK(type == DataType::Type::kVoid); 84 return; 85 } 86 87 DCHECK_NE(type, DataType::Type::kVoid); 88 89 if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) { 90 Register trg_reg = RegisterFrom(trg, type); 91 Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type); 92 __ Mov(trg_reg, res_reg, kDiscardForSameWReg); 93 } else { 94 VRegister trg_reg = FPRegisterFrom(trg, type); 95 VRegister res_reg = FPRegisterFrom(ARM64ReturnLocation(type), type); 96 __ Fmov(trg_reg, res_reg); 97 } 98 } 99 100 static void MoveArguments(HInvoke* invoke, CodeGeneratorARM64* codegen) { 101 InvokeDexCallingConventionVisitorARM64 calling_convention_visitor; 102 IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor); 103 } 104 105 // Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified 106 // call. 
This will copy the arguments into the positions for a regular call. 107 // 108 // Note: The actual parameters are required to be in the locations given by the invoke's location 109 // summary. If an intrinsic modifies those locations before a slowpath call, they must be 110 // restored! 111 class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { 112 public: 113 explicit IntrinsicSlowPathARM64(HInvoke* invoke) 114 : SlowPathCodeARM64(invoke), invoke_(invoke) { } 115 116 void EmitNativeCode(CodeGenerator* codegen_in) override { 117 CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); 118 __ Bind(GetEntryLabel()); 119 120 SaveLiveRegisters(codegen, invoke_->GetLocations()); 121 122 MoveArguments(invoke_, codegen); 123 124 { 125 // Ensure that between the BLR (emitted by Generate*Call) and RecordPcInfo there 126 // are no pools emitted. 127 vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); 128 if (invoke_->IsInvokeStaticOrDirect()) { 129 codegen->GenerateStaticOrDirectCall( 130 invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this); 131 } else { 132 codegen->GenerateVirtualCall( 133 invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this); 134 } 135 } 136 137 // Copy the result back to the expected output. 138 Location out = invoke_->GetLocations()->Out(); 139 if (out.IsValid()) { 140 DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory. 141 DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg())); 142 MoveFromReturnRegister(out, invoke_->GetType(), codegen); 143 } 144 145 RestoreLiveRegisters(codegen, invoke_->GetLocations()); 146 __ B(GetExitLabel()); 147 } 148 149 const char* GetDescription() const override { return "IntrinsicSlowPathARM64"; } 150 151 private: 152 // The instruction where this slow path is happening. 153 HInvoke* const invoke_; 154 155 DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARM64); 156 }; 157 158 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers. 
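// Roughly, the copy loop emitted by EmitNativeCode() below does the following
// (a pseudocode sketch, not the exact instruction sequence):
//
//   do {
//     ref = *src_curr_addr++;          // LDR with post-index.
//     ref = ReadBarrier::Mark(ref);    // Call the per-register mark entrypoint.
//     *dst_curr_addr++ = ref;          // STR with post-index.
//   } while (src_curr_addr != src_stop_addr);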
159 class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { 160 public: 161 ReadBarrierSystemArrayCopySlowPathARM64(HInstruction* instruction, Location tmp) 162 : SlowPathCodeARM64(instruction), tmp_(tmp) { 163 DCHECK(kEmitCompilerReadBarrier); 164 DCHECK(kUseBakerReadBarrier); 165 } 166 167 void EmitNativeCode(CodeGenerator* codegen_in) override { 168 CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in); 169 LocationSummary* locations = instruction_->GetLocations(); 170 DCHECK(locations->CanCall()); 171 DCHECK(instruction_->IsInvokeStaticOrDirect()) 172 << "Unexpected instruction in read barrier arraycopy slow path: " 173 << instruction_->DebugName(); 174 DCHECK(instruction_->GetLocations()->Intrinsified()); 175 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); 176 177 const int32_t element_size = DataType::Size(DataType::Type::kReference); 178 179 Register src_curr_addr = XRegisterFrom(locations->GetTemp(0)); 180 Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1)); 181 Register src_stop_addr = XRegisterFrom(locations->GetTemp(2)); 182 Register tmp_reg = WRegisterFrom(tmp_); 183 184 __ Bind(GetEntryLabel()); 185 vixl::aarch64::Label slow_copy_loop; 186 __ Bind(&slow_copy_loop); 187 __ Ldr(tmp_reg, MemOperand(src_curr_addr, element_size, PostIndex)); 188 codegen->GetAssembler()->MaybeUnpoisonHeapReference(tmp_reg); 189 // TODO: Inline the mark bit check before calling the runtime? 190 // tmp_reg = ReadBarrier::Mark(tmp_reg); 191 // No need to save live registers; it's taken care of by the 192 // entrypoint. Also, there is no need to update the stack mask, 193 // as this runtime call will not trigger a garbage collection. 194 // (See ReadBarrierMarkSlowPathARM64::EmitNativeCode for more 195 // explanations.) 196 DCHECK_NE(tmp_.reg(), LR); 197 DCHECK_NE(tmp_.reg(), WSP); 198 DCHECK_NE(tmp_.reg(), WZR); 199 // IP0 is used internally by the ReadBarrierMarkRegX entry point 200 // as a temporary (and not preserved). It thus cannot be used by 201 // any live register in this slow path. 202 DCHECK_NE(LocationFrom(src_curr_addr).reg(), IP0); 203 DCHECK_NE(LocationFrom(dst_curr_addr).reg(), IP0); 204 DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0); 205 DCHECK_NE(tmp_.reg(), IP0); 206 DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg(); 207 // TODO: Load the entrypoint once before the loop, instead of 208 // loading it at every iteration. 209 int32_t entry_point_offset = 210 Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg()); 211 // This runtime call does not require a stack map. 
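    // The offset computed above selects the ReadBarrierMarkReg<N> entrypoint whose
    // <N> matches the register holding the reference (`tmp_reg` here), so the
    // reference is passed and returned in that same register without extra moves.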
212 codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this); 213 codegen->GetAssembler()->MaybePoisonHeapReference(tmp_reg); 214 __ Str(tmp_reg, MemOperand(dst_curr_addr, element_size, PostIndex)); 215 __ Cmp(src_curr_addr, src_stop_addr); 216 __ B(&slow_copy_loop, ne); 217 __ B(GetExitLabel()); 218 } 219 220 const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; } 221 222 private: 223 Location tmp_; 224 225 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARM64); 226 }; 227 #undef __ 228 229 bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { 230 Dispatch(invoke); 231 LocationSummary* res = invoke->GetLocations(); 232 if (res == nullptr) { 233 return false; 234 } 235 return res->Intrinsified(); 236 } 237 238 #define __ masm-> 239 240 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 241 LocationSummary* locations = 242 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 243 locations->SetInAt(0, Location::RequiresFpuRegister()); 244 locations->SetOut(Location::RequiresRegister()); 245 } 246 247 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { 248 LocationSummary* locations = 249 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 250 locations->SetInAt(0, Location::RequiresRegister()); 251 locations->SetOut(Location::RequiresFpuRegister()); 252 } 253 254 static void MoveFPToInt(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { 255 Location input = locations->InAt(0); 256 Location output = locations->Out(); 257 __ Fmov(is64bit ? XRegisterFrom(output) : WRegisterFrom(output), 258 is64bit ? DRegisterFrom(input) : SRegisterFrom(input)); 259 } 260 261 static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { 262 Location input = locations->InAt(0); 263 Location output = locations->Out(); 264 __ Fmov(is64bit ? DRegisterFrom(output) : SRegisterFrom(output), 265 is64bit ? 
XRegisterFrom(input) : WRegisterFrom(input)); 266 } 267 268 void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 269 CreateFPToIntLocations(allocator_, invoke); 270 } 271 void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 272 CreateIntToFPLocations(allocator_, invoke); 273 } 274 275 void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { 276 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler()); 277 } 278 void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { 279 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler()); 280 } 281 282 void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 283 CreateFPToIntLocations(allocator_, invoke); 284 } 285 void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { 286 CreateIntToFPLocations(allocator_, invoke); 287 } 288 289 void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { 290 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler()); 291 } 292 void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { 293 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler()); 294 } 295 296 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 297 LocationSummary* locations = 298 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 299 locations->SetInAt(0, Location::RequiresRegister()); 300 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 301 } 302 303 static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 304 LocationSummary* locations = 305 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 306 locations->SetInAt(0, Location::RequiresRegister()); 307 locations->SetInAt(1, Location::RequiresRegister()); 308 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 309 } 310 311 static void GenReverseBytes(LocationSummary* locations, 312 DataType::Type type, 313 MacroAssembler* masm) { 314 Location in = locations->InAt(0); 315 Location out = locations->Out(); 316 317 switch (type) { 318 case DataType::Type::kInt16: 319 __ Rev16(WRegisterFrom(out), WRegisterFrom(in)); 320 __ Sxth(WRegisterFrom(out), WRegisterFrom(out)); 321 break; 322 case DataType::Type::kInt32: 323 case DataType::Type::kInt64: 324 __ Rev(RegisterFrom(out, type), RegisterFrom(in, type)); 325 break; 326 default: 327 LOG(FATAL) << "Unexpected size for reverse-bytes: " << type; 328 UNREACHABLE(); 329 } 330 } 331 332 void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) { 333 CreateIntToIntLocations(allocator_, invoke); 334 } 335 336 void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) { 337 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler()); 338 } 339 340 void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) { 341 CreateIntToIntLocations(allocator_, invoke); 342 } 343 344 void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) { 345 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler()); 346 } 347 348 void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) { 349 CreateIntToIntLocations(allocator_, invoke); 350 } 351 352 void 
IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { 353 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler()); 354 } 355 356 static void GenNumberOfLeadingZeros(LocationSummary* locations, 357 DataType::Type type, 358 MacroAssembler* masm) { 359 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 360 361 Location in = locations->InAt(0); 362 Location out = locations->Out(); 363 364 __ Clz(RegisterFrom(out, type), RegisterFrom(in, type)); 365 } 366 367 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 368 CreateIntToIntLocations(allocator_, invoke); 369 } 370 371 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { 372 GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler()); 373 } 374 375 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 376 CreateIntToIntLocations(allocator_, invoke); 377 } 378 379 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { 380 GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler()); 381 } 382 383 static void GenNumberOfTrailingZeros(LocationSummary* locations, 384 DataType::Type type, 385 MacroAssembler* masm) { 386 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 387 388 Location in = locations->InAt(0); 389 Location out = locations->Out(); 390 391 __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type)); 392 __ Clz(RegisterFrom(out, type), RegisterFrom(out, type)); 393 } 394 395 void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 396 CreateIntToIntLocations(allocator_, invoke); 397 } 398 399 void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { 400 GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler()); 401 } 402 403 void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 404 CreateIntToIntLocations(allocator_, invoke); 405 } 406 407 void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { 408 GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler()); 409 } 410 411 static void GenReverse(LocationSummary* locations, 412 DataType::Type type, 413 MacroAssembler* masm) { 414 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 415 416 Location in = locations->InAt(0); 417 Location out = locations->Out(); 418 419 __ Rbit(RegisterFrom(out, type), RegisterFrom(in, type)); 420 } 421 422 void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) { 423 CreateIntToIntLocations(allocator_, invoke); 424 } 425 426 void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) { 427 GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler()); 428 } 429 430 void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) { 431 CreateIntToIntLocations(allocator_, invoke); 432 } 433 434 void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) { 435 GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler()); 436 } 437 438 static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) { 439 DCHECK(DataType::IsIntOrLongType(type)) << type; 440 DCHECK_EQ(instr->GetType(), DataType::Type::kInt32); 441 DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type); 442 
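  // There is no scalar popcount instruction in the base A64 instruction set, so
  // the value is moved to a SIMD register and counted there: CNT computes the
  // number of set bits in each byte lane and ADDV sums the lanes into a single
  // byte. For example, for the input 0x0F0F the per-byte counts are {4, 4, 0, ...}
  // and ADDV yields 8.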
443 UseScratchRegisterScope temps(masm); 444 445 Register src = InputRegisterAt(instr, 0); 446 Register dst = RegisterFrom(instr->GetLocations()->Out(), type); 447 VRegister fpr = (type == DataType::Type::kInt64) ? temps.AcquireD() : temps.AcquireS(); 448 449 __ Fmov(fpr, src); 450 __ Cnt(fpr.V8B(), fpr.V8B()); 451 __ Addv(fpr.B(), fpr.V8B()); 452 __ Fmov(dst, fpr); 453 } 454 455 void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) { 456 CreateIntToIntLocations(allocator_, invoke); 457 } 458 459 void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) { 460 GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler()); 461 } 462 463 void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) { 464 CreateIntToIntLocations(allocator_, invoke); 465 } 466 467 void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) { 468 GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler()); 469 } 470 471 static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) { 472 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 473 474 UseScratchRegisterScope temps(masm); 475 476 Register src = InputRegisterAt(invoke, 0); 477 Register dst = RegisterFrom(invoke->GetLocations()->Out(), type); 478 Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW(); 479 size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u; 480 size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u; 481 482 __ Clz(temp, src); 483 __ Mov(dst, UINT64_C(1) << high_bit); // MOV (bitmask immediate) 484 __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit)); // Clear dst if src was 0. 485 __ Lsr(dst, dst, temp); 486 } 487 488 void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) { 489 CreateIntToIntLocations(allocator_, invoke); 490 } 491 492 void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) { 493 GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler()); 494 } 495 496 void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) { 497 CreateIntToIntLocations(allocator_, invoke); 498 } 499 500 void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) { 501 GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler()); 502 } 503 504 static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) { 505 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); 506 507 UseScratchRegisterScope temps(masm); 508 509 Register src = InputRegisterAt(invoke, 0); 510 Register dst = RegisterFrom(invoke->GetLocations()->Out(), type); 511 Register temp = (type == DataType::Type::kInt64) ? 
temps.AcquireX() : temps.AcquireW(); 512 513 __ Neg(temp, src); 514 __ And(dst, temp, src); 515 } 516 517 void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) { 518 CreateIntToIntLocations(allocator_, invoke); 519 } 520 521 void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) { 522 GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler()); 523 } 524 525 void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) { 526 CreateIntToIntLocations(allocator_, invoke); 527 } 528 529 void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) { 530 GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler()); 531 } 532 533 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { 534 LocationSummary* locations = 535 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 536 locations->SetInAt(0, Location::RequiresFpuRegister()); 537 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); 538 } 539 540 void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { 541 CreateFPToFPLocations(allocator_, invoke); 542 } 543 544 void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) { 545 LocationSummary* locations = invoke->GetLocations(); 546 MacroAssembler* masm = GetVIXLAssembler(); 547 __ Fsqrt(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0))); 548 } 549 550 void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) { 551 CreateFPToFPLocations(allocator_, invoke); 552 } 553 554 void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) { 555 LocationSummary* locations = invoke->GetLocations(); 556 MacroAssembler* masm = GetVIXLAssembler(); 557 __ Frintp(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0))); 558 } 559 560 void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) { 561 CreateFPToFPLocations(allocator_, invoke); 562 } 563 564 void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) { 565 LocationSummary* locations = invoke->GetLocations(); 566 MacroAssembler* masm = GetVIXLAssembler(); 567 __ Frintm(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0))); 568 } 569 570 void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) { 571 CreateFPToFPLocations(allocator_, invoke); 572 } 573 574 void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) { 575 LocationSummary* locations = invoke->GetLocations(); 576 MacroAssembler* masm = GetVIXLAssembler(); 577 __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0))); 578 } 579 580 static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) { 581 LocationSummary* locations = 582 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 583 locations->SetInAt(0, Location::RequiresFpuRegister()); 584 locations->SetOut(Location::RequiresRegister()); 585 locations->AddTemp(Location::RequiresFpuRegister()); 586 } 587 588 static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAssembler* masm) { 589 // Java 8 API definition for Math.round(): 590 // Return the closest long or int to the argument, with ties rounding to positive infinity. 591 // 592 // There is no single instruction in ARMv8 that can support the above definition. 593 // We choose to use FCVTAS here, because it has closest semantic. 594 // FCVTAS performs rounding to nearest integer, ties away from zero. 
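  // For example (illustrative values): Math.round(2.5) == 3 == FCVTAS(2.5), but
  // Math.round(-2.5) == -2 while FCVTAS(-2.5) == -3, so only negative ties need
  // the +1 fix-up emitted below.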
  // For most inputs (positive values, zero or NaN), this instruction is enough.
  // We only need a little extra handling after FCVTAS if the input is a negative
  // tie, i.e. a negative value lying exactly halfway between two integers.
  //
  // We did not choose the FCVTPS instruction because, although it rounds toward
  // positive infinity, it does not round to nearest: for example,
  // FCVTPS(-1.9) = -1 and FCVTPS(1.1) = 2. With that instruction, most inputs
  // would need extra handling code.
  LocationSummary* l = invoke->GetLocations();
  VRegister in_reg = is_double ? DRegisterFrom(l->InAt(0)) : SRegisterFrom(l->InAt(0));
  VRegister tmp_fp = is_double ? DRegisterFrom(l->GetTemp(0)) : SRegisterFrom(l->GetTemp(0));
  Register out_reg = is_double ? XRegisterFrom(l->Out()) : WRegisterFrom(l->Out());
  vixl::aarch64::Label done;

  // Round to nearest integer, ties away from zero.
  __ Fcvtas(out_reg, in_reg);

  // For positive values, zero or NaN inputs, rounding is done.
  __ Tbz(out_reg, out_reg.GetSizeInBits() - 1, &done);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, out_reg += 1.
  __ Frinta(tmp_fp, in_reg);
  __ Fsub(tmp_fp, in_reg, tmp_fp);
  __ Fcmp(tmp_fp, 0.5);
  __ Cinc(out_reg, out_reg, eq);

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateFPToIntPlusFPTempLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
  GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldrsb(WRegisterFrom(invoke->GetLocations()->Out()),
           AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(WRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __ Ldr(XRegisterFrom(invoke->GetLocations()->Out()),
         AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0));
}

void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  __
Ldrsh(WRegisterFrom(invoke->GetLocations()->Out()), 678 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 679 } 680 681 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { 682 LocationSummary* locations = 683 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 684 locations->SetInAt(0, Location::RequiresRegister()); 685 locations->SetInAt(1, Location::RequiresRegister()); 686 } 687 688 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) { 689 CreateIntIntToVoidLocations(allocator_, invoke); 690 } 691 692 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) { 693 MacroAssembler* masm = GetVIXLAssembler(); 694 __ Strb(WRegisterFrom(invoke->GetLocations()->InAt(1)), 695 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 696 } 697 698 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) { 699 CreateIntIntToVoidLocations(allocator_, invoke); 700 } 701 702 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) { 703 MacroAssembler* masm = GetVIXLAssembler(); 704 __ Str(WRegisterFrom(invoke->GetLocations()->InAt(1)), 705 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 706 } 707 708 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) { 709 CreateIntIntToVoidLocations(allocator_, invoke); 710 } 711 712 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) { 713 MacroAssembler* masm = GetVIXLAssembler(); 714 __ Str(XRegisterFrom(invoke->GetLocations()->InAt(1)), 715 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 716 } 717 718 void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) { 719 CreateIntIntToVoidLocations(allocator_, invoke); 720 } 721 722 void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) { 723 MacroAssembler* masm = GetVIXLAssembler(); 724 __ Strh(WRegisterFrom(invoke->GetLocations()->InAt(1)), 725 AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); 726 } 727 728 void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) { 729 LocationSummary* locations = 730 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 731 locations->SetOut(Location::RequiresRegister()); 732 } 733 734 void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) { 735 codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()), 736 MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value())); 737 } 738 739 static void GenUnsafeGet(HInvoke* invoke, 740 DataType::Type type, 741 bool is_volatile, 742 CodeGeneratorARM64* codegen) { 743 LocationSummary* locations = invoke->GetLocations(); 744 DCHECK((type == DataType::Type::kInt32) || 745 (type == DataType::Type::kInt64) || 746 (type == DataType::Type::kReference)); 747 Location base_loc = locations->InAt(1); 748 Register base = WRegisterFrom(base_loc); // Object pointer. 749 Location offset_loc = locations->InAt(2); 750 Register offset = XRegisterFrom(offset_loc); // Long offset. 751 Location trg_loc = locations->Out(); 752 Register trg = RegisterFrom(trg_loc, type); 753 754 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 755 // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. 
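    // With a concurrent copying collector the reference loaded here may point to
    // a from-space copy of the object; GenerateFieldLoadWithBakerReadBarrier()
    // emits the check that substitutes the to-space reference when needed.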
756 Register temp = WRegisterFrom(locations->GetTemp(0)); 757 MacroAssembler* masm = codegen->GetVIXLAssembler(); 758 // Piggy-back on the field load path using introspection for the Baker read barrier. 759 __ Add(temp, base, offset.W()); // Offset should not exceed 32 bits. 760 codegen->GenerateFieldLoadWithBakerReadBarrier(invoke, 761 trg_loc, 762 base, 763 MemOperand(temp.X()), 764 /* needs_null_check= */ false, 765 is_volatile); 766 } else { 767 // Other cases. 768 MemOperand mem_op(base.X(), offset); 769 if (is_volatile) { 770 codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check= */ true); 771 } else { 772 codegen->Load(type, trg, mem_op); 773 } 774 775 if (type == DataType::Type::kReference) { 776 DCHECK(trg.IsW()); 777 codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc); 778 } 779 } 780 } 781 782 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { 783 bool can_call = kEmitCompilerReadBarrier && 784 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || 785 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); 786 LocationSummary* locations = 787 new (allocator) LocationSummary(invoke, 788 can_call 789 ? LocationSummary::kCallOnSlowPath 790 : LocationSummary::kNoCall, 791 kIntrinsified); 792 if (can_call && kUseBakerReadBarrier) { 793 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 794 // We need a temporary register for the read barrier load in order to use 795 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(). 796 locations->AddTemp(FixedTempLocation()); 797 } 798 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 799 locations->SetInAt(1, Location::RequiresRegister()); 800 locations->SetInAt(2, Location::RequiresRegister()); 801 locations->SetOut(Location::RequiresRegister(), 802 (can_call ? 
Location::kOutputOverlap : Location::kNoOutputOverlap)); 803 } 804 805 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) { 806 CreateIntIntIntToIntLocations(allocator_, invoke); 807 } 808 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) { 809 CreateIntIntIntToIntLocations(allocator_, invoke); 810 } 811 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) { 812 CreateIntIntIntToIntLocations(allocator_, invoke); 813 } 814 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 815 CreateIntIntIntToIntLocations(allocator_, invoke); 816 } 817 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) { 818 CreateIntIntIntToIntLocations(allocator_, invoke); 819 } 820 void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 821 CreateIntIntIntToIntLocations(allocator_, invoke); 822 } 823 824 void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) { 825 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_); 826 } 827 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) { 828 GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_); 829 } 830 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) { 831 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_); 832 } 833 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { 834 GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_); 835 } 836 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) { 837 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_); 838 } 839 void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { 840 GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_); 841 } 842 843 static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) { 844 LocationSummary* locations = 845 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 846 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
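  // The remaining inputs follow the Unsafe.put*(Object o, long offset, x value)
  // shape: input 1 is the base object, input 2 the long offset and input 3 the
  // value to store, matching how GenUnsafePut() reads them below.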
847 locations->SetInAt(1, Location::RequiresRegister()); 848 locations->SetInAt(2, Location::RequiresRegister()); 849 locations->SetInAt(3, Location::RequiresRegister()); 850 } 851 852 void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) { 853 CreateIntIntIntIntToVoid(allocator_, invoke); 854 } 855 void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) { 856 CreateIntIntIntIntToVoid(allocator_, invoke); 857 } 858 void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) { 859 CreateIntIntIntIntToVoid(allocator_, invoke); 860 } 861 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) { 862 CreateIntIntIntIntToVoid(allocator_, invoke); 863 } 864 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 865 CreateIntIntIntIntToVoid(allocator_, invoke); 866 } 867 void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 868 CreateIntIntIntIntToVoid(allocator_, invoke); 869 } 870 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) { 871 CreateIntIntIntIntToVoid(allocator_, invoke); 872 } 873 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 874 CreateIntIntIntIntToVoid(allocator_, invoke); 875 } 876 void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 877 CreateIntIntIntIntToVoid(allocator_, invoke); 878 } 879 880 static void GenUnsafePut(HInvoke* invoke, 881 DataType::Type type, 882 bool is_volatile, 883 bool is_ordered, 884 CodeGeneratorARM64* codegen) { 885 LocationSummary* locations = invoke->GetLocations(); 886 MacroAssembler* masm = codegen->GetVIXLAssembler(); 887 888 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. 889 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. 890 Register value = RegisterFrom(locations->InAt(3), type); 891 Register source = value; 892 MemOperand mem_op(base.X(), offset); 893 894 { 895 // We use a block to end the scratch scope before the write barrier, thus 896 // freeing the temporary registers so they can be used in `MarkGCCard`. 897 UseScratchRegisterScope temps(masm); 898 899 if (kPoisonHeapReferences && type == DataType::Type::kReference) { 900 DCHECK(value.IsW()); 901 Register temp = temps.AcquireW(); 902 __ Mov(temp.W(), value.W()); 903 codegen->GetAssembler()->PoisonHeapReference(temp.W()); 904 source = temp; 905 } 906 907 if (is_volatile || is_ordered) { 908 codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false); 909 } else { 910 codegen->Store(type, source, mem_op); 911 } 912 } 913 914 if (type == DataType::Type::kReference) { 915 bool value_can_be_null = true; // TODO: Worth finding out this information? 
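    // A reference store may create a pointer that the GC has to find when
    // scanning dirty cards, so conservatively mark the card of the holder object;
    // with `value_can_be_null` set, MarkGCCard() also emits a null check and
    // skips the card store for null values.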
916 codegen->MarkGCCard(base, value, value_can_be_null); 917 } 918 } 919 920 void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) { 921 GenUnsafePut(invoke, 922 DataType::Type::kInt32, 923 /* is_volatile= */ false, 924 /* is_ordered= */ false, 925 codegen_); 926 } 927 void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) { 928 GenUnsafePut(invoke, 929 DataType::Type::kInt32, 930 /* is_volatile= */ false, 931 /* is_ordered= */ true, 932 codegen_); 933 } 934 void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) { 935 GenUnsafePut(invoke, 936 DataType::Type::kInt32, 937 /* is_volatile= */ true, 938 /* is_ordered= */ false, 939 codegen_); 940 } 941 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) { 942 GenUnsafePut(invoke, 943 DataType::Type::kReference, 944 /* is_volatile= */ false, 945 /* is_ordered= */ false, 946 codegen_); 947 } 948 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { 949 GenUnsafePut(invoke, 950 DataType::Type::kReference, 951 /* is_volatile= */ false, 952 /* is_ordered= */ true, 953 codegen_); 954 } 955 void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { 956 GenUnsafePut(invoke, 957 DataType::Type::kReference, 958 /* is_volatile= */ true, 959 /* is_ordered= */ false, 960 codegen_); 961 } 962 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) { 963 GenUnsafePut(invoke, 964 DataType::Type::kInt64, 965 /* is_volatile= */ false, 966 /* is_ordered= */ false, 967 codegen_); 968 } 969 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { 970 GenUnsafePut(invoke, 971 DataType::Type::kInt64, 972 /* is_volatile= */ false, 973 /* is_ordered= */ true, 974 codegen_); 975 } 976 void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { 977 GenUnsafePut(invoke, 978 DataType::Type::kInt64, 979 /* is_volatile= */ true, 980 /* is_ordered= */ false, 981 codegen_); 982 } 983 984 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, 985 HInvoke* invoke, 986 DataType::Type type) { 987 bool can_call = kEmitCompilerReadBarrier && 988 kUseBakerReadBarrier && 989 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); 990 LocationSummary* locations = 991 new (allocator) LocationSummary(invoke, 992 can_call 993 ? LocationSummary::kCallOnSlowPath 994 : LocationSummary::kNoCall, 995 kIntrinsified); 996 if (can_call) { 997 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 998 } 999 locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 1000 locations->SetInAt(1, Location::RequiresRegister()); 1001 locations->SetInAt(2, Location::RequiresRegister()); 1002 locations->SetInAt(3, Location::RequiresRegister()); 1003 locations->SetInAt(4, Location::RequiresRegister()); 1004 1005 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); 1006 if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 1007 // We need two non-scratch temporary registers for (Baker) read barrier. 
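    // The first temp receives the old value loaded by the CAS loop and the second
    // its marked (to-space) equivalent; both are read again in
    // BakerReadBarrierCasSlowPathARM64, so they cannot be VIXL scratch registers.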
1008 locations->AddTemp(Location::RequiresRegister()); 1009 locations->AddTemp(Location::RequiresRegister()); 1010 } 1011 } 1012 1013 class BakerReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 { 1014 public: 1015 explicit BakerReadBarrierCasSlowPathARM64(HInvoke* invoke) 1016 : SlowPathCodeARM64(invoke) {} 1017 1018 const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARM64"; } 1019 1020 void EmitNativeCode(CodeGenerator* codegen) override { 1021 CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); 1022 Arm64Assembler* assembler = arm64_codegen->GetAssembler(); 1023 MacroAssembler* masm = assembler->GetVIXLAssembler(); 1024 __ Bind(GetEntryLabel()); 1025 1026 // Get the locations. 1027 LocationSummary* locations = instruction_->GetLocations(); 1028 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. 1029 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. 1030 Register expected = WRegisterFrom(locations->InAt(3)); // Expected. 1031 Register value = WRegisterFrom(locations->InAt(4)); // Value. 1032 1033 Register old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path. 1034 Register marked = WRegisterFrom(locations->GetTemp(1)); // The marked old value. 1035 1036 // Mark the `old_value` from the main path and compare with `expected`. This clobbers the 1037 // `tmp_ptr` scratch register but we do not want to allocate another non-scratch temporary. 1038 arm64_codegen->GenerateUnsafeCasOldValueMovWithBakerReadBarrier(marked, old_value); 1039 __ Cmp(marked, expected); 1040 __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure. 1041 1042 // The `old_value` we have read did not match `expected` (which is always a to-space reference) 1043 // but after the read barrier in GenerateUnsafeCasOldValueMovWithBakerReadBarrier() the marked 1044 // to-space value matched, so the `old_value` must be a from-space reference to the same 1045 // object. Do the same CAS loop as the main path but check for both `expected` and the unmarked 1046 // old value representing the to-space and from-space references for the same object. 1047 1048 UseScratchRegisterScope temps(masm); 1049 Register tmp_ptr = temps.AcquireX(); 1050 Register tmp = temps.AcquireSameSizeAs(value); 1051 1052 // Recalculate the `tmp_ptr` clobbered above. 1053 __ Add(tmp_ptr, base.X(), Operand(offset)); 1054 1055 // do { 1056 // tmp_value = [tmp_ptr]; 1057 // } while ((tmp_value == expected || tmp == old_value) && failure([tmp_ptr] <- r_new_value)); 1058 // result = (tmp_value == expected || tmp == old_value); 1059 1060 vixl::aarch64::Label loop_head; 1061 __ Bind(&loop_head); 1062 __ Ldaxr(tmp, MemOperand(tmp_ptr)); 1063 assembler->MaybeUnpoisonHeapReference(tmp); 1064 __ Cmp(tmp, expected); 1065 __ Ccmp(tmp, old_value, ZFlag, ne); 1066 __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure. 1067 assembler->MaybePoisonHeapReference(value); 1068 __ Stlxr(tmp.W(), value, MemOperand(tmp_ptr)); 1069 assembler->MaybeUnpoisonHeapReference(value); 1070 __ Cbnz(tmp.W(), &loop_head); 1071 1072 // Z=true from the above CMP+CCMP indicates success. 
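    // (CMP sets Z when tmp == expected; otherwise CCMP compares tmp with the
    // unmarked old value, so Z is set exactly when the loaded reference matched
    // either the to-space or the from-space reference of the expected object.)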
1073 __ B(GetExitLabel()); 1074 } 1075 }; 1076 1077 static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) { 1078 Arm64Assembler* assembler = codegen->GetAssembler(); 1079 MacroAssembler* masm = assembler->GetVIXLAssembler(); 1080 LocationSummary* locations = invoke->GetLocations(); 1081 1082 Register out = WRegisterFrom(locations->Out()); // Boolean result. 1083 Register base = WRegisterFrom(locations->InAt(1)); // Object pointer. 1084 Register offset = XRegisterFrom(locations->InAt(2)); // Long offset. 1085 Register expected = RegisterFrom(locations->InAt(3), type); // Expected. 1086 Register value = RegisterFrom(locations->InAt(4), type); // Value. 1087 1088 // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps. 1089 if (type == DataType::Type::kReference) { 1090 // Mark card for object assuming new value is stored. 1091 bool value_can_be_null = true; // TODO: Worth finding out this information? 1092 codegen->MarkGCCard(base, value, value_can_be_null); 1093 } 1094 1095 UseScratchRegisterScope temps(masm); 1096 Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory. 1097 Register old_value; // Value in memory. 1098 1099 vixl::aarch64::Label exit_loop_label; 1100 vixl::aarch64::Label* exit_loop = &exit_loop_label; 1101 vixl::aarch64::Label* failure = &exit_loop_label; 1102 1103 if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) { 1104 // The only read barrier implementation supporting the 1105 // UnsafeCASObject intrinsic is the Baker-style read barriers. 1106 DCHECK(kUseBakerReadBarrier); 1107 1108 BakerReadBarrierCasSlowPathARM64* slow_path = 1109 new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARM64(invoke); 1110 codegen->AddSlowPath(slow_path); 1111 exit_loop = slow_path->GetExitLabel(); 1112 failure = slow_path->GetEntryLabel(); 1113 // We need to store the `old_value` in a non-scratch register to make sure 1114 // the Baker read barrier in the slow path does not clobber it. 1115 old_value = WRegisterFrom(locations->GetTemp(0)); 1116 } else { 1117 old_value = temps.AcquireSameSizeAs(value); 1118 } 1119 1120 __ Add(tmp_ptr, base.X(), Operand(offset)); 1121 1122 // do { 1123 // tmp_value = [tmp_ptr]; 1124 // } while (tmp_value == expected && failure([tmp_ptr] <- r_new_value)); 1125 // result = tmp_value == expected; 1126 1127 vixl::aarch64::Label loop_head; 1128 __ Bind(&loop_head); 1129 __ Ldaxr(old_value, MemOperand(tmp_ptr)); 1130 if (type == DataType::Type::kReference) { 1131 assembler->MaybeUnpoisonHeapReference(old_value); 1132 } 1133 __ Cmp(old_value, expected); 1134 __ B(failure, ne); 1135 if (type == DataType::Type::kReference) { 1136 assembler->MaybePoisonHeapReference(value); 1137 } 1138 __ Stlxr(old_value.W(), value, MemOperand(tmp_ptr)); // Reuse `old_value` for STLXR result. 
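  // STLXR writes 0 to its status register on success and 1 if the exclusive
  // monitor was lost (another agent wrote the location), in which case the CBNZ
  // below retries the sequence; the LDAXR/STLXR pair also provides the
  // acquire/release ordering expected of the CAS.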
1139 if (type == DataType::Type::kReference) { 1140 assembler->MaybeUnpoisonHeapReference(value); 1141 } 1142 __ Cbnz(old_value.W(), &loop_head); 1143 __ Bind(exit_loop); 1144 __ Cset(out, eq); 1145 } 1146 1147 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) { 1148 CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt32); 1149 } 1150 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { 1151 CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt64); 1152 } 1153 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { 1154 // The only read barrier implementation supporting the 1155 // UnsafeCASObject intrinsic is the Baker-style read barriers. 1156 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 1157 return; 1158 } 1159 1160 CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kReference); 1161 } 1162 1163 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) { 1164 GenCas(invoke, DataType::Type::kInt32, codegen_); 1165 } 1166 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) { 1167 GenCas(invoke, DataType::Type::kInt64, codegen_); 1168 } 1169 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { 1170 // The only read barrier implementation supporting the 1171 // UnsafeCASObject intrinsic is the Baker-style read barriers. 1172 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 1173 1174 GenCas(invoke, DataType::Type::kReference, codegen_); 1175 } 1176 1177 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { 1178 LocationSummary* locations = 1179 new (allocator_) LocationSummary(invoke, 1180 invoke->InputAt(1)->CanBeNull() 1181 ? LocationSummary::kCallOnSlowPath 1182 : LocationSummary::kNoCall, 1183 kIntrinsified); 1184 locations->SetInAt(0, Location::RequiresRegister()); 1185 locations->SetInAt(1, Location::RequiresRegister()); 1186 locations->AddTemp(Location::RequiresRegister()); 1187 locations->AddTemp(Location::RequiresRegister()); 1188 locations->AddTemp(Location::RequiresRegister()); 1189 // Need temporary registers for String compression's feature. 1190 if (mirror::kUseStringCompression) { 1191 locations->AddTemp(Location::RequiresRegister()); 1192 } 1193 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 1194 } 1195 1196 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { 1197 MacroAssembler* masm = GetVIXLAssembler(); 1198 LocationSummary* locations = invoke->GetLocations(); 1199 1200 Register str = InputRegisterAt(invoke, 0); 1201 Register arg = InputRegisterAt(invoke, 1); 1202 DCHECK(str.IsW()); 1203 DCHECK(arg.IsW()); 1204 Register out = OutputRegister(invoke); 1205 1206 Register temp0 = WRegisterFrom(locations->GetTemp(0)); 1207 Register temp1 = WRegisterFrom(locations->GetTemp(1)); 1208 Register temp2 = WRegisterFrom(locations->GetTemp(2)); 1209 Register temp3; 1210 if (mirror::kUseStringCompression) { 1211 temp3 = WRegisterFrom(locations->GetTemp(3)); 1212 } 1213 1214 vixl::aarch64::Label loop; 1215 vixl::aarch64::Label find_char_diff; 1216 vixl::aarch64::Label end; 1217 vixl::aarch64::Label different_compression; 1218 1219 // Get offsets of count and value fields within a string object. 1220 const int32_t count_offset = mirror::String::CountOffset().Int32Value(); 1221 const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); 1222 1223 // Note that the null check must have been done earlier. 
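  // (The receiver `str` has already been null-checked by the caller; only `arg`
  // may still be null, which the slow path below handles by falling back to the
  // managed String.compareTo(), which throws the NullPointerException.)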
1224 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1225 1226 // Take slow path and throw if input can be and is null. 1227 SlowPathCodeARM64* slow_path = nullptr; 1228 const bool can_slow_path = invoke->InputAt(1)->CanBeNull(); 1229 if (can_slow_path) { 1230 slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 1231 codegen_->AddSlowPath(slow_path); 1232 __ Cbz(arg, slow_path->GetEntryLabel()); 1233 } 1234 1235 // Reference equality check, return 0 if same reference. 1236 __ Subs(out, str, arg); 1237 __ B(&end, eq); 1238 1239 if (mirror::kUseStringCompression) { 1240 // Load `count` fields of this and argument strings. 1241 __ Ldr(temp3, HeapOperand(str, count_offset)); 1242 __ Ldr(temp2, HeapOperand(arg, count_offset)); 1243 // Clean out compression flag from lengths. 1244 __ Lsr(temp0, temp3, 1u); 1245 __ Lsr(temp1, temp2, 1u); 1246 } else { 1247 // Load lengths of this and argument strings. 1248 __ Ldr(temp0, HeapOperand(str, count_offset)); 1249 __ Ldr(temp1, HeapOperand(arg, count_offset)); 1250 } 1251 // out = length diff. 1252 __ Subs(out, temp0, temp1); 1253 // temp0 = min(len(str), len(arg)). 1254 __ Csel(temp0, temp1, temp0, ge); 1255 // Shorter string is empty? 1256 __ Cbz(temp0, &end); 1257 1258 if (mirror::kUseStringCompression) { 1259 // Check if both strings using same compression style to use this comparison loop. 1260 __ Eor(temp2, temp2, Operand(temp3)); 1261 // Interleave with compression flag extraction which is needed for both paths 1262 // and also set flags which is needed only for the different compressions path. 1263 __ Ands(temp3.W(), temp3.W(), Operand(1)); 1264 __ Tbnz(temp2, 0, &different_compression); // Does not use flags. 1265 } 1266 // Store offset of string value in preparation for comparison loop. 1267 __ Mov(temp1, value_offset); 1268 if (mirror::kUseStringCompression) { 1269 // For string compression, calculate the number of bytes to compare (not chars). 1270 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned. 1271 __ Lsl(temp0, temp0, temp3); 1272 } 1273 1274 UseScratchRegisterScope scratch_scope(masm); 1275 Register temp4 = scratch_scope.AcquireX(); 1276 1277 // Assertions that must hold in order to compare strings 8 bytes at a time. 1278 DCHECK_ALIGNED(value_offset, 8); 1279 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); 1280 1281 const size_t char_size = DataType::Size(DataType::Type::kUint16); 1282 DCHECK_EQ(char_size, 2u); 1283 1284 // Promote temp2 to an X reg, ready for LDR. 1285 temp2 = temp2.X(); 1286 1287 // Loop to compare 4x16-bit characters at a time (ok because of string data alignment). 1288 __ Bind(&loop); 1289 __ Ldr(temp4, MemOperand(str.X(), temp1.X())); 1290 __ Ldr(temp2, MemOperand(arg.X(), temp1.X())); 1291 __ Cmp(temp4, temp2); 1292 __ B(ne, &find_char_diff); 1293 __ Add(temp1, temp1, char_size * 4); 1294 // With string compression, we have compared 8 bytes, otherwise 4 chars. 1295 __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4); 1296 __ B(&loop, hi); 1297 __ B(&end); 1298 1299 // Promote temp1 to an X reg, ready for EOR. 1300 temp1 = temp1.X(); 1301 1302 // Find the single character difference. 1303 __ Bind(&find_char_diff); 1304 // Get the bit position of the first character that differs. 
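  // EOR leaves only the differing bits set; RBIT + CLZ then give the index of the
  // least significant set bit, i.e. the bit offset of the first differing
  // character within the 8-byte chunk. The shift right by 4 (or by 3 for
  // compressed strings) below converts that bit offset to a character index.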
1305 __ Eor(temp1, temp2, temp4); 1306 __ Rbit(temp1, temp1); 1307 __ Clz(temp1, temp1); 1308 1309 // If the number of chars remaining <= the index where the difference occurs (0-3), then 1310 // the difference occurs outside the remaining string data, so just return length diff (out). 1311 // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the 1312 // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or 1313 // unsigned when string compression is disabled. 1314 // When it's enabled, the comparison must be unsigned. 1315 __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4)); 1316 __ B(ls, &end); 1317 1318 // Extract the characters and calculate the difference. 1319 if (mirror:: kUseStringCompression) { 1320 __ Bic(temp1, temp1, 0x7); 1321 __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u)); 1322 } else { 1323 __ Bic(temp1, temp1, 0xf); 1324 } 1325 __ Lsr(temp2, temp2, temp1); 1326 __ Lsr(temp4, temp4, temp1); 1327 if (mirror::kUseStringCompression) { 1328 // Prioritize the case of compressed strings and calculate such result first. 1329 __ Uxtb(temp1, temp4); 1330 __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB)); 1331 __ Tbz(temp3, 0u, &end); // If actually compressed, we're done. 1332 } 1333 __ Uxth(temp4, temp4); 1334 __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH)); 1335 1336 if (mirror::kUseStringCompression) { 1337 __ B(&end); 1338 __ Bind(&different_compression); 1339 1340 // Comparison for different compression style. 1341 const size_t c_char_size = DataType::Size(DataType::Type::kInt8); 1342 DCHECK_EQ(c_char_size, 1u); 1343 temp1 = temp1.W(); 1344 temp2 = temp2.W(); 1345 temp4 = temp4.W(); 1346 1347 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer. 1348 // Note that flags have been set by the `str` compression flag extraction to `temp3` 1349 // before branching to the `different_compression` label. 1350 __ Csel(temp1, str, arg, eq); // Pointer to the compressed string. 1351 __ Csel(temp2, str, arg, ne); // Pointer to the uncompressed string. 1352 1353 // We want to free up the temp3, currently holding `str` compression flag, for comparison. 1354 // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat 1355 // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which 1356 // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition. 1357 __ Lsl(temp0, temp0, 1u); 1358 1359 // Adjust temp1 and temp2 from string pointers to data pointers. 1360 __ Add(temp1, temp1, Operand(value_offset)); 1361 __ Add(temp2, temp2, Operand(value_offset)); 1362 1363 // Complete the move of the compression flag. 1364 __ Sub(temp0, temp0, Operand(temp3)); 1365 1366 vixl::aarch64::Label different_compression_loop; 1367 vixl::aarch64::Label different_compression_diff; 1368 1369 __ Bind(&different_compression_loop); 1370 __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex)); 1371 __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex)); 1372 __ Subs(temp4, temp4, Operand(temp3)); 1373 __ B(&different_compression_diff, ne); 1374 __ Subs(temp0, temp0, 2); 1375 __ B(&different_compression_loop, hi); 1376 __ B(&end); 1377 1378 // Calculate the difference. 
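  // At this point temp4 holds (compressed char) - (uncompressed char), and the
  // low bit of temp0 still carries the `str` compression flag, so CNEG below
  // flips the sign exactly when `str` is the uncompressed operand, producing the
  // usual this-minus-argument result.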
1379 __ Bind(&different_compression_diff); 1380 __ Tst(temp0, Operand(1)); 1381 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1382 "Expecting 0=compressed, 1=uncompressed"); 1383 __ Cneg(out, temp4, ne); 1384 } 1385 1386 __ Bind(&end); 1387 1388 if (can_slow_path) { 1389 __ Bind(slow_path->GetExitLabel()); 1390 } 1391 } 1392 1393 // The cut off for unrolling the loop in String.equals() intrinsic for const strings. 1394 // The normal loop plus the pre-header is 9 instructions without string compression and 12 1395 // instructions with string compression. We can compare up to 8 bytes in 4 instructions 1396 // (LDR+LDR+CMP+BNE) and up to 16 bytes in 5 instructions (LDP+LDP+CMP+CCMP+BNE). Allow up 1397 // to 10 instructions for the unrolled loop. 1398 constexpr size_t kShortConstStringEqualsCutoffInBytes = 32; 1399 1400 static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) { 1401 if (candidate->IsLoadString()) { 1402 HLoadString* load_string = candidate->AsLoadString(); 1403 const DexFile& dex_file = load_string->GetDexFile(); 1404 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length); 1405 } 1406 return nullptr; 1407 } 1408 1409 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) { 1410 LocationSummary* locations = 1411 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1412 locations->SetInAt(0, Location::RequiresRegister()); 1413 locations->SetInAt(1, Location::RequiresRegister()); 1414 1415 // For the generic implementation and for long const strings we need a temporary. 1416 // We do not need it for short const strings, up to 8 bytes, see code generation below. 1417 uint32_t const_string_length = 0u; 1418 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); 1419 if (const_string == nullptr) { 1420 const_string = GetConstString(invoke->InputAt(1), &const_string_length); 1421 } 1422 bool is_compressed = 1423 mirror::kUseStringCompression && 1424 const_string != nullptr && 1425 mirror::String::DexFileStringAllASCII(const_string, const_string_length); 1426 if (const_string == nullptr || const_string_length > (is_compressed ? 8u : 4u)) { 1427 locations->AddTemp(Location::RequiresRegister()); 1428 } 1429 1430 // TODO: If the String.equals() is used only for an immediately following HIf, we can 1431 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks. 1432 // Then we shall need an extra temporary register instead of the output register. 1433 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); 1434 } 1435 1436 void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { 1437 MacroAssembler* masm = GetVIXLAssembler(); 1438 LocationSummary* locations = invoke->GetLocations(); 1439 1440 Register str = WRegisterFrom(locations->InAt(0)); 1441 Register arg = WRegisterFrom(locations->InAt(1)); 1442 Register out = XRegisterFrom(locations->Out()); 1443 1444 UseScratchRegisterScope scratch_scope(masm); 1445 Register temp = scratch_scope.AcquireW(); 1446 Register temp1 = scratch_scope.AcquireW(); 1447 1448 vixl::aarch64::Label loop; 1449 vixl::aarch64::Label end; 1450 vixl::aarch64::Label return_true; 1451 vixl::aarch64::Label return_false; 1452 1453 // Get offsets of count, value, and class fields within a string object. 
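  // When string compression is enabled, `count` stores (length << 1) | flag with
  // flag 0 = compressed (8-bit chars) and 1 = uncompressed (16-bit chars), so
  // comparing the whole `count` words below also compares the compression style,
  // and `count == 0` still means the empty string.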
1454 const int32_t count_offset = mirror::String::CountOffset().Int32Value(); 1455 const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); 1456 const int32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 1457 1458 // Note that the null check must have been done earlier. 1459 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1460 1461 StringEqualsOptimizations optimizations(invoke); 1462 if (!optimizations.GetArgumentNotNull()) { 1463 // Check if input is null, return false if it is. 1464 __ Cbz(arg, &return_false); 1465 } 1466 1467 // Reference equality check, return true if same reference. 1468 __ Cmp(str, arg); 1469 __ B(&return_true, eq); 1470 1471 if (!optimizations.GetArgumentIsString()) { 1472 // Instanceof check for the argument by comparing class fields. 1473 // All string objects must have the same type since String cannot be subclassed. 1474 // Receiver must be a string object, so its class field is equal to all strings' class fields. 1475 // If the argument is a string object, its class field must be equal to receiver's class field. 1476 // 1477 // As the String class is expected to be non-movable, we can read the class 1478 // field from String.equals' arguments without read barriers. 1479 AssertNonMovableStringClass(); 1480 // /* HeapReference<Class> */ temp = str->klass_ 1481 __ Ldr(temp, MemOperand(str.X(), class_offset)); 1482 // /* HeapReference<Class> */ temp1 = arg->klass_ 1483 __ Ldr(temp1, MemOperand(arg.X(), class_offset)); 1484 // Also, because we use the previously loaded class references only in the 1485 // following comparison, we don't need to unpoison them. 1486 __ Cmp(temp, temp1); 1487 __ B(&return_false, ne); 1488 } 1489 1490 // Check if one of the inputs is a const string. Do not special-case both strings 1491 // being const, such cases should be handled by constant folding if needed. 1492 uint32_t const_string_length = 0u; 1493 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); 1494 if (const_string == nullptr) { 1495 const_string = GetConstString(invoke->InputAt(1), &const_string_length); 1496 if (const_string != nullptr) { 1497 std::swap(str, arg); // Make sure the const string is in `str`. 1498 } 1499 } 1500 bool is_compressed = 1501 mirror::kUseStringCompression && 1502 const_string != nullptr && 1503 mirror::String::DexFileStringAllASCII(const_string, const_string_length); 1504 1505 if (const_string != nullptr) { 1506 // Load `count` field of the argument string and check if it matches the const string. 1507 // Also compares the compression style, if differs return false. 1508 __ Ldr(temp, MemOperand(arg.X(), count_offset)); 1509 // Temporarily release temp1 as we may not be able to embed the flagged count in CMP immediate. 1510 scratch_scope.Release(temp1); 1511 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed))); 1512 temp1 = scratch_scope.AcquireW(); 1513 __ B(&return_false, ne); 1514 } else { 1515 // Load `count` fields of this and argument strings. 1516 __ Ldr(temp, MemOperand(str.X(), count_offset)); 1517 __ Ldr(temp1, MemOperand(arg.X(), count_offset)); 1518 // Check if `count` fields are equal, return false if they're not. 1519 // Also compares the compression style, if differs return false. 1520 __ Cmp(temp, temp1); 1521 __ B(&return_false, ne); 1522 } 1523 1524 // Assertions that must hold in order to compare strings 8 bytes at a time. 1525 // Ok to do this because strings are zero-padded to kObjectAlignment. 
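  // Reading whole 8-byte words may look past the logical end of the character data, but the
  // trailing padding is zeroed, so equal strings still compare equal.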
1526 DCHECK_ALIGNED(value_offset, 8); 1527 static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); 1528 1529 if (const_string != nullptr && 1530 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes 1531 : kShortConstStringEqualsCutoffInBytes / 2u)) { 1532 // Load and compare the contents. Though we know the contents of the short const string 1533 // at compile time, materializing constants may be more code than loading from memory. 1534 int32_t offset = value_offset; 1535 size_t remaining_bytes = 1536 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u); 1537 temp = temp.X(); 1538 temp1 = temp1.X(); 1539 while (remaining_bytes > sizeof(uint64_t)) { 1540 Register temp2 = XRegisterFrom(locations->GetTemp(0)); 1541 __ Ldp(temp, temp1, MemOperand(str.X(), offset)); 1542 __ Ldp(temp2, out, MemOperand(arg.X(), offset)); 1543 __ Cmp(temp, temp2); 1544 __ Ccmp(temp1, out, NoFlag, eq); 1545 __ B(&return_false, ne); 1546 offset += 2u * sizeof(uint64_t); 1547 remaining_bytes -= 2u * sizeof(uint64_t); 1548 } 1549 if (remaining_bytes != 0u) { 1550 __ Ldr(temp, MemOperand(str.X(), offset)); 1551 __ Ldr(temp1, MemOperand(arg.X(), offset)); 1552 __ Cmp(temp, temp1); 1553 __ B(&return_false, ne); 1554 } 1555 } else { 1556 // Return true if both strings are empty. Even with string compression `count == 0` means empty. 1557 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, 1558 "Expecting 0=compressed, 1=uncompressed"); 1559 __ Cbz(temp, &return_true); 1560 1561 if (mirror::kUseStringCompression) { 1562 // For string compression, calculate the number of bytes to compare (not chars). 1563 // This could in theory exceed INT32_MAX, so treat temp as unsigned. 1564 __ And(temp1, temp, Operand(1)); // Extract compression flag. 1565 __ Lsr(temp, temp, 1u); // Extract length. 1566 __ Lsl(temp, temp, temp1); // Calculate number of bytes to compare. 1567 } 1568 1569 // Store offset of string value in preparation for comparison loop 1570 __ Mov(temp1, value_offset); 1571 1572 temp1 = temp1.X(); 1573 Register temp2 = XRegisterFrom(locations->GetTemp(0)); 1574 // Loop to compare strings 8 bytes at a time starting at the front of the string. 1575 __ Bind(&loop); 1576 __ Ldr(out, MemOperand(str.X(), temp1)); 1577 __ Ldr(temp2, MemOperand(arg.X(), temp1)); 1578 __ Add(temp1, temp1, Operand(sizeof(uint64_t))); 1579 __ Cmp(out, temp2); 1580 __ B(&return_false, ne); 1581 // With string compression, we have compared 8 bytes, otherwise 4 chars. 1582 __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags); 1583 __ B(&loop, hi); 1584 } 1585 1586 // Return true and exit the function. 1587 // If loop does not result in returning false, we return true. 1588 __ Bind(&return_true); 1589 __ Mov(out, 1); 1590 __ B(&end); 1591 1592 // Return false and exit the function. 1593 __ Bind(&return_false); 1594 __ Mov(out, 0); 1595 __ Bind(&end); 1596 } 1597 1598 static void GenerateVisitStringIndexOf(HInvoke* invoke, 1599 MacroAssembler* masm, 1600 CodeGeneratorARM64* codegen, 1601 bool start_at_zero) { 1602 LocationSummary* locations = invoke->GetLocations(); 1603 1604 // Note that the null check must have been done earlier. 1605 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); 1606 1607 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically, 1608 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char. 
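  // For example, a constant code point of 0x1F600 branches straight to the slow path, while a
  // constant at or below 0xFFFF, or an argument already typed as char, needs no check before the
  // kQuickIndexOf call.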
1609 SlowPathCodeARM64* slow_path = nullptr; 1610 HInstruction* code_point = invoke->InputAt(1); 1611 if (code_point->IsIntConstant()) { 1612 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) { 1613 // Always needs the slow-path. We could directly dispatch to it, but this case should be 1614 // rare, so for simplicity just put the full slow-path down and branch unconditionally. 1615 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 1616 codegen->AddSlowPath(slow_path); 1617 __ B(slow_path->GetEntryLabel()); 1618 __ Bind(slow_path->GetExitLabel()); 1619 return; 1620 } 1621 } else if (code_point->GetType() != DataType::Type::kUint16) { 1622 Register char_reg = WRegisterFrom(locations->InAt(1)); 1623 __ Tst(char_reg, 0xFFFF0000); 1624 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 1625 codegen->AddSlowPath(slow_path); 1626 __ B(ne, slow_path->GetEntryLabel()); 1627 } 1628 1629 if (start_at_zero) { 1630 // Start-index = 0. 1631 Register tmp_reg = WRegisterFrom(locations->GetTemp(0)); 1632 __ Mov(tmp_reg, 0); 1633 } 1634 1635 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path); 1636 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>(); 1637 1638 if (slow_path != nullptr) { 1639 __ Bind(slow_path->GetExitLabel()); 1640 } 1641 } 1642 1643 void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { 1644 LocationSummary* locations = new (allocator_) LocationSummary( 1645 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1646 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's 1647 // best to align the inputs accordingly. 1648 InvokeRuntimeCallingConvention calling_convention; 1649 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1650 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1651 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32)); 1652 1653 // Need to send start_index=0. 1654 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); 1655 } 1656 1657 void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { 1658 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true); 1659 } 1660 1661 void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { 1662 LocationSummary* locations = new (allocator_) LocationSummary( 1663 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1664 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's 1665 // best to align the inputs accordingly. 
1666 InvokeRuntimeCallingConvention calling_convention; 1667 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1668 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1669 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1670 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32)); 1671 } 1672 1673 void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { 1674 GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false); 1675 } 1676 1677 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { 1678 LocationSummary* locations = new (allocator_) LocationSummary( 1679 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1680 InvokeRuntimeCallingConvention calling_convention; 1681 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1682 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1683 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1684 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); 1685 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); 1686 } 1687 1688 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { 1689 MacroAssembler* masm = GetVIXLAssembler(); 1690 LocationSummary* locations = invoke->GetLocations(); 1691 1692 Register byte_array = WRegisterFrom(locations->InAt(0)); 1693 __ Cmp(byte_array, 0); 1694 SlowPathCodeARM64* slow_path = 1695 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 1696 codegen_->AddSlowPath(slow_path); 1697 __ B(eq, slow_path->GetEntryLabel()); 1698 1699 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); 1700 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>(); 1701 __ Bind(slow_path->GetExitLabel()); 1702 } 1703 1704 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) { 1705 LocationSummary* locations = 1706 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1707 InvokeRuntimeCallingConvention calling_convention; 1708 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1709 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); 1710 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); 1711 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); 1712 } 1713 1714 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) { 1715 // No need to emit code checking whether `locations->InAt(2)` is a null 1716 // pointer, as callers of the native method 1717 // 1718 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) 1719 // 1720 // all include a null check on `data` before calling that method. 
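  // The entrypoint takes (offset, charCount, data) in the first three runtime argument registers,
  // as arranged by the locations builder above, and returns the new string.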
1721 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc()); 1722 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>(); 1723 } 1724 1725 void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) { 1726 LocationSummary* locations = new (allocator_) LocationSummary( 1727 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); 1728 InvokeRuntimeCallingConvention calling_convention; 1729 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); 1730 locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); 1731 } 1732 1733 void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) { 1734 MacroAssembler* masm = GetVIXLAssembler(); 1735 LocationSummary* locations = invoke->GetLocations(); 1736 1737 Register string_to_copy = WRegisterFrom(locations->InAt(0)); 1738 __ Cmp(string_to_copy, 0); 1739 SlowPathCodeARM64* slow_path = 1740 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 1741 codegen_->AddSlowPath(slow_path); 1742 __ B(eq, slow_path->GetEntryLabel()); 1743 1744 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path); 1745 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>(); 1746 __ Bind(slow_path->GetExitLabel()); 1747 } 1748 1749 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 1750 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); 1751 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); 1752 DCHECK(DataType::IsFloatingPointType(invoke->GetType())); 1753 1754 LocationSummary* const locations = 1755 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1756 InvokeRuntimeCallingConvention calling_convention; 1757 1758 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 1759 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); 1760 } 1761 1762 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { 1763 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); 1764 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); 1765 DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType())); 1766 DCHECK(DataType::IsFloatingPointType(invoke->GetType())); 1767 1768 LocationSummary* const locations = 1769 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); 1770 InvokeRuntimeCallingConvention calling_convention; 1771 1772 locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); 1773 locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); 1774 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); 1775 } 1776 1777 static void GenFPToFPCall(HInvoke* invoke, 1778 CodeGeneratorARM64* codegen, 1779 QuickEntrypointEnum entry) { 1780 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc()); 1781 } 1782 1783 void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) { 1784 CreateFPToFPCallLocations(allocator_, invoke); 1785 } 1786 1787 void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) { 1788 GenFPToFPCall(invoke, codegen_, kQuickCos); 1789 } 1790 1791 void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) { 1792 CreateFPToFPCallLocations(allocator_, invoke); 1793 } 1794 1795 void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) { 1796 GenFPToFPCall(invoke, codegen_, 
kQuickSin); 1797 } 1798 1799 void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) { 1800 CreateFPToFPCallLocations(allocator_, invoke); 1801 } 1802 1803 void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) { 1804 GenFPToFPCall(invoke, codegen_, kQuickAcos); 1805 } 1806 1807 void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) { 1808 CreateFPToFPCallLocations(allocator_, invoke); 1809 } 1810 1811 void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) { 1812 GenFPToFPCall(invoke, codegen_, kQuickAsin); 1813 } 1814 1815 void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) { 1816 CreateFPToFPCallLocations(allocator_, invoke); 1817 } 1818 1819 void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) { 1820 GenFPToFPCall(invoke, codegen_, kQuickAtan); 1821 } 1822 1823 void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) { 1824 CreateFPToFPCallLocations(allocator_, invoke); 1825 } 1826 1827 void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) { 1828 GenFPToFPCall(invoke, codegen_, kQuickCbrt); 1829 } 1830 1831 void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) { 1832 CreateFPToFPCallLocations(allocator_, invoke); 1833 } 1834 1835 void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) { 1836 GenFPToFPCall(invoke, codegen_, kQuickCosh); 1837 } 1838 1839 void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) { 1840 CreateFPToFPCallLocations(allocator_, invoke); 1841 } 1842 1843 void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) { 1844 GenFPToFPCall(invoke, codegen_, kQuickExp); 1845 } 1846 1847 void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) { 1848 CreateFPToFPCallLocations(allocator_, invoke); 1849 } 1850 1851 void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) { 1852 GenFPToFPCall(invoke, codegen_, kQuickExpm1); 1853 } 1854 1855 void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) { 1856 CreateFPToFPCallLocations(allocator_, invoke); 1857 } 1858 1859 void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) { 1860 GenFPToFPCall(invoke, codegen_, kQuickLog); 1861 } 1862 1863 void IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) { 1864 CreateFPToFPCallLocations(allocator_, invoke); 1865 } 1866 1867 void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) { 1868 GenFPToFPCall(invoke, codegen_, kQuickLog10); 1869 } 1870 1871 void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) { 1872 CreateFPToFPCallLocations(allocator_, invoke); 1873 } 1874 1875 void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) { 1876 GenFPToFPCall(invoke, codegen_, kQuickSinh); 1877 } 1878 1879 void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) { 1880 CreateFPToFPCallLocations(allocator_, invoke); 1881 } 1882 1883 void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) { 1884 GenFPToFPCall(invoke, codegen_, kQuickTan); 1885 } 1886 1887 void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) { 1888 CreateFPToFPCallLocations(allocator_, invoke); 1889 } 1890 1891 void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) { 1892 GenFPToFPCall(invoke, codegen_, kQuickTanh); 1893 } 1894 1895 void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) { 1896 CreateFPFPToFPCallLocations(allocator_, invoke); 1897 } 1898 1899 void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) { 1900 GenFPToFPCall(invoke, codegen_, 
kQuickAtan2); 1901 } 1902 1903 void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) { 1904 CreateFPFPToFPCallLocations(allocator_, invoke); 1905 } 1906 1907 void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) { 1908 GenFPToFPCall(invoke, codegen_, kQuickPow); 1909 } 1910 1911 void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) { 1912 CreateFPFPToFPCallLocations(allocator_, invoke); 1913 } 1914 1915 void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) { 1916 GenFPToFPCall(invoke, codegen_, kQuickHypot); 1917 } 1918 1919 void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) { 1920 CreateFPFPToFPCallLocations(allocator_, invoke); 1921 } 1922 1923 void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) { 1924 GenFPToFPCall(invoke, codegen_, kQuickNextAfter); 1925 } 1926 1927 void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1928 LocationSummary* locations = 1929 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 1930 locations->SetInAt(0, Location::RequiresRegister()); 1931 locations->SetInAt(1, Location::RequiresRegister()); 1932 locations->SetInAt(2, Location::RequiresRegister()); 1933 locations->SetInAt(3, Location::RequiresRegister()); 1934 locations->SetInAt(4, Location::RequiresRegister()); 1935 1936 locations->AddTemp(Location::RequiresRegister()); 1937 locations->AddTemp(Location::RequiresRegister()); 1938 locations->AddTemp(Location::RequiresRegister()); 1939 } 1940 1941 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { 1942 MacroAssembler* masm = GetVIXLAssembler(); 1943 LocationSummary* locations = invoke->GetLocations(); 1944 1945 // Check assumption that sizeof(Char) is 2 (used in scaling below). 1946 const size_t char_size = DataType::Size(DataType::Type::kUint16); 1947 DCHECK_EQ(char_size, 2u); 1948 1949 // Location of data in char array buffer. 1950 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); 1951 1952 // Location of char array data in string. 1953 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value(); 1954 1955 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin); 1956 // Since getChars() calls getCharsNoCheck() - we use registers rather than constants. 1957 Register srcObj = XRegisterFrom(locations->InAt(0)); 1958 Register srcBegin = XRegisterFrom(locations->InAt(1)); 1959 Register srcEnd = XRegisterFrom(locations->InAt(2)); 1960 Register dstObj = XRegisterFrom(locations->InAt(3)); 1961 Register dstBegin = XRegisterFrom(locations->InAt(4)); 1962 1963 Register src_ptr = XRegisterFrom(locations->GetTemp(0)); 1964 Register num_chr = XRegisterFrom(locations->GetTemp(1)); 1965 Register tmp1 = XRegisterFrom(locations->GetTemp(2)); 1966 1967 UseScratchRegisterScope temps(masm); 1968 Register dst_ptr = temps.AcquireX(); 1969 Register tmp2 = temps.AcquireX(); 1970 1971 vixl::aarch64::Label done; 1972 vixl::aarch64::Label compressed_string_vector_loop; 1973 vixl::aarch64::Label compressed_string_remainder; 1974 __ Sub(num_chr, srcEnd, srcBegin); 1975 // Early out for valid zero-length retrievals. 1976 __ Cbz(num_chr, &done); 1977 1978 // dst address start to copy to. 1979 __ Add(dst_ptr, dstObj, Operand(data_offset)); 1980 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, LSL, 1)); 1981 1982 // src address to copy from. 
1983 __ Add(src_ptr, srcObj, Operand(value_offset)); 1984 vixl::aarch64::Label compressed_string_preloop; 1985 if (mirror::kUseStringCompression) { 1986 // Location of count in string. 1987 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); 1988 // String's length. 1989 __ Ldr(tmp2, MemOperand(srcObj, count_offset)); 1990 __ Tbz(tmp2, 0, &compressed_string_preloop); 1991 } 1992 __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1)); 1993 1994 // Do the copy. 1995 vixl::aarch64::Label loop; 1996 vixl::aarch64::Label remainder; 1997 1998 // Save repairing the value of num_chr on the < 8 character path. 1999 __ Subs(tmp1, num_chr, 8); 2000 __ B(lt, &remainder); 2001 2002 // Keep the result of the earlier subs, we are going to fetch at least 8 characters. 2003 __ Mov(num_chr, tmp1); 2004 2005 // Main loop used for longer fetches loads and stores 8x16-bit characters at a time. 2006 // (Unaligned addresses are acceptable here and not worth inlining extra code to rectify.) 2007 __ Bind(&loop); 2008 __ Ldp(tmp1, tmp2, MemOperand(src_ptr, char_size * 8, PostIndex)); 2009 __ Subs(num_chr, num_chr, 8); 2010 __ Stp(tmp1, tmp2, MemOperand(dst_ptr, char_size * 8, PostIndex)); 2011 __ B(ge, &loop); 2012 2013 __ Adds(num_chr, num_chr, 8); 2014 __ B(eq, &done); 2015 2016 // Main loop for < 8 character case and remainder handling. Loads and stores one 2017 // 16-bit Java character at a time. 2018 __ Bind(&remainder); 2019 __ Ldrh(tmp1, MemOperand(src_ptr, char_size, PostIndex)); 2020 __ Subs(num_chr, num_chr, 1); 2021 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); 2022 __ B(gt, &remainder); 2023 __ B(&done); 2024 2025 if (mirror::kUseStringCompression) { 2026 // For compressed strings, acquire a SIMD temporary register. 2027 VRegister vtmp1 = temps.AcquireVRegisterOfSize(kQRegSize); 2028 const size_t c_char_size = DataType::Size(DataType::Type::kInt8); 2029 DCHECK_EQ(c_char_size, 1u); 2030 __ Bind(&compressed_string_preloop); 2031 __ Add(src_ptr, src_ptr, Operand(srcBegin)); 2032 2033 // Save repairing the value of num_chr on the < 8 character path. 2034 __ Subs(tmp1, num_chr, 8); 2035 __ B(lt, &compressed_string_remainder); 2036 2037 // Keep the result of the earlier subs, we are going to fetch at least 8 characters. 2038 __ Mov(num_chr, tmp1); 2039 2040 // Main loop for compressed src, copying 8 characters (8-bit) to (16-bit) at a time. 2041 // Uses SIMD instructions. 2042 __ Bind(&compressed_string_vector_loop); 2043 __ Ld1(vtmp1.V8B(), MemOperand(src_ptr, c_char_size * 8, PostIndex)); 2044 __ Subs(num_chr, num_chr, 8); 2045 __ Uxtl(vtmp1.V8H(), vtmp1.V8B()); 2046 __ St1(vtmp1.V8H(), MemOperand(dst_ptr, char_size * 8, PostIndex)); 2047 __ B(ge, &compressed_string_vector_loop); 2048 2049 __ Adds(num_chr, num_chr, 8); 2050 __ B(eq, &done); 2051 2052 // Loop for < 8 character case and remainder handling with a compressed src. 2053 // Copies 1 character (8-bit) to (16-bit) at a time. 2054 __ Bind(&compressed_string_remainder); 2055 __ Ldrb(tmp1, MemOperand(src_ptr, c_char_size, PostIndex)); 2056 __ Strh(tmp1, MemOperand(dst_ptr, char_size, PostIndex)); 2057 __ Subs(num_chr, num_chr, Operand(1)); 2058 __ B(gt, &compressed_string_remainder); 2059 } 2060 2061 __ Bind(&done); 2062 } 2063 2064 // Mirrors ARRAYCOPY_SHORT_CHAR_ARRAY_THRESHOLD in libcore, so we can choose to use the native 2065 // implementation there for longer copy lengths. 
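// Constant lengths above this threshold are not intrinsified at all, and non-constant lengths
// above it (or negative) branch to the slow path at run time.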
2066 static constexpr int32_t kSystemArrayCopyCharThreshold = 32; 2067 2068 static void SetSystemArrayCopyLocationRequires(LocationSummary* locations, 2069 uint32_t at, 2070 HInstruction* input) { 2071 HIntConstant* const_input = input->AsIntConstant(); 2072 if (const_input != nullptr && !vixl::aarch64::Assembler::IsImmAddSub(const_input->GetValue())) { 2073 locations->SetInAt(at, Location::RequiresRegister()); 2074 } else { 2075 locations->SetInAt(at, Location::RegisterOrConstant(input)); 2076 } 2077 } 2078 2079 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { 2080 // Check to see if we have known failures that will cause us to have to bail out 2081 // to the runtime, and just generate the runtime call directly. 2082 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); 2083 HIntConstant* dst_pos = invoke->InputAt(3)->AsIntConstant(); 2084 2085 // The positions must be non-negative. 2086 if ((src_pos != nullptr && src_pos->GetValue() < 0) || 2087 (dst_pos != nullptr && dst_pos->GetValue() < 0)) { 2088 // We will have to fail anyways. 2089 return; 2090 } 2091 2092 // The length must be >= 0 and not so long that we would (currently) prefer libcore's 2093 // native implementation. 2094 HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); 2095 if (length != nullptr) { 2096 int32_t len = length->GetValue(); 2097 if (len < 0 || len > kSystemArrayCopyCharThreshold) { 2098 // Just call as normal. 2099 return; 2100 } 2101 } 2102 2103 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); 2104 LocationSummary* locations = 2105 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); 2106 // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length). 2107 locations->SetInAt(0, Location::RequiresRegister()); 2108 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1)); 2109 locations->SetInAt(2, Location::RequiresRegister()); 2110 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3)); 2111 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4)); 2112 2113 locations->AddTemp(Location::RequiresRegister()); 2114 locations->AddTemp(Location::RequiresRegister()); 2115 locations->AddTemp(Location::RequiresRegister()); 2116 } 2117 2118 static void CheckSystemArrayCopyPosition(MacroAssembler* masm, 2119 const Location& pos, 2120 const Register& input, 2121 const Location& length, 2122 SlowPathCodeARM64* slow_path, 2123 const Register& temp, 2124 bool length_is_input_length = false) { 2125 const int32_t length_offset = mirror::Array::LengthOffset().Int32Value(); 2126 if (pos.IsConstant()) { 2127 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue(); 2128 if (pos_const == 0) { 2129 if (!length_is_input_length) { 2130 // Check that length(input) >= length. 2131 __ Ldr(temp, MemOperand(input, length_offset)); 2132 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32)); 2133 __ B(slow_path->GetEntryLabel(), lt); 2134 } 2135 } else { 2136 // Check that length(input) >= pos. 2137 __ Ldr(temp, MemOperand(input, length_offset)); 2138 __ Subs(temp, temp, pos_const); 2139 __ B(slow_path->GetEntryLabel(), lt); 2140 2141 // Check that (length(input) - pos) >= length. 2142 __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32)); 2143 __ B(slow_path->GetEntryLabel(), lt); 2144 } 2145 } else if (length_is_input_length) { 2146 // The only way the copy can succeed is if pos is zero. 
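    // (When the copy length equals the length of `input`, any non-zero `pos` would run past the
    // end of `input`.)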
2147 __ Cbnz(WRegisterFrom(pos), slow_path->GetEntryLabel()); 2148 } else { 2149 // Check that pos >= 0. 2150 Register pos_reg = WRegisterFrom(pos); 2151 __ Tbnz(pos_reg, pos_reg.GetSizeInBits() - 1, slow_path->GetEntryLabel()); 2152 2153 // Check that pos <= length(input) && (length(input) - pos) >= length. 2154 __ Ldr(temp, MemOperand(input, length_offset)); 2155 __ Subs(temp, temp, pos_reg); 2156 // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt). 2157 __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge); 2158 __ B(slow_path->GetEntryLabel(), lt); 2159 } 2160 } 2161 2162 // Compute base source address, base destination address, and end 2163 // source address for System.arraycopy* intrinsics in `src_base`, 2164 // `dst_base` and `src_end` respectively. 2165 static void GenSystemArrayCopyAddresses(MacroAssembler* masm, 2166 DataType::Type type, 2167 const Register& src, 2168 const Location& src_pos, 2169 const Register& dst, 2170 const Location& dst_pos, 2171 const Location& copy_length, 2172 const Register& src_base, 2173 const Register& dst_base, 2174 const Register& src_end) { 2175 // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics. 2176 DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16) 2177 << "Unexpected element type: " << type; 2178 const int32_t element_size = DataType::Size(type); 2179 const int32_t element_size_shift = DataType::SizeShift(type); 2180 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); 2181 2182 if (src_pos.IsConstant()) { 2183 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 2184 __ Add(src_base, src, element_size * constant + data_offset); 2185 } else { 2186 __ Add(src_base, src, data_offset); 2187 __ Add(src_base, src_base, Operand(XRegisterFrom(src_pos), LSL, element_size_shift)); 2188 } 2189 2190 if (dst_pos.IsConstant()) { 2191 int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue(); 2192 __ Add(dst_base, dst, element_size * constant + data_offset); 2193 } else { 2194 __ Add(dst_base, dst, data_offset); 2195 __ Add(dst_base, dst_base, Operand(XRegisterFrom(dst_pos), LSL, element_size_shift)); 2196 } 2197 2198 if (copy_length.IsConstant()) { 2199 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); 2200 __ Add(src_end, src_base, element_size * constant); 2201 } else { 2202 __ Add(src_end, src_base, Operand(XRegisterFrom(copy_length), LSL, element_size_shift)); 2203 } 2204 } 2205 2206 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { 2207 MacroAssembler* masm = GetVIXLAssembler(); 2208 LocationSummary* locations = invoke->GetLocations(); 2209 Register src = XRegisterFrom(locations->InAt(0)); 2210 Location src_pos = locations->InAt(1); 2211 Register dst = XRegisterFrom(locations->InAt(2)); 2212 Location dst_pos = locations->InAt(3); 2213 Location length = locations->InAt(4); 2214 2215 SlowPathCodeARM64* slow_path = 2216 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 2217 codegen_->AddSlowPath(slow_path); 2218 2219 // If source and destination are the same, take the slow path. Overlapping copy regions must be 2220 // copied in reverse and we can't know in all cases if it's needed. 2221 __ Cmp(src, dst); 2222 __ B(slow_path->GetEntryLabel(), eq); 2223 2224 // Bail out if the source is null. 2225 __ Cbz(src, slow_path->GetEntryLabel()); 2226 2227 // Bail out if the destination is null. 
2228 __ Cbz(dst, slow_path->GetEntryLabel()); 2229 2230 if (!length.IsConstant()) { 2231 // Merge the following two comparisons into one: 2232 // If the length is negative, bail out (delegate to libcore's native implementation). 2233 // If the length > 32 then (currently) prefer libcore's native implementation. 2234 __ Cmp(WRegisterFrom(length), kSystemArrayCopyCharThreshold); 2235 __ B(slow_path->GetEntryLabel(), hi); 2236 } else { 2237 // We have already checked in the LocationsBuilder for the constant case. 2238 DCHECK_GE(length.GetConstant()->AsIntConstant()->GetValue(), 0); 2239 DCHECK_LE(length.GetConstant()->AsIntConstant()->GetValue(), 32); 2240 } 2241 2242 Register src_curr_addr = WRegisterFrom(locations->GetTemp(0)); 2243 Register dst_curr_addr = WRegisterFrom(locations->GetTemp(1)); 2244 Register src_stop_addr = WRegisterFrom(locations->GetTemp(2)); 2245 2246 CheckSystemArrayCopyPosition(masm, 2247 src_pos, 2248 src, 2249 length, 2250 slow_path, 2251 src_curr_addr, 2252 false); 2253 2254 CheckSystemArrayCopyPosition(masm, 2255 dst_pos, 2256 dst, 2257 length, 2258 slow_path, 2259 src_curr_addr, 2260 false); 2261 2262 src_curr_addr = src_curr_addr.X(); 2263 dst_curr_addr = dst_curr_addr.X(); 2264 src_stop_addr = src_stop_addr.X(); 2265 2266 GenSystemArrayCopyAddresses(masm, 2267 DataType::Type::kUint16, 2268 src, 2269 src_pos, 2270 dst, 2271 dst_pos, 2272 length, 2273 src_curr_addr, 2274 dst_curr_addr, 2275 src_stop_addr); 2276 2277 // Iterate over the arrays and do a raw copy of the chars. 2278 const int32_t char_size = DataType::Size(DataType::Type::kUint16); 2279 UseScratchRegisterScope temps(masm); 2280 Register tmp = temps.AcquireW(); 2281 vixl::aarch64::Label loop, done; 2282 __ Bind(&loop); 2283 __ Cmp(src_curr_addr, src_stop_addr); 2284 __ B(&done, eq); 2285 __ Ldrh(tmp, MemOperand(src_curr_addr, char_size, PostIndex)); 2286 __ Strh(tmp, MemOperand(dst_curr_addr, char_size, PostIndex)); 2287 __ B(&loop); 2288 __ Bind(&done); 2289 2290 __ Bind(slow_path->GetExitLabel()); 2291 } 2292 2293 // We can choose to use the native implementation there for longer copy lengths. 2294 static constexpr int32_t kSystemArrayCopyThreshold = 128; 2295 2296 // CodeGenerator::CreateSystemArrayCopyLocationSummary use three temporary registers. 2297 // We want to use two temporary registers in order to reduce the register pressure in arm64. 2298 // So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary. 2299 void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { 2300 // The only read barrier implementation supporting the 2301 // SystemArrayCopy intrinsic is the Baker-style read barriers. 2302 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { 2303 return; 2304 } 2305 2306 // Check to see if we have known failures that will cause us to have to bail out 2307 // to the runtime, and just generate the runtime call directly. 2308 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant(); 2309 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant(); 2310 2311 // The positions must be non-negative. 2312 if ((src_pos != nullptr && src_pos->GetValue() < 0) || 2313 (dest_pos != nullptr && dest_pos->GetValue() < 0)) { 2314 // We will have to fail anyways. 2315 return; 2316 } 2317 2318 // The length must be >= 0. 2319 HIntConstant* length = invoke->InputAt(4)->AsIntConstant(); 2320 if (length != nullptr) { 2321 int32_t len = length->GetValue(); 2322 if (len < 0 || len >= kSystemArrayCopyThreshold) { 2323 // Just call as normal. 
2324 return; 2325 } 2326 } 2327 2328 SystemArrayCopyOptimizations optimizations(invoke); 2329 2330 if (optimizations.GetDestinationIsSource()) { 2331 if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) { 2332 // We only support backward copying if source and destination are the same. 2333 return; 2334 } 2335 } 2336 2337 if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) { 2338 // We currently don't intrinsify primitive copying. 2339 return; 2340 } 2341 2342 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); 2343 LocationSummary* locations = 2344 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); 2345 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length). 2346 locations->SetInAt(0, Location::RequiresRegister()); 2347 SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1)); 2348 locations->SetInAt(2, Location::RequiresRegister()); 2349 SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3)); 2350 SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4)); 2351 2352 locations->AddTemp(Location::RequiresRegister()); 2353 locations->AddTemp(Location::RequiresRegister()); 2354 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2355 // Temporary register IP0, obtained from the VIXL scratch register 2356 // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64 2357 // (because that register is clobbered by ReadBarrierMarkRegX 2358 // entry points). It cannot be used in calls to 2359 // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier 2360 // either. For these reasons, get a third extra temporary register 2361 // from the register allocator. 2362 locations->AddTemp(Location::RequiresRegister()); 2363 } else { 2364 // Cases other than Baker read barriers: the third temporary will 2365 // be acquired from the VIXL scratch register pool. 2366 } 2367 } 2368 2369 void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { 2370 // The only read barrier implementation supporting the 2371 // SystemArrayCopy intrinsic is the Baker-style read barriers. 
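  // (The locations builder above already refuses to intrinsify when a non-Baker read barrier is
  // in use, hence the DCHECK below.)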
2372 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); 2373 2374 MacroAssembler* masm = GetVIXLAssembler(); 2375 LocationSummary* locations = invoke->GetLocations(); 2376 2377 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); 2378 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); 2379 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); 2380 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value(); 2381 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); 2382 2383 Register src = XRegisterFrom(locations->InAt(0)); 2384 Location src_pos = locations->InAt(1); 2385 Register dest = XRegisterFrom(locations->InAt(2)); 2386 Location dest_pos = locations->InAt(3); 2387 Location length = locations->InAt(4); 2388 Register temp1 = WRegisterFrom(locations->GetTemp(0)); 2389 Location temp1_loc = LocationFrom(temp1); 2390 Register temp2 = WRegisterFrom(locations->GetTemp(1)); 2391 Location temp2_loc = LocationFrom(temp2); 2392 2393 SlowPathCodeARM64* intrinsic_slow_path = 2394 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); 2395 codegen_->AddSlowPath(intrinsic_slow_path); 2396 2397 vixl::aarch64::Label conditions_on_positions_validated; 2398 SystemArrayCopyOptimizations optimizations(invoke); 2399 2400 // If source and destination are the same, we go to slow path if we need to do 2401 // forward copying. 2402 if (src_pos.IsConstant()) { 2403 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue(); 2404 if (dest_pos.IsConstant()) { 2405 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue(); 2406 if (optimizations.GetDestinationIsSource()) { 2407 // Checked when building locations. 2408 DCHECK_GE(src_pos_constant, dest_pos_constant); 2409 } else if (src_pos_constant < dest_pos_constant) { 2410 __ Cmp(src, dest); 2411 __ B(intrinsic_slow_path->GetEntryLabel(), eq); 2412 } 2413 // Checked when building locations. 2414 DCHECK(!optimizations.GetDestinationIsSource() 2415 || (src_pos_constant >= dest_pos.GetConstant()->AsIntConstant()->GetValue())); 2416 } else { 2417 if (!optimizations.GetDestinationIsSource()) { 2418 __ Cmp(src, dest); 2419 __ B(&conditions_on_positions_validated, ne); 2420 } 2421 __ Cmp(WRegisterFrom(dest_pos), src_pos_constant); 2422 __ B(intrinsic_slow_path->GetEntryLabel(), gt); 2423 } 2424 } else { 2425 if (!optimizations.GetDestinationIsSource()) { 2426 __ Cmp(src, dest); 2427 __ B(&conditions_on_positions_validated, ne); 2428 } 2429 __ Cmp(RegisterFrom(src_pos, invoke->InputAt(1)->GetType()), 2430 OperandFrom(dest_pos, invoke->InputAt(3)->GetType())); 2431 __ B(intrinsic_slow_path->GetEntryLabel(), lt); 2432 } 2433 2434 __ Bind(&conditions_on_positions_validated); 2435 2436 if (!optimizations.GetSourceIsNotNull()) { 2437 // Bail out if the source is null. 2438 __ Cbz(src, intrinsic_slow_path->GetEntryLabel()); 2439 } 2440 2441 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) { 2442 // Bail out if the destination is null. 2443 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel()); 2444 } 2445 2446 // We have already checked in the LocationsBuilder for the constant case. 2447 if (!length.IsConstant() && 2448 !optimizations.GetCountIsSourceLength() && 2449 !optimizations.GetCountIsDestinationLength()) { 2450 // Merge the following two comparisons into one: 2451 // If the length is negative, bail out (delegate to libcore's native implementation). 
2452 // If the length >= 128 then (currently) prefer native implementation. 2453 __ Cmp(WRegisterFrom(length), kSystemArrayCopyThreshold); 2454 __ B(intrinsic_slow_path->GetEntryLabel(), hs); 2455 } 2456 // Validity checks: source. 2457 CheckSystemArrayCopyPosition(masm, 2458 src_pos, 2459 src, 2460 length, 2461 intrinsic_slow_path, 2462 temp1, 2463 optimizations.GetCountIsSourceLength()); 2464 2465 // Validity checks: dest. 2466 CheckSystemArrayCopyPosition(masm, 2467 dest_pos, 2468 dest, 2469 length, 2470 intrinsic_slow_path, 2471 temp1, 2472 optimizations.GetCountIsDestinationLength()); 2473 { 2474 // We use a block to end the scratch scope before the write barrier, thus 2475 // freeing the temporary registers so they can be used in `MarkGCCard`. 2476 UseScratchRegisterScope temps(masm); 2477 Location temp3_loc; // Used only for Baker read barrier. 2478 Register temp3; 2479 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2480 temp3_loc = locations->GetTemp(2); 2481 temp3 = WRegisterFrom(temp3_loc); 2482 } else { 2483 temp3 = temps.AcquireW(); 2484 } 2485 2486 if (!optimizations.GetDoesNotNeedTypeCheck()) { 2487 // Check whether all elements of the source array are assignable to the component 2488 // type of the destination array. We do two checks: the classes are the same, 2489 // or the destination is Object[]. If none of these checks succeed, we go to the 2490 // slow path. 2491 2492 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2493 if (!optimizations.GetSourceIsNonPrimitiveArray()) { 2494 // /* HeapReference<Class> */ temp1 = src->klass_ 2495 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2496 temp1_loc, 2497 src.W(), 2498 class_offset, 2499 temp3_loc, 2500 /* needs_null_check= */ false, 2501 /* use_load_acquire= */ false); 2502 // Bail out if the source is not a non primitive array. 2503 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2504 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2505 temp1_loc, 2506 temp1, 2507 component_offset, 2508 temp3_loc, 2509 /* needs_null_check= */ false, 2510 /* use_load_acquire= */ false); 2511 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel()); 2512 // If heap poisoning is enabled, `temp1` has been unpoisoned 2513 // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 2514 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_); 2515 __ Ldrh(temp1, HeapOperand(temp1, primitive_offset)); 2516 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2517 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); 2518 } 2519 2520 // /* HeapReference<Class> */ temp1 = dest->klass_ 2521 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2522 temp1_loc, 2523 dest.W(), 2524 class_offset, 2525 temp3_loc, 2526 /* needs_null_check= */ false, 2527 /* use_load_acquire= */ false); 2528 2529 if (!optimizations.GetDestinationIsNonPrimitiveArray()) { 2530 // Bail out if the destination is not a non primitive array. 2531 // 2532 // Register `temp1` is not trashed by the read barrier emitted 2533 // by GenerateFieldLoadWithBakerReadBarrier below, as that 2534 // method produces a call to a ReadBarrierMarkRegX entry point, 2535 // which saves all potentially live registers, including 2536 // temporaries such a `temp1`. 
2537 // /* HeapReference<Class> */ temp2 = temp1->component_type_ 2538 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2539 temp2_loc, 2540 temp1, 2541 component_offset, 2542 temp3_loc, 2543 /* needs_null_check= */ false, 2544 /* use_load_acquire= */ false); 2545 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); 2546 // If heap poisoning is enabled, `temp2` has been unpoisoned 2547 // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 2548 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); 2549 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset)); 2550 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2551 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel()); 2552 } 2553 2554 // For the same reason given earlier, `temp1` is not trashed by the 2555 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below. 2556 // /* HeapReference<Class> */ temp2 = src->klass_ 2557 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2558 temp2_loc, 2559 src.W(), 2560 class_offset, 2561 temp3_loc, 2562 /* needs_null_check= */ false, 2563 /* use_load_acquire= */ false); 2564 // Note: if heap poisoning is on, we are comparing two unpoisoned references here. 2565 __ Cmp(temp1, temp2); 2566 2567 if (optimizations.GetDestinationIsTypedObjectArray()) { 2568 vixl::aarch64::Label do_copy; 2569 __ B(&do_copy, eq); 2570 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2571 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2572 temp1_loc, 2573 temp1, 2574 component_offset, 2575 temp3_loc, 2576 /* needs_null_check= */ false, 2577 /* use_load_acquire= */ false); 2578 // /* HeapReference<Class> */ temp1 = temp1->super_class_ 2579 // We do not need to emit a read barrier for the following 2580 // heap reference load, as `temp1` is only used in a 2581 // comparison with null below, and this reference is not 2582 // kept afterwards. 2583 __ Ldr(temp1, HeapOperand(temp1, super_offset)); 2584 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); 2585 __ Bind(&do_copy); 2586 } else { 2587 __ B(intrinsic_slow_path->GetEntryLabel(), ne); 2588 } 2589 } else { 2590 // Non read barrier code. 2591 2592 // /* HeapReference<Class> */ temp1 = dest->klass_ 2593 __ Ldr(temp1, MemOperand(dest, class_offset)); 2594 // /* HeapReference<Class> */ temp2 = src->klass_ 2595 __ Ldr(temp2, MemOperand(src, class_offset)); 2596 bool did_unpoison = false; 2597 if (!optimizations.GetDestinationIsNonPrimitiveArray() || 2598 !optimizations.GetSourceIsNonPrimitiveArray()) { 2599 // One or two of the references need to be unpoisoned. Unpoison them 2600 // both to make the identity check valid. 2601 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2602 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); 2603 did_unpoison = true; 2604 } 2605 2606 if (!optimizations.GetDestinationIsNonPrimitiveArray()) { 2607 // Bail out if the destination is not a non primitive array. 
2608 // /* HeapReference<Class> */ temp3 = temp1->component_type_ 2609 __ Ldr(temp3, HeapOperand(temp1, component_offset)); 2610 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); 2611 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); 2612 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); 2613 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); 2614 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2615 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); 2616 } 2617 2618 if (!optimizations.GetSourceIsNonPrimitiveArray()) { 2619 // Bail out if the source is not a non primitive array. 2620 // /* HeapReference<Class> */ temp3 = temp2->component_type_ 2621 __ Ldr(temp3, HeapOperand(temp2, component_offset)); 2622 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel()); 2623 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp3); 2624 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_); 2625 __ Ldrh(temp3, HeapOperand(temp3, primitive_offset)); 2626 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2627 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel()); 2628 } 2629 2630 __ Cmp(temp1, temp2); 2631 2632 if (optimizations.GetDestinationIsTypedObjectArray()) { 2633 vixl::aarch64::Label do_copy; 2634 __ B(&do_copy, eq); 2635 if (!did_unpoison) { 2636 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2637 } 2638 // /* HeapReference<Class> */ temp1 = temp1->component_type_ 2639 __ Ldr(temp1, HeapOperand(temp1, component_offset)); 2640 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2641 // /* HeapReference<Class> */ temp1 = temp1->super_class_ 2642 __ Ldr(temp1, HeapOperand(temp1, super_offset)); 2643 // No need to unpoison the result, we're comparing against null. 2644 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel()); 2645 __ Bind(&do_copy); 2646 } else { 2647 __ B(intrinsic_slow_path->GetEntryLabel(), ne); 2648 } 2649 } 2650 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) { 2651 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray()); 2652 // Bail out if the source is not a non primitive array. 2653 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2654 // /* HeapReference<Class> */ temp1 = src->klass_ 2655 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2656 temp1_loc, 2657 src.W(), 2658 class_offset, 2659 temp3_loc, 2660 /* needs_null_check= */ false, 2661 /* use_load_acquire= */ false); 2662 // /* HeapReference<Class> */ temp2 = temp1->component_type_ 2663 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke, 2664 temp2_loc, 2665 temp1, 2666 component_offset, 2667 temp3_loc, 2668 /* needs_null_check= */ false, 2669 /* use_load_acquire= */ false); 2670 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); 2671 // If heap poisoning is enabled, `temp2` has been unpoisoned 2672 // by the the previous call to GenerateFieldLoadWithBakerReadBarrier. 
2673 } else { 2674 // /* HeapReference<Class> */ temp1 = src->klass_ 2675 __ Ldr(temp1, HeapOperand(src.W(), class_offset)); 2676 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp1); 2677 // /* HeapReference<Class> */ temp2 = temp1->component_type_ 2678 __ Ldr(temp2, HeapOperand(temp1, component_offset)); 2679 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel()); 2680 codegen_->GetAssembler()->MaybeUnpoisonHeapReference(temp2); 2681 } 2682 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_); 2683 __ Ldrh(temp2, HeapOperand(temp2, primitive_offset)); 2684 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); 2685 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel()); 2686 } 2687 2688 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) { 2689 // Null constant length: not need to emit the loop code at all. 2690 } else { 2691 Register src_curr_addr = temp1.X(); 2692 Register dst_curr_addr = temp2.X(); 2693 Register src_stop_addr = temp3.X(); 2694 vixl::aarch64::Label done; 2695 const DataType::Type type = DataType::Type::kReference; 2696 const int32_t element_size = DataType::Size(type); 2697 2698 if (length.IsRegister()) { 2699 // Don't enter the copy loop if the length is null. 2700 __ Cbz(WRegisterFrom(length), &done); 2701 } 2702 2703 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { 2704 // TODO: Also convert this intrinsic to the IsGcMarking strategy? 2705 2706 // SystemArrayCopy implementation for Baker read barriers (see 2707 // also CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier): 2708 // 2709 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState(); 2710 // lfence; // Load fence or artificial data dependency to prevent load-load reordering 2711 // bool is_gray = (rb_state == ReadBarrier::GrayState()); 2712 // if (is_gray) { 2713 // // Slow-path copy. 2714 // do { 2715 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++))); 2716 // } while (src_ptr != end_ptr) 2717 // } else { 2718 // // Fast-path copy. 2719 // do { 2720 // *dest_ptr++ = *src_ptr++; 2721 // } while (src_ptr != end_ptr) 2722 // } 2723 2724 // Make sure `tmp` is not IP0, as it is clobbered by 2725 // ReadBarrierMarkRegX entry points in 2726 // ReadBarrierSystemArrayCopySlowPathARM64. 2727 DCHECK(temps.IsAvailable(ip0)); 2728 temps.Exclude(ip0); 2729 Register tmp = temps.AcquireW(); 2730 DCHECK_NE(LocationFrom(tmp).reg(), IP0); 2731 // Put IP0 back in the pool so that VIXL has at least one 2732 // scratch register available to emit macro-instructions (note 2733 // that IP1 is already used for `tmp`). Indeed some 2734 // macro-instructions used in GenSystemArrayCopyAddresses 2735 // (invoked hereunder) may require a scratch register (for 2736 // instance to emit a load with a large constant offset). 2737 temps.Include(ip0); 2738 2739 // /* int32_t */ monitor = src->monitor_ 2740 __ Ldr(tmp, HeapOperand(src.W(), monitor_offset)); 2741 // /* LockWord */ lock_word = LockWord(monitor) 2742 static_assert(sizeof(LockWord) == sizeof(int32_t), 2743 "art::LockWord and int32_t have different sizes."); 2744 2745 // Introduce a dependency on the lock_word including rb_state, 2746 // to prevent load-load reordering, and without using 2747 // a memory barrier (which would be more expensive). 2748 // `src` is unchanged by this operation, but its value now depends 2749 // on `tmp`. 
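      // Since `tmp` was loaded with a 32-bit LDR, its upper 32 bits are zero, so `tmp.X() LSR #32`
      // is always zero: the ADD leaves the value of `src` unchanged while creating the dependency.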
2750 __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32)); 2751 2752 // Compute base source address, base destination address, and end 2753 // source address for System.arraycopy* intrinsics in `src_base`, 2754 // `dst_base` and `src_end` respectively. 2755 // Note that `src_curr_addr` is computed from from `src` (and 2756 // `src_pos`) here, and thus honors the artificial dependency 2757 // of `src` on `tmp`. 2758 GenSystemArrayCopyAddresses(masm, 2759 type, 2760 src, 2761 src_pos, 2762 dest, 2763 dest_pos, 2764 length, 2765 src_curr_addr, 2766 dst_curr_addr, 2767 src_stop_addr); 2768 2769 // Slow path used to copy array when `src` is gray. 2770 SlowPathCodeARM64* read_barrier_slow_path = 2771 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64( 2772 invoke, LocationFrom(tmp)); 2773 codegen_->AddSlowPath(read_barrier_slow_path); 2774 2775 // Given the numeric representation, it's enough to check the low bit of the rb_state. 2776 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0"); 2777 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); 2778 __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel()); 2779 2780 // Fast-path copy. 2781 // Iterate over the arrays and do a raw copy of the objects. We don't need to 2782 // poison/unpoison. 2783 vixl::aarch64::Label loop; 2784 __ Bind(&loop); 2785 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); 2786 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); 2787 __ Cmp(src_curr_addr, src_stop_addr); 2788 __ B(&loop, ne); 2789 2790 __ Bind(read_barrier_slow_path->GetExitLabel()); 2791 } else { 2792 // Non read barrier code. 2793 // Compute base source address, base destination address, and end 2794 // source address for System.arraycopy* intrinsics in `src_base`, 2795 // `dst_base` and `src_end` respectively. 2796 GenSystemArrayCopyAddresses(masm, 2797 type, 2798 src, 2799 src_pos, 2800 dest, 2801 dest_pos, 2802 length, 2803 src_curr_addr, 2804 dst_curr_addr, 2805 src_stop_addr); 2806 // Iterate over the arrays and do a raw copy of the objects. We don't need to 2807 // poison/unpoison. 2808 vixl::aarch64::Label loop; 2809 __ Bind(&loop); 2810 { 2811 Register tmp = temps.AcquireW(); 2812 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex)); 2813 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex)); 2814 } 2815 __ Cmp(src_curr_addr, src_stop_addr); 2816 __ B(&loop, ne); 2817 } 2818 __ Bind(&done); 2819 } 2820 } 2821 2822 // We only need one card marking on the destination array. 2823 codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false); 2824 2825 __ Bind(intrinsic_slow_path->GetExitLabel()); 2826 } 2827 2828 static void GenIsInfinite(LocationSummary* locations, 2829 bool is64bit, 2830 MacroAssembler* masm) { 2831 Operand infinity; 2832 Operand tst_mask; 2833 Register out; 2834 2835 if (is64bit) { 2836 infinity = kPositiveInfinityDouble; 2837 tst_mask = MaskLeastSignificant<uint64_t>(63); 2838 out = XRegisterFrom(locations->Out()); 2839 } else { 2840 infinity = kPositiveInfinityFloat; 2841 tst_mask = MaskLeastSignificant<uint32_t>(31); 2842 out = WRegisterFrom(locations->Out()); 2843 } 2844 2845 MoveFPToInt(locations, is64bit, masm); 2846 // Checks whether exponent bits are all 1 and fraction bits are all 0. 2847 __ Eor(out, out, infinity); 2848 // TST bitmask is used to mask out the sign bit: either 0x7fffffff or 0x7fffffffffffffff 2849 // depending on is64bit. 
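  // E.g. for a float +Inf input (0x7f800000) the EOR above produces 0; for -Inf (0xff800000) it
  // leaves only the sign bit, which the mask discards. Either way TST sets Z and CSET yields 1,
  // while any finite or NaN input keeps a masked bit set and yields 0.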
  __ Tst(out, tst_mask);
  __ Cset(out, eq);
}

void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
  GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}

void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      calling_convention.GetReturnLocation(DataType::Type::kReference),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
}

void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info =
      IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
  LocationSummary* locations = invoke->GetLocations();
  MacroAssembler* masm = GetVIXLAssembler();

  Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireW();
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (static_cast<uint32_t>(value - info.low) < info.length) {
      // Just embed the j.l.Integer in the code.
      DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
      codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
    } else {
      DCHECK(locations->CanCall());
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
                                             info.integer_boot_image_offset);
      __ Mov(temp.W(), value);
      __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
      // one.
      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    }
  } else {
    DCHECK(locations->CanCall());
    Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
    // Check bounds of our cache.
    __ Add(out.W(), in.W(), -info.low);
    __ Cmp(out.W(), info.length);
    vixl::aarch64::Label allocate, done;
    __ B(&allocate, hs);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
    MemOperand source = HeapOperand(
        temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
    codegen_->Load(DataType::Type::kReference, out, source);
    codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
    __ B(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
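    // This is the allocation arm of the Java-level expression (a sketch only;
    // the actual cache bounds come from `info`):
    //   (in - low) < cache.length ? IntegerCache.cache[in - low] : new Integer(in)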
    codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
                                           info.integer_boot_image_offset);
    __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
    // one.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32);
  UseScratchRegisterScope temps(masm);
  Register temp = temps.AcquireX();

  __ Add(temp, tr, Thread::InterruptedOffset<kArm64PointerSize>().Int32Value());
  __ Ldar(out.W(), MemOperand(temp));

  vixl::aarch64::Label done;
  __ Cbz(out.W(), &done);
  __ Stlr(wzr, MemOperand(temp));
  __ Bind(&done);
}

void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
  locations->SetInAt(0, Location::Any());
}

void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }

void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
    return;
  }

  LocationSummary* locations = new (allocator_) LocationSummary(invoke,
                                                                LocationSummary::kNoCall,
                                                                kIntrinsified);

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

// Lower the invoke of CRC32.update(int crc, int b).
void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());

  MacroAssembler* masm = GetVIXLAssembler();

  Register crc = InputRegisterAt(invoke, 0);
  Register val = InputRegisterAt(invoke, 1);
  Register out = OutputRegister(invoke);

  // The general algorithm of the CRC32 calculation is:
  //   crc = ~crc
  //   result = crc32_for_byte(crc, b)
  //   crc = ~result
  // It is directly lowered to three instructions.

  UseScratchRegisterScope temps(masm);
  Register tmp = temps.AcquireSameSizeAs(out);

  __ Mvn(tmp, crc);
  __ Crc32b(tmp, tmp, val);
  __ Mvn(out, tmp);
}

// Generate code using CRC32 instructions which calculates
// a CRC32 checksum of the provided bytes.
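// Note: java.util.zip.CRC32 uses the standard CRC-32 polynomial, which is the
// one implemented by the ARMv8 CRC32B/H/W/X instructions used below (not the
// CRC32C* variants); the leading and trailing Mvn perform the initial and
// final bit inversions of the algorithm.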
//
// Parameters:
//   masm - the VIXL macro assembler
//   crc - a register holding the initial CRC value
//   ptr - a register holding the memory address of the bytes
//   length - a register holding the number of bytes to process
//   out - a register to put the result of the calculation into
static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
                                                        const Register& crc,
                                                        const Register& ptr,
                                                        const Register& length,
                                                        const Register& out) {
  // The algorithm of the CRC32 of bytes is:
  //   crc = ~crc
  //   process the first few bytes to make the array 8-byte aligned
  //   while the array has at least 8 bytes do:
  //     crc = crc32_of_8bytes(crc, 8_bytes(array))
  //   if the array has 4 bytes:
  //     crc = crc32_of_4bytes(crc, 4_bytes(array))
  //   if the array has 2 bytes:
  //     crc = crc32_of_2bytes(crc, 2_bytes(array))
  //   if the array has a byte:
  //     crc = crc32_of_byte(crc, 1_byte(array))
  //   crc = ~crc

  vixl::aarch64::Label loop, done;
  vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
  vixl::aarch64::Label aligned2, aligned4, aligned8;

  // Use VIXL scratch registers, as the VIXL macro assembler won't use them in
  // the instructions below.
  UseScratchRegisterScope temps(masm);
  Register len = temps.AcquireW();
  Register array_elem = temps.AcquireW();

  __ Mvn(out, crc);
  __ Mov(len, length);

  __ Tbz(ptr, 0, &aligned2);
  __ Subs(len, len, 1);
  __ B(&done, lo);
  __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
  __ Crc32b(out, out, array_elem);

  __ Bind(&aligned2);
  __ Tbz(ptr, 1, &aligned4);
  __ Subs(len, len, 2);
  __ B(&process_1byte, lo);
  __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
  __ Crc32h(out, out, array_elem);

  __ Bind(&aligned4);
  __ Tbz(ptr, 2, &aligned8);
  __ Subs(len, len, 4);
  __ B(&process_2bytes, lo);
  __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
  __ Crc32w(out, out, array_elem);

  __ Bind(&aligned8);
  __ Subs(len, len, 8);
  // If len < 8, go to process the data by 4 bytes, 2 bytes and a byte.
  __ B(&process_4bytes, lo);

  // The main loop processing the data by 8 bytes.
  __ Bind(&loop);
  __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
  __ Subs(len, len, 8);
  __ Crc32x(out, out, array_elem.X());
  // If len >= 8, process the next 8 bytes.
  __ B(&loop, hs);

  // Process the remaining data, which is less than 8 bytes.
  // The code generated below works with values of len
  // in the range [-8, 0).
  // The lowest three bits are used to detect whether 4 bytes, 2 bytes or
  // a byte can be processed.
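  // For example, with 5 bytes remaining len == 5 - 8 == -3 == 0b...11111101:
  // bits 2 and 0 are set, so 4 bytes and then 1 byte are processed.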
  // The checking order is from bit 2 to bit 0:
  //   bit 2 is set: at least 4 bytes are available
  //   bit 1 is set: at least 2 bytes are available
  //   bit 0 is set: at least a byte is available
  __ Bind(&process_4bytes);
  // Goto process_2bytes if less than four bytes are available.
  __ Tbz(len, 2, &process_2bytes);
  __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
  __ Crc32w(out, out, array_elem);

  __ Bind(&process_2bytes);
  // Goto process_1byte if less than two bytes are available.
  __ Tbz(len, 1, &process_1byte);
  __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
  __ Crc32h(out, out, array_elem);

  __ Bind(&process_1byte);
  // Goto done if no bytes are available.
  __ Tbz(len, 0, &done);
  __ Ldrb(array_elem, MemOperand(ptr));
  __ Crc32b(out, out, array_elem);

  __ Bind(&done);
  __ Mvn(out, out);
}

// The array size threshold above which the library-provided implementation
// of CRC32.updateBytes is used instead of the intrinsic.
static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;

void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
    return;
  }

  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke,
                                       LocationSummary::kCallOnSlowPath,
                                       kIntrinsified);

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
  locations->SetInAt(3, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len).
//
// Note: The intrinsic is not used if len exceeds a threshold.
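//
// A rough sketch of the generated code (helper names are illustrative only):
//   if ((uint32_t) len > kCRC32UpdateBytesThreshold) {
//     return CRC32.updateBytes(crc, b, off, len);  // via IntrinsicSlowPathARM64
//   }
//   return Crc32OfBytes(crc, &b[off], len);  // GenerateCodeForCalculationCRC32ValueOfBytes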
void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());

  MacroAssembler* masm = GetVIXLAssembler();
  LocationSummary* locations = invoke->GetLocations();

  SlowPathCodeARM64* slow_path =
      new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
  codegen_->AddSlowPath(slow_path);

  Register length = WRegisterFrom(locations->InAt(3));
  __ Cmp(length, kCRC32UpdateBytesThreshold);
  __ B(slow_path->GetEntryLabel(), hi);

  const uint32_t array_data_offset =
      mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
  Register ptr = XRegisterFrom(locations->GetTemp(0));
  Register array = XRegisterFrom(locations->InAt(1));
  Location offset = locations->InAt(2);
  if (offset.IsConstant()) {
    int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
    __ Add(ptr, array, array_data_offset + offset_value);
  } else {
    __ Add(ptr, array, array_data_offset);
    __ Add(ptr, ptr, XRegisterFrom(offset));
  }

  Register crc = WRegisterFrom(locations->InAt(0));
  Register out = WRegisterFrom(locations->Out());

  GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
    return;
  }

  LocationSummary* locations =
      new (allocator_) LocationSummary(invoke,
                                       LocationSummary::kNoCall,
                                       kIntrinsified);

  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

// Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len).
//
// There is no need to generate code checking whether addr is 0.
// The method updateByteBuffer is a private method of java.util.zip.CRC32,
// which guarantees it is never called from outside of the CRC32 class.
// The address of a DirectBuffer is always passed to updateByteBuffer. An
// implementation of an empty DirectBuffer might use a zero address, but then
// its length must also be zero. The generated code handles a zero length
// correctly.
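//
// A rough sketch of the generated code (helper name is illustrative only):
//   return Crc32OfBytes(crc, (const uint8_t*) addr + off, len);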
void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());

  MacroAssembler* masm = GetVIXLAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register addr = XRegisterFrom(locations->InAt(1));
  Register ptr = XRegisterFrom(locations->GetTemp(0));
  __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));

  Register crc = WRegisterFrom(locations->InAt(0));
  Register length = WRegisterFrom(locations->InAt(3));
  Register out = WRegisterFrom(locations->Out());
  GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
}

void IntrinsicLocationsBuilderARM64::VisitFP16ToFloat(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
    return;
  }

  LocationSummary* locations = new (allocator_) LocationSummary(invoke,
                                                                LocationSummary::kNoCall,
                                                                kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorARM64::VisitFP16ToFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope scratch_scope(masm);
  Register bits = InputRegisterAt(invoke, 0);
  VRegister out = SRegisterFrom(invoke->GetLocations()->Out());
  VRegister half = scratch_scope.AcquireH();
  __ Fmov(half, bits);  // ARMv8.2
  __ Fcvt(out, half);
}

void IntrinsicLocationsBuilderARM64::VisitFP16ToHalf(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
    return;
  }

  LocationSummary* locations = new (allocator_) LocationSummary(invoke,
                                                                LocationSummary::kNoCall,
                                                                kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARM64::VisitFP16ToHalf(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
  MacroAssembler* masm = GetVIXLAssembler();
  UseScratchRegisterScope scratch_scope(masm);
  VRegister in = SRegisterFrom(invoke->GetLocations()->InAt(0));
  VRegister half = scratch_scope.AcquireH();
  Register out = WRegisterFrom(invoke->GetLocations()->Out());
  __ Fcvt(half, in);
  __ Fmov(out, half);
  __ Sxth(out, out);  // sign extend due to returning a short type.
}

template<typename OP>
void GenerateFP16Round(HInvoke* invoke,
                       CodeGeneratorARM64* const codegen_,
                       MacroAssembler* masm,
                       const OP roundOp) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasFP16());
  LocationSummary* locations = invoke->GetLocations();
  UseScratchRegisterScope scratch_scope(masm);
  Register out = WRegisterFrom(locations->Out());
  VRegister half = scratch_scope.AcquireH();
  __ Fmov(half, WRegisterFrom(locations->InAt(0)));
  roundOp(half, half);
  __ Fmov(out, half);
  __ Sxth(out, out);
}

void IntrinsicLocationsBuilderARM64::VisitFP16Floor(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
    return;
  }

  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFP16Floor(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  auto roundOp = [masm](const VRegister& out, const VRegister& in) {
    __ Frintm(out, in);  // Round towards Minus infinity
  };
  GenerateFP16Round(invoke, codegen_, masm, roundOp);
}

void IntrinsicLocationsBuilderARM64::VisitFP16Ceil(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
    return;
  }

  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFP16Ceil(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  auto roundOp = [masm](const VRegister& out, const VRegister& in) {
    __ Frintp(out, in);  // Round towards Plus infinity
  };
  GenerateFP16Round(invoke, codegen_, masm, roundOp);
}

void IntrinsicLocationsBuilderARM64::VisitFP16Rint(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
    return;
  }

  CreateIntToIntLocations(allocator_, invoke);
}

void IntrinsicCodeGeneratorARM64::VisitFP16Rint(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  auto roundOp = [masm](const VRegister& out, const VRegister& in) {
    __ Frintn(out, in);  // Round to nearest, with ties to even
  };
  GenerateFP16Round(invoke, codegen_, masm, roundOp);
}

template<typename OP>
void GenerateFP16Compare(HInvoke* invoke,
                         CodeGeneratorARM64* codegen,
                         MacroAssembler* masm,
                         const OP compareOp) {
  DCHECK(codegen->GetInstructionSetFeatures().HasFP16());
  LocationSummary* locations = invoke->GetLocations();
  Register out = WRegisterFrom(locations->Out());
  VRegister half0 = HRegisterFrom(locations->GetTemp(0));
  VRegister half1 = HRegisterFrom(locations->GetTemp(1));
  __ Fmov(half0, WRegisterFrom(locations->InAt(0)));
  __ Fmov(half1, WRegisterFrom(locations->InAt(1)));
  compareOp(out, half0, half1);
}

static inline void GenerateFP16Compare(HInvoke* invoke,
                                       CodeGeneratorARM64* codegen,
                                       MacroAssembler* masm,
                                       vixl::aarch64::Condition cond) {
  auto compareOp = [masm, cond](const Register out, const VRegister& in0, const VRegister& in1) {
    __ Fcmp(in0, in1);
    __ Cset(out, cond);
  };
  GenerateFP16Compare(invoke, codegen, masm, compareOp);
}

void IntrinsicLocationsBuilderARM64::VisitFP16Greater(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
    return;
  }

  CreateIntIntToIntLocations(allocator_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
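  // A second FP temporary is required: GenerateFP16Compare moves each of the
  // two inputs into its own half-precision register before the comparison.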
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorARM64::VisitFP16Greater(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  GenerateFP16Compare(invoke, codegen_, masm, gt);
}

void IntrinsicLocationsBuilderARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
    return;
  }

  CreateIntIntToIntLocations(allocator_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorARM64::VisitFP16GreaterEquals(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  GenerateFP16Compare(invoke, codegen_, masm, ge);
}

void IntrinsicLocationsBuilderARM64::VisitFP16Less(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
    return;
  }

  CreateIntIntToIntLocations(allocator_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorARM64::VisitFP16Less(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  GenerateFP16Compare(invoke, codegen_, masm, mi);
}

void IntrinsicLocationsBuilderARM64::VisitFP16LessEquals(HInvoke* invoke) {
  if (!codegen_->GetInstructionSetFeatures().HasFP16()) {
    return;
  }

  CreateIntIntToIntLocations(allocator_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorARM64::VisitFP16LessEquals(HInvoke* invoke) {
  MacroAssembler* masm = GetVIXLAssembler();
  GenerateFP16Compare(invoke, codegen_, masm, ls);
}

UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)

UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferLength);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferToString);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendObject);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendString);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharSequence);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendCharArray);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendBoolean);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendChar);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendInt);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendLong);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendFloat);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderAppendDouble);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderLength);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBuilderToString);

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(ARM64)

#undef __

}  // namespace arm64
}  // namespace art