1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "assembler_x86_64.h" 18 19 #include "base/casts.h" 20 #include "base/memory_region.h" 21 #include "entrypoints/quick/quick_entrypoints.h" 22 #include "thread.h" 23 24 namespace art { 25 namespace x86_64 { 26 27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) { 28 return os << reg.AsRegister(); 29 } 30 31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) { 32 return os << reg.AsFloatRegister(); 33 } 34 35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) { 36 return os << "ST" << static_cast<int>(reg); 37 } 38 39 std::ostream& operator<<(std::ostream& os, const Address& addr) { 40 switch (addr.mod()) { 41 case 0: 42 if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) { 43 return os << "(%" << addr.cpu_rm() << ")"; 44 } else if (addr.base() == RBP) { 45 return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index() 46 << "," << (1 << addr.scale()) << ")"; 47 } 48 return os << "(%" << addr.cpu_base() << ",%" 49 << addr.cpu_index() << "," << (1 << addr.scale()) << ")"; 50 case 1: 51 if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) { 52 return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")"; 53 } 54 return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%" 55 << addr.cpu_index() << "," << (1 << 
addr.scale()) << ")"; 56 case 2: 57 if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) { 58 return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")"; 59 } 60 return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%" 61 << addr.cpu_index() << "," << (1 << addr.scale()) << ")"; 62 default: 63 return os << "<address?>"; 64 } 65 } 66 67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() { 68 if (has_AVX_ || has_AVX2_) { 69 return true; 70 } 71 return false; 72 } 73 74 75 void X86_64Assembler::call(CpuRegister reg) { 76 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 77 EmitOptionalRex32(reg); 78 EmitUint8(0xFF); 79 EmitRegisterOperand(2, reg.LowBits()); 80 } 81 82 83 void X86_64Assembler::call(const Address& address) { 84 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 85 EmitOptionalRex32(address); 86 EmitUint8(0xFF); 87 EmitOperand(2, address); 88 } 89 90 91 void X86_64Assembler::call(Label* label) { 92 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 93 EmitUint8(0xE8); 94 static const int kSize = 5; 95 // Offset by one because we already have emitted the opcode. 96 EmitLabel(label, kSize - 1); 97 } 98 99 void X86_64Assembler::pushq(CpuRegister reg) { 100 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 101 EmitOptionalRex32(reg); 102 EmitUint8(0x50 + reg.LowBits()); 103 } 104 105 106 void X86_64Assembler::pushq(const Address& address) { 107 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 108 EmitOptionalRex32(address); 109 EmitUint8(0xFF); 110 EmitOperand(6, address); 111 } 112 113 114 void X86_64Assembler::pushq(const Immediate& imm) { 115 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 116 CHECK(imm.is_int32()); // pushq only supports 32b immediate. 
117 if (imm.is_int8()) { 118 EmitUint8(0x6A); 119 EmitUint8(imm.value() & 0xFF); 120 } else { 121 EmitUint8(0x68); 122 EmitImmediate(imm); 123 } 124 } 125 126 127 void X86_64Assembler::popq(CpuRegister reg) { 128 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 129 EmitOptionalRex32(reg); 130 EmitUint8(0x58 + reg.LowBits()); 131 } 132 133 134 void X86_64Assembler::popq(const Address& address) { 135 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 136 EmitOptionalRex32(address); 137 EmitUint8(0x8F); 138 EmitOperand(0, address); 139 } 140 141 142 void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) { 143 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 144 if (imm.is_int32()) { 145 // 32 bit. Note: sign-extends. 146 EmitRex64(dst); 147 EmitUint8(0xC7); 148 EmitRegisterOperand(0, dst.LowBits()); 149 EmitInt32(static_cast<int32_t>(imm.value())); 150 } else { 151 EmitRex64(dst); 152 EmitUint8(0xB8 + dst.LowBits()); 153 EmitInt64(imm.value()); 154 } 155 } 156 157 158 void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) { 159 CHECK(imm.is_int32()); 160 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 161 EmitOptionalRex32(dst); 162 EmitUint8(0xB8 + dst.LowBits()); 163 EmitImmediate(imm); 164 } 165 166 167 void X86_64Assembler::movq(const Address& dst, const Immediate& imm) { 168 CHECK(imm.is_int32()); 169 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 170 EmitRex64(dst); 171 EmitUint8(0xC7); 172 EmitOperand(0, dst); 173 EmitImmediate(imm); 174 } 175 176 177 void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) { 178 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 179 // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64 180 EmitRex64(src, dst); 181 EmitUint8(0x89); 182 EmitRegisterOperand(src.LowBits(), dst.LowBits()); 183 } 184 185 186 void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) { 187 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 188 EmitOptionalRex32(dst, src); 189 
EmitUint8(0x8B); 190 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 191 } 192 193 194 void X86_64Assembler::movq(CpuRegister dst, const Address& src) { 195 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 196 EmitRex64(dst, src); 197 EmitUint8(0x8B); 198 EmitOperand(dst.LowBits(), src); 199 } 200 201 202 void X86_64Assembler::movl(CpuRegister dst, const Address& src) { 203 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 204 EmitOptionalRex32(dst, src); 205 EmitUint8(0x8B); 206 EmitOperand(dst.LowBits(), src); 207 } 208 209 210 void X86_64Assembler::movq(const Address& dst, CpuRegister src) { 211 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 212 EmitRex64(src, dst); 213 EmitUint8(0x89); 214 EmitOperand(src.LowBits(), dst); 215 } 216 217 218 void X86_64Assembler::movl(const Address& dst, CpuRegister src) { 219 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 220 EmitOptionalRex32(src, dst); 221 EmitUint8(0x89); 222 EmitOperand(src.LowBits(), dst); 223 } 224 225 void X86_64Assembler::movl(const Address& dst, const Immediate& imm) { 226 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 227 EmitOptionalRex32(dst); 228 EmitUint8(0xC7); 229 EmitOperand(0, dst); 230 EmitImmediate(imm); 231 } 232 233 void X86_64Assembler::movntl(const Address& dst, CpuRegister src) { 234 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 235 EmitOptionalRex32(src, dst); 236 EmitUint8(0x0F); 237 EmitUint8(0xC3); 238 EmitOperand(src.LowBits(), dst); 239 } 240 241 void X86_64Assembler::movntq(const Address& dst, CpuRegister src) { 242 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 243 EmitRex64(src, dst); 244 EmitUint8(0x0F); 245 EmitUint8(0xC3); 246 EmitOperand(src.LowBits(), dst); 247 } 248 249 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) { 250 cmov(c, dst, src, true); 251 } 252 253 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) { 254 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 255 
EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex()); 256 EmitUint8(0x0F); 257 EmitUint8(0x40 + c); 258 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 259 } 260 261 262 void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) { 263 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 264 if (is64bit) { 265 EmitRex64(dst, src); 266 } else { 267 EmitOptionalRex32(dst, src); 268 } 269 EmitUint8(0x0F); 270 EmitUint8(0x40 + c); 271 EmitOperand(dst.LowBits(), src); 272 } 273 274 275 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) { 276 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 277 EmitOptionalByteRegNormalizingRex32(dst, src); 278 EmitUint8(0x0F); 279 EmitUint8(0xB6); 280 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 281 } 282 283 284 void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) { 285 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 286 // Byte register is only in the source register form, so we don't use 287 // EmitOptionalByteRegNormalizingRex32(dst, src); 288 EmitOptionalRex32(dst, src); 289 EmitUint8(0x0F); 290 EmitUint8(0xB6); 291 EmitOperand(dst.LowBits(), src); 292 } 293 294 295 void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) { 296 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 297 EmitOptionalByteRegNormalizingRex32(dst, src); 298 EmitUint8(0x0F); 299 EmitUint8(0xBE); 300 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 301 } 302 303 304 void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) { 305 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 306 // Byte register is only in the source register form, so we don't use 307 // EmitOptionalByteRegNormalizingRex32(dst, src); 308 EmitOptionalRex32(dst, src); 309 EmitUint8(0x0F); 310 EmitUint8(0xBE); 311 EmitOperand(dst.LowBits(), src); 312 } 313 314 315 void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) { 316 LOG(FATAL) << "Use movzxb or movsxb instead."; 317 
} 318 319 320 void X86_64Assembler::movb(const Address& dst, CpuRegister src) { 321 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 322 EmitOptionalByteRegNormalizingRex32(src, dst); 323 EmitUint8(0x88); 324 EmitOperand(src.LowBits(), dst); 325 } 326 327 328 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) { 329 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 330 EmitOptionalRex32(dst); 331 EmitUint8(0xC6); 332 EmitOperand(Register::RAX, dst); 333 CHECK(imm.is_int8()); 334 EmitUint8(imm.value() & 0xFF); 335 } 336 337 338 void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) { 339 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 340 EmitOptionalRex32(dst, src); 341 EmitUint8(0x0F); 342 EmitUint8(0xB7); 343 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 344 } 345 346 347 void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) { 348 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 349 EmitOptionalRex32(dst, src); 350 EmitUint8(0x0F); 351 EmitUint8(0xB7); 352 EmitOperand(dst.LowBits(), src); 353 } 354 355 356 void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) { 357 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 358 EmitOptionalRex32(dst, src); 359 EmitUint8(0x0F); 360 EmitUint8(0xBF); 361 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 362 } 363 364 365 void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) { 366 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 367 EmitOptionalRex32(dst, src); 368 EmitUint8(0x0F); 369 EmitUint8(0xBF); 370 EmitOperand(dst.LowBits(), src); 371 } 372 373 374 void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) { 375 LOG(FATAL) << "Use movzxw or movsxw instead."; 376 } 377 378 379 void X86_64Assembler::movw(const Address& dst, CpuRegister src) { 380 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 381 EmitOperandSizeOverride(); 382 EmitOptionalRex32(src, dst); 383 EmitUint8(0x89); 384 EmitOperand(src.LowBits(), dst); 385 } 386 387 388 
void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOperandSizeOverride();  // 66 prefix selects the 16-bit operand size.
  EmitOptionalRex32(dst);
  EmitUint8(0xC7);
  EmitOperand(Register::RAX, dst);  // RAX == 0, i.e. the /0 opcode extension.
  CHECK(imm.is_uint16() || imm.is_int16());
  // Emit the imm16 little-endian, byte by byte.
  EmitUint8(imm.value() & 0xFF);
  EmitUint8(imm.value() >> 8);
}


void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x8D);  // REX.W + 8D /r: lea r64, m
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x8D);  // 8D /r: lea r32, m
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
  // Prefer the VEX encoding when AVX/AVX2 is available.
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);  // 0F 28 /r: movaps xmm, xmm/m128
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  uint8_t byte_zero, byte_one, byte_two;
  // The 2-byte VEX prefix (C5) can only encode REX.R; fall back to the
  // 3-byte form (C4) when both operands need a REX extension bit.
  bool is_twobyte_form = true;
  bool load = dst.NeedsRex();
  bool store = !load;

  if (src.NeedsRex()&& dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // Instruction VEX Prefix
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
  if (is_twobyte_form) {
    bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
    byte_one = EmitVexPrefixByteOne(rex_bit,
                                    vvvv_reg,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
                                    /*X=*/ false,
                                    src.NeedsRex(),
                                    SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
                                    SET_VEX_L_128,
                                    SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  // Instruction Opcode
  // 0x29 is the MR (store) form; 0x28 the RM (load) form — the two
  // encodings are equivalent, with reg/rm operand roles swapped below.
  if (is_twobyte_form && store) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands
  if (is_twobyte_form && store) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}

void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}

/**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix
  // The memory operand's REX.X/REX.B bits force the 3-byte VEX form.
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);  // 0F 10 /r: movups xmm, m128 (unaligned load)
  EmitOperand(dst.LowBits(), src);
}

/** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;
  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_x && !Rex_b) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x10);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovaps(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x29);  // 0F 29 /r: movaps m128, xmm (aligned store)
  EmitOperand(src.LowBits(), dst);
}

/** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x29);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}

void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovups(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);  // 0F 11 /r: movups m128, xmm (unaligned store)
  EmitOperand(src.LowBits(), dst);
}

/** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = dst.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x11);
  // Instruction Operands
  EmitOperand(src.LowBits(), dst);
}


void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // F3 0F 10: movss xmm, m32
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // F3 0F 11: movss m32, xmm
  EmitOptionalRex32(src, dst);
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitOperand(src.LowBits(), dst);
}


void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
  EmitUint8(0x0F);
  EmitUint8(0x11);
  EmitXmmRegisterOperand(src.LowBits(), dst);
}


void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);  // REX.W + 63 /r: movsxd r64, r/m32
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x63);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
  movd(dst, src, true);  // Defaults to the 64-bit (movq) form.
}

void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
  movd(dst, src, true);  // Defaults to the 64-bit (movq) form.
}

void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  // REX.W (via is64bit) selects movq xmm, r64 over movd xmm, r32.
  EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x6E);
  EmitOperand(dst.LowBits(), Operand(src));
}

void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x7E);
  EmitOperand(src.LowBits(), Operand(dst));
}

void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // F3 0F 58: addss
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // F3 0F 5C: subss
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // F3 0F 59: mulss
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);  // F3 0F 5E: divss
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF3);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);
  EmitOperand(dst.LowBits(), src);
}


void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x58);  // 0F 58: addps (packed single)
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5C);  // 0F 5C: subps (packed single)
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// VEX.NDS.128.0F.WIG 58 /r VADDPS xmm1, xmm2, xmm3
void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!add_right.NeedsRex()) {
    is_twobyte_form = true;
  }
  // The first source operand is carried in the VEX vvvv field.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   add_right.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x58);
  EmitXmmRegisterOperand(dst.LowBits(), add_right);
}

// VEX.NDS.128.0F.WIG 5C /r VSUBPS xmm1, xmm2, xmm3
void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
    byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(byte_zero);
  EmitUint8(byte_one);
  if (!is_twobyte_form) {
    EmitUint8(byte_two);
  }
  EmitUint8(0x5C);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}


void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x59);  // 0F 59: mulps (packed single)
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// VEX.NDS.128.0F.WIG 59 /r VMULPS xmm1, xmm2, xmm3
void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x59);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5E);  // 0F 5E: divps (packed single)
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// VEX.NDS.128.0F.WIG 5E /r VDIVPS xmm1, xmm2, xmm3
void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x5E);
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

// x87: D9 /0 fld m32fp.
void X86_64Assembler::flds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(0, src);
}


// x87: D9 /2 fst m32fp.
void X86_64Assembler::fsts(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(2, dst);
}


// x87: D9 /3 fstp m32fp (store and pop).
void X86_64Assembler::fstps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(3, dst);
}


void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // 66 0F 28 /r: movapd xmm, xmm/m128
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  // 2-byte VEX unless both operands need REX extension bits.
  bool is_twobyte_form = true;

  if (src.NeedsRex() && dst.NeedsRex()) {
    is_twobyte_form = false;
  }
  // Instruction VEX Prefix
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  bool load = dst.NeedsRex();
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
    ByteOne = EmitVexPrefixByteOne(rex_bit,
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  // 0x29 (MR/store form) and 0x28 (RM/load form) are equivalent encodings
  // with the operand roles swapped below.
  if (is_twobyte_form && !load) {
    EmitUint8(0x29);
  } else {
    EmitUint8(0x28);
  }
  // Instruction Operands
  if (is_twobyte_form && !load) {
    EmitXmmRegisterOperand(src.LowBits(), dst);
  } else {
    EmitXmmRegisterOperand(dst.LowBits(), src);
  }
}

void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovapd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x28);
  EmitOperand(dst.LowBits(), src);
}

/** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  uint8_t ByteZero, ByteOne, ByteTwo;
  bool is_twobyte_form = false;

  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
                                   Rex_b,
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  // Instruction Opcode
  EmitUint8(0x28);
  // Instruction Operands
  EmitOperand(dst.LowBits(), src);
}

void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
  if (CpuHasAVXorAVX2FeatureFlag()) {
    vmovupd(dst, src);
    return;
  }
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);  // 66 0F 10 /r: movupd xmm, m128
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x10);
  EmitOperand(dst.LowBits(), src);
}

/** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
// NOTE(review): this definition continues past the end of the visible chunk.
void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero, ByteOne, ByteTwo;

  // Instruction VEX Prefix
  uint8_t rex = src.rex();
  bool Rex_x = rex & GET_REX_X;
  bool Rex_b = rex & GET_REX_B;
  if (!Rex_b && !Rex_x) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   vvvv_reg,
                                   SET_VEX_L_128,
                                   SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   Rex_x,
1160 Rex_b, 1161 SET_VEX_M_0F); 1162 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, 1163 SET_VEX_L_128, 1164 SET_VEX_PP_66); 1165 } 1166 EmitUint8(ByteZero); 1167 EmitUint8(ByteOne); 1168 if (!is_twobyte_form) 1169 EmitUint8(ByteTwo); 1170 // Instruction Opcode 1171 EmitUint8(0x10); 1172 // Instruction Operands 1173 EmitOperand(dst.LowBits(), src); 1174 } 1175 1176 void X86_64Assembler::movapd(const Address& dst, XmmRegister src) { 1177 if (CpuHasAVXorAVX2FeatureFlag()) { 1178 vmovapd(dst, src); 1179 return; 1180 } 1181 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1182 EmitUint8(0x66); 1183 EmitOptionalRex32(src, dst); 1184 EmitUint8(0x0F); 1185 EmitUint8(0x29); 1186 EmitOperand(src.LowBits(), dst); 1187 } 1188 1189 /** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */ 1190 void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) { 1191 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1192 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1193 bool is_twobyte_form = false; 1194 uint8_t ByteZero, ByteOne, ByteTwo; 1195 // Instruction VEX Prefix 1196 uint8_t rex = dst.rex(); 1197 bool Rex_x = rex & GET_REX_X; 1198 bool Rex_b = rex & GET_REX_B; 1199 if (!Rex_x && !Rex_b) { 1200 is_twobyte_form = true; 1201 } 1202 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1203 if (is_twobyte_form) { 1204 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64(); 1205 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(), 1206 vvvv_reg, 1207 SET_VEX_L_128, 1208 SET_VEX_PP_66); 1209 } else { 1210 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(), 1211 Rex_x, 1212 Rex_b, 1213 SET_VEX_M_0F); 1214 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, 1215 SET_VEX_L_128, 1216 SET_VEX_PP_66); 1217 } 1218 EmitUint8(ByteZero); 1219 EmitUint8(ByteOne); 1220 if (!is_twobyte_form) { 1221 EmitUint8(ByteTwo); 1222 } 1223 // Instruction Opcode 1224 EmitUint8(0x29); 1225 // Instruction Operands 1226 EmitOperand(src.LowBits(), dst); 1227 } 1228 1229 void X86_64Assembler::movupd(const Address& dst, 
XmmRegister src) { 1230 if (CpuHasAVXorAVX2FeatureFlag()) { 1231 vmovupd(dst, src); 1232 return; 1233 } 1234 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1235 EmitUint8(0x66); 1236 EmitOptionalRex32(src, dst); 1237 EmitUint8(0x0F); 1238 EmitUint8(0x11); 1239 EmitOperand(src.LowBits(), dst); 1240 } 1241 1242 /** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */ 1243 void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) { 1244 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1245 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1246 bool is_twobyte_form = false; 1247 uint8_t ByteZero, ByteOne, ByteTwo; 1248 1249 // Instruction VEX Prefix 1250 uint8_t rex = dst.rex(); 1251 bool Rex_x = rex & GET_REX_X; 1252 bool Rex_b = rex & GET_REX_B; 1253 if (!Rex_x && !Rex_b) { 1254 is_twobyte_form = true; 1255 } 1256 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1257 if (is_twobyte_form) { 1258 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64(); 1259 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(), 1260 vvvv_reg, 1261 SET_VEX_L_128, 1262 SET_VEX_PP_66); 1263 } else { 1264 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(), 1265 Rex_x, 1266 Rex_b, 1267 SET_VEX_M_0F); 1268 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, 1269 SET_VEX_L_128, 1270 SET_VEX_PP_66); 1271 } 1272 EmitUint8(ByteZero); 1273 EmitUint8(ByteOne); 1274 if (!is_twobyte_form) { 1275 EmitUint8(ByteTwo); 1276 } 1277 // Instruction Opcode 1278 EmitUint8(0x11); 1279 // Instruction Operands 1280 EmitOperand(src.LowBits(), dst); 1281 } 1282 1283 1284 void X86_64Assembler::movsd(XmmRegister dst, const Address& src) { 1285 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1286 EmitUint8(0xF2); 1287 EmitOptionalRex32(dst, src); 1288 EmitUint8(0x0F); 1289 EmitUint8(0x10); 1290 EmitOperand(dst.LowBits(), src); 1291 } 1292 1293 1294 void X86_64Assembler::movsd(const Address& dst, XmmRegister src) { 1295 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1296 EmitUint8(0xF2); 1297 
EmitOptionalRex32(src, dst); 1298 EmitUint8(0x0F); 1299 EmitUint8(0x11); 1300 EmitOperand(src.LowBits(), dst); 1301 } 1302 1303 1304 void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) { 1305 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1306 EmitUint8(0xF2); 1307 EmitOptionalRex32(src, dst); // Movsd is MR encoding instead of the usual RM. 1308 EmitUint8(0x0F); 1309 EmitUint8(0x11); 1310 EmitXmmRegisterOperand(src.LowBits(), dst); 1311 } 1312 1313 1314 void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) { 1315 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1316 EmitUint8(0xF2); 1317 EmitOptionalRex32(dst, src); 1318 EmitUint8(0x0F); 1319 EmitUint8(0x58); 1320 EmitXmmRegisterOperand(dst.LowBits(), src); 1321 } 1322 1323 1324 void X86_64Assembler::addsd(XmmRegister dst, const Address& src) { 1325 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1326 EmitUint8(0xF2); 1327 EmitOptionalRex32(dst, src); 1328 EmitUint8(0x0F); 1329 EmitUint8(0x58); 1330 EmitOperand(dst.LowBits(), src); 1331 } 1332 1333 1334 void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) { 1335 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1336 EmitUint8(0xF2); 1337 EmitOptionalRex32(dst, src); 1338 EmitUint8(0x0F); 1339 EmitUint8(0x5C); 1340 EmitXmmRegisterOperand(dst.LowBits(), src); 1341 } 1342 1343 1344 void X86_64Assembler::subsd(XmmRegister dst, const Address& src) { 1345 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1346 EmitUint8(0xF2); 1347 EmitOptionalRex32(dst, src); 1348 EmitUint8(0x0F); 1349 EmitUint8(0x5C); 1350 EmitOperand(dst.LowBits(), src); 1351 } 1352 1353 1354 void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) { 1355 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1356 EmitUint8(0xF2); 1357 EmitOptionalRex32(dst, src); 1358 EmitUint8(0x0F); 1359 EmitUint8(0x59); 1360 EmitXmmRegisterOperand(dst.LowBits(), src); 1361 } 1362 1363 1364 void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) { 1365 
AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1366 EmitUint8(0xF2); 1367 EmitOptionalRex32(dst, src); 1368 EmitUint8(0x0F); 1369 EmitUint8(0x59); 1370 EmitOperand(dst.LowBits(), src); 1371 } 1372 1373 1374 void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) { 1375 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1376 EmitUint8(0xF2); 1377 EmitOptionalRex32(dst, src); 1378 EmitUint8(0x0F); 1379 EmitUint8(0x5E); 1380 EmitXmmRegisterOperand(dst.LowBits(), src); 1381 } 1382 1383 1384 void X86_64Assembler::divsd(XmmRegister dst, const Address& src) { 1385 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1386 EmitUint8(0xF2); 1387 EmitOptionalRex32(dst, src); 1388 EmitUint8(0x0F); 1389 EmitUint8(0x5E); 1390 EmitOperand(dst.LowBits(), src); 1391 } 1392 1393 1394 void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) { 1395 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1396 EmitUint8(0x66); 1397 EmitOptionalRex32(dst, src); 1398 EmitUint8(0x0F); 1399 EmitUint8(0x58); 1400 EmitXmmRegisterOperand(dst.LowBits(), src); 1401 } 1402 1403 1404 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { 1405 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1406 bool is_twobyte_form = false; 1407 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 1408 if (!add_right.NeedsRex()) { 1409 is_twobyte_form = true; 1410 } 1411 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1412 X86_64ManagedRegister vvvv_reg = 1413 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); 1414 if (is_twobyte_form) { 1415 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1416 } else { 1417 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1418 /*X=*/ false, 1419 add_right.NeedsRex(), 1420 SET_VEX_M_0F); 1421 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1422 } 1423 EmitUint8(ByteZero); 1424 EmitUint8(ByteOne); 1425 if (!is_twobyte_form) { 1426 
EmitUint8(ByteTwo); 1427 } 1428 EmitUint8(0x58); 1429 EmitXmmRegisterOperand(dst.LowBits(), add_right); 1430 } 1431 1432 1433 void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) { 1434 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1435 EmitUint8(0x66); 1436 EmitOptionalRex32(dst, src); 1437 EmitUint8(0x0F); 1438 EmitUint8(0x5C); 1439 EmitXmmRegisterOperand(dst.LowBits(), src); 1440 } 1441 1442 1443 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 1444 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1445 bool is_twobyte_form = false; 1446 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 1447 if (!src2.NeedsRex()) { 1448 is_twobyte_form = true; 1449 } 1450 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1451 X86_64ManagedRegister vvvv_reg = 1452 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 1453 if (is_twobyte_form) { 1454 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1455 } else { 1456 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1457 /*X=*/ false, 1458 src2.NeedsRex(), 1459 SET_VEX_M_0F); 1460 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1461 } 1462 EmitUint8(ByteZero); 1463 EmitUint8(ByteOne); 1464 if (!is_twobyte_form) { 1465 EmitUint8(ByteTwo); 1466 } 1467 EmitUint8(0x5C); 1468 EmitXmmRegisterOperand(dst.LowBits(), src2); 1469 } 1470 1471 1472 void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) { 1473 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1474 EmitUint8(0x66); 1475 EmitOptionalRex32(dst, src); 1476 EmitUint8(0x0F); 1477 EmitUint8(0x59); 1478 EmitXmmRegisterOperand(dst.LowBits(), src); 1479 } 1480 1481 void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 1482 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1483 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1484 bool is_twobyte_form = false; 1485 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 
1486 if (!src2.NeedsRex()) { 1487 is_twobyte_form = true; 1488 } 1489 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1490 X86_64ManagedRegister vvvv_reg = 1491 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 1492 if (is_twobyte_form) { 1493 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1494 } else { 1495 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1496 /*X=*/ false, 1497 src2.NeedsRex(), 1498 SET_VEX_M_0F); 1499 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1500 } 1501 EmitUint8(ByteZero); 1502 EmitUint8(ByteOne); 1503 if (!is_twobyte_form) { 1504 EmitUint8(ByteTwo); 1505 } 1506 EmitUint8(0x59); 1507 EmitXmmRegisterOperand(dst.LowBits(), src2); 1508 } 1509 1510 void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) { 1511 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1512 EmitUint8(0x66); 1513 EmitOptionalRex32(dst, src); 1514 EmitUint8(0x0F); 1515 EmitUint8(0x5E); 1516 EmitXmmRegisterOperand(dst.LowBits(), src); 1517 } 1518 1519 1520 void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 1521 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1522 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1523 bool is_twobyte_form = false; 1524 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 1525 if (!src2.NeedsRex()) { 1526 is_twobyte_form = true; 1527 } 1528 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1529 X86_64ManagedRegister vvvv_reg = 1530 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 1531 if (is_twobyte_form) { 1532 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1533 } else { 1534 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1535 /*X=*/ false, 1536 src2.NeedsRex(), 1537 SET_VEX_M_0F); 1538 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1539 } 1540 EmitUint8(ByteZero); 1541 EmitUint8(ByteOne); 1542 if (!is_twobyte_form) { 
1543 EmitUint8(ByteTwo); 1544 } 1545 EmitUint8(0x5E); 1546 EmitXmmRegisterOperand(dst.LowBits(), src2); 1547 } 1548 1549 1550 void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) { 1551 if (CpuHasAVXorAVX2FeatureFlag()) { 1552 vmovdqa(dst, src); 1553 return; 1554 } 1555 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1556 EmitUint8(0x66); 1557 EmitOptionalRex32(dst, src); 1558 EmitUint8(0x0F); 1559 EmitUint8(0x6F); 1560 EmitXmmRegisterOperand(dst.LowBits(), src); 1561 } 1562 1563 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */ 1564 void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) { 1565 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1566 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1567 uint8_t ByteZero, ByteOne, ByteTwo; 1568 bool is_twobyte_form = true; 1569 1570 // Instruction VEX Prefix 1571 if (src.NeedsRex() && dst.NeedsRex()) { 1572 is_twobyte_form = false; 1573 } 1574 bool load = dst.NeedsRex(); 1575 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1576 if (is_twobyte_form) { 1577 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64(); 1578 bool rex_bit = load ? 
dst.NeedsRex() : src.NeedsRex(); 1579 ByteOne = EmitVexPrefixByteOne(rex_bit, 1580 vvvv_reg, 1581 SET_VEX_L_128, 1582 SET_VEX_PP_66); 1583 } else { 1584 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1585 /*X=*/ false, 1586 src.NeedsRex(), 1587 SET_VEX_M_0F); 1588 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, 1589 SET_VEX_L_128, 1590 SET_VEX_PP_66); 1591 } 1592 EmitUint8(ByteZero); 1593 EmitUint8(ByteOne); 1594 if (!is_twobyte_form) { 1595 EmitUint8(ByteTwo); 1596 } 1597 // Instruction Opcode 1598 if (is_twobyte_form && !load) { 1599 EmitUint8(0x7F); 1600 } else { 1601 EmitUint8(0x6F); 1602 } 1603 // Instruction Operands 1604 if (is_twobyte_form && !load) { 1605 EmitXmmRegisterOperand(src.LowBits(), dst); 1606 } else { 1607 EmitXmmRegisterOperand(dst.LowBits(), src); 1608 } 1609 } 1610 1611 void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) { 1612 if (CpuHasAVXorAVX2FeatureFlag()) { 1613 vmovdqa(dst, src); 1614 return; 1615 } 1616 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1617 EmitUint8(0x66); 1618 EmitOptionalRex32(dst, src); 1619 EmitUint8(0x0F); 1620 EmitUint8(0x6F); 1621 EmitOperand(dst.LowBits(), src); 1622 } 1623 1624 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */ 1625 void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) { 1626 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1627 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1628 uint8_t ByteZero, ByteOne, ByteTwo; 1629 bool is_twobyte_form = false; 1630 1631 // Instruction VEX Prefix 1632 uint8_t rex = src.rex(); 1633 bool Rex_x = rex & GET_REX_X; 1634 bool Rex_b = rex & GET_REX_B; 1635 if (!Rex_x && !Rex_b) { 1636 is_twobyte_form = true; 1637 } 1638 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1639 if (is_twobyte_form) { 1640 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64(); 1641 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1642 vvvv_reg, 1643 SET_VEX_L_128, 1644 SET_VEX_PP_66); 1645 } else { 1646 ByteOne = 
EmitVexPrefixByteOne(dst.NeedsRex(), 1647 Rex_x, 1648 Rex_b, 1649 SET_VEX_M_0F); 1650 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, 1651 SET_VEX_L_128, 1652 SET_VEX_PP_66); 1653 } 1654 EmitUint8(ByteZero); 1655 EmitUint8(ByteOne); 1656 if (!is_twobyte_form) { 1657 EmitUint8(ByteTwo); 1658 } 1659 // Instruction Opcode 1660 EmitUint8(0x6F); 1661 // Instruction Operands 1662 EmitOperand(dst.LowBits(), src); 1663 } 1664 1665 void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) { 1666 if (CpuHasAVXorAVX2FeatureFlag()) { 1667 vmovdqu(dst, src); 1668 return; 1669 } 1670 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1671 EmitUint8(0xF3); 1672 EmitOptionalRex32(dst, src); 1673 EmitUint8(0x0F); 1674 EmitUint8(0x6F); 1675 EmitOperand(dst.LowBits(), src); 1676 } 1677 1678 /** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128 1679 Load Unaligned */ 1680 void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) { 1681 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1682 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1683 uint8_t ByteZero, ByteOne, ByteTwo; 1684 bool is_twobyte_form = false; 1685 1686 // Instruction VEX Prefix 1687 uint8_t rex = src.rex(); 1688 bool Rex_x = rex & GET_REX_X; 1689 bool Rex_b = rex & GET_REX_B; 1690 if (!Rex_x && !Rex_b) { 1691 is_twobyte_form = true; 1692 } 1693 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1694 if (is_twobyte_form) { 1695 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64(); 1696 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1697 vvvv_reg, 1698 SET_VEX_L_128, 1699 SET_VEX_PP_F3); 1700 } else { 1701 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1702 Rex_x, 1703 Rex_b, 1704 SET_VEX_M_0F); 1705 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, 1706 SET_VEX_L_128, 1707 SET_VEX_PP_F3); 1708 } 1709 EmitUint8(ByteZero); 1710 EmitUint8(ByteOne); 1711 if (!is_twobyte_form) { 1712 EmitUint8(ByteTwo); 1713 } 1714 // Instruction Opcode 1715 EmitUint8(0x6F); 1716 // Instruction Operands 1717 
EmitOperand(dst.LowBits(), src); 1718 } 1719 1720 void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) { 1721 if (CpuHasAVXorAVX2FeatureFlag()) { 1722 vmovdqa(dst, src); 1723 return; 1724 } 1725 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1726 EmitUint8(0x66); 1727 EmitOptionalRex32(src, dst); 1728 EmitUint8(0x0F); 1729 EmitUint8(0x7F); 1730 EmitOperand(src.LowBits(), dst); 1731 } 1732 1733 /** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */ 1734 void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) { 1735 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1736 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1737 bool is_twobyte_form = false; 1738 uint8_t ByteZero, ByteOne, ByteTwo; 1739 // Instruction VEX Prefix 1740 uint8_t rex = dst.rex(); 1741 bool Rex_x = rex & GET_REX_X; 1742 bool Rex_b = rex & GET_REX_B; 1743 if (!Rex_x && !Rex_b) { 1744 is_twobyte_form = true; 1745 } 1746 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1747 if (is_twobyte_form) { 1748 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64(); 1749 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(), 1750 vvvv_reg, 1751 SET_VEX_L_128, 1752 SET_VEX_PP_66); 1753 } else { 1754 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(), 1755 Rex_x, 1756 Rex_b, 1757 SET_VEX_M_0F); 1758 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, 1759 SET_VEX_L_128, 1760 SET_VEX_PP_66); 1761 } 1762 EmitUint8(ByteZero); 1763 EmitUint8(ByteOne); 1764 if (!is_twobyte_form) { 1765 EmitUint8(ByteTwo); 1766 } 1767 // Instruction Opcode 1768 EmitUint8(0x7F); 1769 // Instruction Operands 1770 EmitOperand(src.LowBits(), dst); 1771 } 1772 1773 void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) { 1774 if (CpuHasAVXorAVX2FeatureFlag()) { 1775 vmovdqu(dst, src); 1776 return; 1777 } 1778 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1779 EmitUint8(0xF3); 1780 EmitOptionalRex32(src, dst); 1781 EmitUint8(0x0F); 1782 EmitUint8(0x7F); 1783 EmitOperand(src.LowBits(), dst); 1784 } 1785 1786 
/** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */ 1787 void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) { 1788 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1789 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1790 uint8_t ByteZero, ByteOne, ByteTwo; 1791 bool is_twobyte_form = false; 1792 1793 // Instruction VEX Prefix 1794 uint8_t rex = dst.rex(); 1795 bool Rex_x = rex & GET_REX_X; 1796 bool Rex_b = rex & GET_REX_B; 1797 if (!Rex_b && !Rex_x) { 1798 is_twobyte_form = true; 1799 } 1800 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1801 if (is_twobyte_form) { 1802 X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64(); 1803 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(), 1804 vvvv_reg, 1805 SET_VEX_L_128, 1806 SET_VEX_PP_F3); 1807 } else { 1808 ByteOne = EmitVexPrefixByteOne(src.NeedsRex(), 1809 Rex_x, 1810 Rex_b, 1811 SET_VEX_M_0F); 1812 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, 1813 SET_VEX_L_128, 1814 SET_VEX_PP_F3); 1815 } 1816 EmitUint8(ByteZero); 1817 EmitUint8(ByteOne); 1818 if (!is_twobyte_form) { 1819 EmitUint8(ByteTwo); 1820 } 1821 // Instruction Opcode 1822 EmitUint8(0x7F); 1823 // Instruction Operands 1824 EmitOperand(src.LowBits(), dst); 1825 } 1826 1827 void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) { 1828 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1829 EmitUint8(0x66); 1830 EmitOptionalRex32(dst, src); 1831 EmitUint8(0x0F); 1832 EmitUint8(0xFC); 1833 EmitXmmRegisterOperand(dst.LowBits(), src); 1834 } 1835 1836 1837 void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { 1838 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1839 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1840 uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00; 1841 bool is_twobyte_form = true; 1842 if (add_right.NeedsRex()) { 1843 is_twobyte_form = false; 1844 } 1845 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1846 X86_64ManagedRegister vvvv_reg = 1847 
X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); 1848 if (is_twobyte_form) { 1849 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1850 } else { 1851 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1852 /*X=*/ false, 1853 add_right.NeedsRex(), 1854 SET_VEX_M_0F); 1855 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1856 } 1857 EmitUint8(ByteZero); 1858 EmitUint8(ByteOne); 1859 if (!is_twobyte_form) { 1860 EmitUint8(ByteTwo); 1861 } 1862 EmitUint8(0xFC); 1863 EmitXmmRegisterOperand(dst.LowBits(), add_right); 1864 } 1865 1866 1867 void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) { 1868 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1869 EmitUint8(0x66); 1870 EmitOptionalRex32(dst, src); 1871 EmitUint8(0x0F); 1872 EmitUint8(0xF8); 1873 EmitXmmRegisterOperand(dst.LowBits(), src); 1874 } 1875 1876 1877 void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { 1878 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1879 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1880 bool is_twobyte_form = false; 1881 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 1882 if (!add_right.NeedsRex()) { 1883 is_twobyte_form = true; 1884 } 1885 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1886 X86_64ManagedRegister vvvv_reg = 1887 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); 1888 if (is_twobyte_form) { 1889 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1890 } else { 1891 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1892 /*X=*/ false, 1893 add_right.NeedsRex(), 1894 SET_VEX_M_0F); 1895 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1896 } 1897 EmitUint8(ByteZero); 1898 EmitUint8(ByteOne); 1899 if (!is_twobyte_form) { 1900 EmitUint8(ByteTwo); 1901 } 1902 EmitUint8(0xF8); 1903 EmitXmmRegisterOperand(dst.LowBits(), add_right); 1904 } 1905 
1906 1907 void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) { 1908 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1909 EmitUint8(0x66); 1910 EmitOptionalRex32(dst, src); 1911 EmitUint8(0x0F); 1912 EmitUint8(0xFD); 1913 EmitXmmRegisterOperand(dst.LowBits(), src); 1914 } 1915 1916 void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { 1917 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1918 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1919 bool is_twobyte_form = false; 1920 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 1921 if (!add_right.NeedsRex()) { 1922 is_twobyte_form = true; 1923 } 1924 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1925 X86_64ManagedRegister vvvv_reg = 1926 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); 1927 if (is_twobyte_form) { 1928 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1929 } else { 1930 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1931 /*X=*/ false, 1932 add_right.NeedsRex(), 1933 SET_VEX_M_0F); 1934 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1935 } 1936 EmitUint8(ByteZero); 1937 EmitUint8(ByteOne); 1938 if (!is_twobyte_form) { 1939 EmitUint8(ByteTwo); 1940 } 1941 EmitUint8(0xFD); 1942 EmitXmmRegisterOperand(dst.LowBits(), add_right); 1943 } 1944 1945 1946 void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) { 1947 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1948 EmitUint8(0x66); 1949 EmitOptionalRex32(dst, src); 1950 EmitUint8(0x0F); 1951 EmitUint8(0xF9); 1952 EmitXmmRegisterOperand(dst.LowBits(), src); 1953 } 1954 1955 void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { 1956 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1957 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1958 bool is_twobyte_form = false; 1959 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 1960 if (!add_right.NeedsRex()) { 1961 
is_twobyte_form = true; 1962 } 1963 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 1964 X86_64ManagedRegister vvvv_reg = 1965 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); 1966 if (is_twobyte_form) { 1967 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1968 } else { 1969 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 1970 /*X=*/ false, 1971 add_right.NeedsRex(), 1972 SET_VEX_M_0F); 1973 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 1974 } 1975 EmitUint8(ByteZero); 1976 EmitUint8(ByteOne); 1977 if (!is_twobyte_form) { 1978 EmitUint8(ByteTwo); 1979 } 1980 EmitUint8(0xF9); 1981 EmitXmmRegisterOperand(dst.LowBits(), add_right); 1982 } 1983 1984 1985 void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) { 1986 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1987 EmitUint8(0x66); 1988 EmitOptionalRex32(dst, src); 1989 EmitUint8(0x0F); 1990 EmitUint8(0xD5); 1991 EmitXmmRegisterOperand(dst.LowBits(), src); 1992 } 1993 1994 void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 1995 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 1996 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 1997 bool is_twobyte_form = false; 1998 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 1999 if (!src2.NeedsRex()) { 2000 is_twobyte_form = true; 2001 } 2002 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2003 X86_64ManagedRegister vvvv_reg = 2004 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2005 if (is_twobyte_form) { 2006 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2007 } else { 2008 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2009 /*X=*/ false, 2010 src2.NeedsRex(), 2011 SET_VEX_M_0F); 2012 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2013 } 2014 EmitUint8(ByteZero); 2015 EmitUint8(ByteOne); 2016 if (!is_twobyte_form) { 2017 
EmitUint8(ByteTwo); 2018 } 2019 EmitUint8(0xD5); 2020 EmitXmmRegisterOperand(dst.LowBits(), src2); 2021 } 2022 2023 void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) { 2024 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2025 EmitUint8(0x66); 2026 EmitOptionalRex32(dst, src); 2027 EmitUint8(0x0F); 2028 EmitUint8(0xFE); 2029 EmitXmmRegisterOperand(dst.LowBits(), src); 2030 } 2031 2032 void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { 2033 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2034 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2035 bool is_twobyte_form = false; 2036 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2037 if (!add_right.NeedsRex()) { 2038 is_twobyte_form = true; 2039 } 2040 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2041 X86_64ManagedRegister vvvv_reg = 2042 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); 2043 if (is_twobyte_form) { 2044 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2045 } else { 2046 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2047 /*X=*/ false, 2048 add_right.NeedsRex(), 2049 SET_VEX_M_0F); 2050 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2051 } 2052 EmitUint8(ByteZero); 2053 EmitUint8(ByteOne); 2054 if (!is_twobyte_form) { 2055 EmitUint8(ByteTwo); 2056 } 2057 EmitUint8(0xFE); 2058 EmitXmmRegisterOperand(dst.LowBits(), add_right); 2059 } 2060 2061 void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) { 2062 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2063 EmitUint8(0x66); 2064 EmitOptionalRex32(dst, src); 2065 EmitUint8(0x0F); 2066 EmitUint8(0xFA); 2067 EmitXmmRegisterOperand(dst.LowBits(), src); 2068 } 2069 2070 2071 void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) { 2072 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2073 EmitUint8(0x66); 2074 EmitOptionalRex32(dst, src); 2075 EmitUint8(0x0F); 2076 EmitUint8(0x38); 
2077 EmitUint8(0x40); 2078 EmitXmmRegisterOperand(dst.LowBits(), src); 2079 } 2080 2081 void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2082 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2083 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2084 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2085 ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false); 2086 X86_64ManagedRegister vvvv_reg = 2087 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2088 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2089 /*X=*/ false, 2090 src2.NeedsRex(), 2091 SET_VEX_M_0F_38); 2092 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2093 EmitUint8(ByteZero); 2094 EmitUint8(ByteOne); 2095 EmitUint8(ByteTwo); 2096 EmitUint8(0x40); 2097 EmitXmmRegisterOperand(dst.LowBits(), src2); 2098 } 2099 2100 void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) { 2101 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2102 EmitUint8(0x66); 2103 EmitOptionalRex32(dst, src); 2104 EmitUint8(0x0F); 2105 EmitUint8(0xD4); 2106 EmitXmmRegisterOperand(dst.LowBits(), src); 2107 } 2108 2109 2110 void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { 2111 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2112 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2113 bool is_twobyte_form = false; 2114 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2115 if (!add_right.NeedsRex()) { 2116 is_twobyte_form = true; 2117 } 2118 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2119 X86_64ManagedRegister vvvv_reg = 2120 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); 2121 if (is_twobyte_form) { 2122 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2123 } else { 2124 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2125 /*X=*/ false, 2126 add_right.NeedsRex(), 2127 SET_VEX_M_0F); 2128 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, 
vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2129 } 2130 EmitUint8(ByteZero); 2131 EmitUint8(ByteOne); 2132 if (!is_twobyte_form) { 2133 EmitUint8(ByteTwo); 2134 } 2135 EmitUint8(0xD4); 2136 EmitXmmRegisterOperand(dst.LowBits(), add_right); 2137 } 2138 2139 2140 void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) { 2141 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2142 EmitUint8(0x66); 2143 EmitOptionalRex32(dst, src); 2144 EmitUint8(0x0F); 2145 EmitUint8(0xFB); 2146 EmitXmmRegisterOperand(dst.LowBits(), src); 2147 } 2148 2149 void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { 2150 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2151 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2152 bool is_twobyte_form = false; 2153 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2154 if (!add_right.NeedsRex()) { 2155 is_twobyte_form = true; 2156 } 2157 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2158 X86_64ManagedRegister vvvv_reg = 2159 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); 2160 if (is_twobyte_form) { 2161 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2162 } else { 2163 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2164 /*X=*/ false, 2165 add_right.NeedsRex(), 2166 SET_VEX_M_0F); 2167 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2168 } 2169 EmitUint8(ByteZero); 2170 EmitUint8(ByteOne); 2171 if (!is_twobyte_form) { 2172 EmitUint8(ByteTwo); 2173 } 2174 EmitUint8(0xFB); 2175 EmitXmmRegisterOperand(dst.LowBits(), add_right); 2176 } 2177 2178 2179 void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) { 2180 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2181 EmitUint8(0x66); 2182 EmitOptionalRex32(dst, src); 2183 EmitUint8(0x0F); 2184 EmitUint8(0xDC); 2185 EmitXmmRegisterOperand(dst.LowBits(), src); 2186 } 2187 2188 2189 void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) { 2190 
AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2191 EmitUint8(0x66); 2192 EmitOptionalRex32(dst, src); 2193 EmitUint8(0x0F); 2194 EmitUint8(0xEC); 2195 EmitXmmRegisterOperand(dst.LowBits(), src); 2196 } 2197 2198 2199 void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) { 2200 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2201 EmitUint8(0x66); 2202 EmitOptionalRex32(dst, src); 2203 EmitUint8(0x0F); 2204 EmitUint8(0xDD); 2205 EmitXmmRegisterOperand(dst.LowBits(), src); 2206 } 2207 2208 2209 void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) { 2210 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2211 EmitUint8(0x66); 2212 EmitOptionalRex32(dst, src); 2213 EmitUint8(0x0F); 2214 EmitUint8(0xED); 2215 EmitXmmRegisterOperand(dst.LowBits(), src); 2216 } 2217 2218 2219 void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) { 2220 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2221 EmitUint8(0x66); 2222 EmitOptionalRex32(dst, src); 2223 EmitUint8(0x0F); 2224 EmitUint8(0xD8); 2225 EmitXmmRegisterOperand(dst.LowBits(), src); 2226 } 2227 2228 2229 void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) { 2230 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2231 EmitUint8(0x66); 2232 EmitOptionalRex32(dst, src); 2233 EmitUint8(0x0F); 2234 EmitUint8(0xE8); 2235 EmitXmmRegisterOperand(dst.LowBits(), src); 2236 } 2237 2238 2239 void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) { 2240 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2241 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2242 bool is_twobyte_form = false; 2243 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2244 if (!add_right.NeedsRex()) { 2245 is_twobyte_form = true; 2246 } 2247 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2248 X86_64ManagedRegister vvvv_reg = 2249 X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister()); 2250 if (is_twobyte_form) { 2251 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 
vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2252 } else { 2253 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2254 /*X=*/ false, 2255 add_right.NeedsRex(), 2256 SET_VEX_M_0F); 2257 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2258 } 2259 EmitUint8(ByteZero); 2260 EmitUint8(ByteOne); 2261 if (!is_twobyte_form) { 2262 EmitUint8(ByteTwo); 2263 } 2264 EmitUint8(0xFA); 2265 EmitXmmRegisterOperand(dst.LowBits(), add_right); 2266 } 2267 2268 2269 void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) { 2270 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2271 EmitUint8(0x66); 2272 EmitOptionalRex32(dst, src); 2273 EmitUint8(0x0F); 2274 EmitUint8(0xD9); 2275 EmitXmmRegisterOperand(dst.LowBits(), src); 2276 } 2277 2278 2279 void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) { 2280 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2281 EmitUint8(0x66); 2282 EmitOptionalRex32(dst, src); 2283 EmitUint8(0x0F); 2284 EmitUint8(0xE9); 2285 EmitXmmRegisterOperand(dst.LowBits(), src); 2286 } 2287 2288 2289 void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) { 2290 cvtsi2ss(dst, src, false); 2291 } 2292 2293 2294 void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) { 2295 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2296 EmitUint8(0xF3); 2297 if (is64bit) { 2298 // Emit a REX.W prefix if the operand size is 64 bits. 2299 EmitRex64(dst, src); 2300 } else { 2301 EmitOptionalRex32(dst, src); 2302 } 2303 EmitUint8(0x0F); 2304 EmitUint8(0x2A); 2305 EmitOperand(dst.LowBits(), Operand(src)); 2306 } 2307 2308 2309 void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) { 2310 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2311 EmitUint8(0xF3); 2312 if (is64bit) { 2313 // Emit a REX.W prefix if the operand size is 64 bits. 
2314 EmitRex64(dst, src); 2315 } else { 2316 EmitOptionalRex32(dst, src); 2317 } 2318 EmitUint8(0x0F); 2319 EmitUint8(0x2A); 2320 EmitOperand(dst.LowBits(), src); 2321 } 2322 2323 2324 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) { 2325 cvtsi2sd(dst, src, false); 2326 } 2327 2328 2329 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) { 2330 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2331 EmitUint8(0xF2); 2332 if (is64bit) { 2333 // Emit a REX.W prefix if the operand size is 64 bits. 2334 EmitRex64(dst, src); 2335 } else { 2336 EmitOptionalRex32(dst, src); 2337 } 2338 EmitUint8(0x0F); 2339 EmitUint8(0x2A); 2340 EmitOperand(dst.LowBits(), Operand(src)); 2341 } 2342 2343 2344 void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) { 2345 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2346 EmitUint8(0xF2); 2347 if (is64bit) { 2348 // Emit a REX.W prefix if the operand size is 64 bits. 2349 EmitRex64(dst, src); 2350 } else { 2351 EmitOptionalRex32(dst, src); 2352 } 2353 EmitUint8(0x0F); 2354 EmitUint8(0x2A); 2355 EmitOperand(dst.LowBits(), src); 2356 } 2357 2358 2359 void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) { 2360 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2361 EmitUint8(0xF3); 2362 EmitOptionalRex32(dst, src); 2363 EmitUint8(0x0F); 2364 EmitUint8(0x2D); 2365 EmitXmmRegisterOperand(dst.LowBits(), src); 2366 } 2367 2368 2369 void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) { 2370 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2371 EmitUint8(0xF3); 2372 EmitOptionalRex32(dst, src); 2373 EmitUint8(0x0F); 2374 EmitUint8(0x5A); 2375 EmitXmmRegisterOperand(dst.LowBits(), src); 2376 } 2377 2378 2379 void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) { 2380 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2381 EmitUint8(0xF3); 2382 EmitOptionalRex32(dst, src); 2383 EmitUint8(0x0F); 2384 EmitUint8(0x5A); 2385 
EmitOperand(dst.LowBits(), src); 2386 } 2387 2388 2389 void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) { 2390 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2391 EmitUint8(0xF2); 2392 EmitOptionalRex32(dst, src); 2393 EmitUint8(0x0F); 2394 EmitUint8(0x2D); 2395 EmitXmmRegisterOperand(dst.LowBits(), src); 2396 } 2397 2398 2399 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) { 2400 cvttss2si(dst, src, false); 2401 } 2402 2403 2404 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) { 2405 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2406 EmitUint8(0xF3); 2407 if (is64bit) { 2408 // Emit a REX.W prefix if the operand size is 64 bits. 2409 EmitRex64(dst, src); 2410 } else { 2411 EmitOptionalRex32(dst, src); 2412 } 2413 EmitUint8(0x0F); 2414 EmitUint8(0x2C); 2415 EmitXmmRegisterOperand(dst.LowBits(), src); 2416 } 2417 2418 2419 void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) { 2420 cvttsd2si(dst, src, false); 2421 } 2422 2423 2424 void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) { 2425 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2426 EmitUint8(0xF2); 2427 if (is64bit) { 2428 // Emit a REX.W prefix if the operand size is 64 bits. 
2429 EmitRex64(dst, src); 2430 } else { 2431 EmitOptionalRex32(dst, src); 2432 } 2433 EmitUint8(0x0F); 2434 EmitUint8(0x2C); 2435 EmitXmmRegisterOperand(dst.LowBits(), src); 2436 } 2437 2438 2439 void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) { 2440 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2441 EmitUint8(0xF2); 2442 EmitOptionalRex32(dst, src); 2443 EmitUint8(0x0F); 2444 EmitUint8(0x5A); 2445 EmitXmmRegisterOperand(dst.LowBits(), src); 2446 } 2447 2448 2449 void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) { 2450 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2451 EmitUint8(0xF2); 2452 EmitOptionalRex32(dst, src); 2453 EmitUint8(0x0F); 2454 EmitUint8(0x5A); 2455 EmitOperand(dst.LowBits(), src); 2456 } 2457 2458 2459 void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) { 2460 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2461 EmitOptionalRex32(dst, src); 2462 EmitUint8(0x0F); 2463 EmitUint8(0x5B); 2464 EmitXmmRegisterOperand(dst.LowBits(), src); 2465 } 2466 2467 2468 void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) { 2469 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2470 EmitUint8(0xF3); 2471 EmitOptionalRex32(dst, src); 2472 EmitUint8(0x0F); 2473 EmitUint8(0xE6); 2474 EmitXmmRegisterOperand(dst.LowBits(), src); 2475 } 2476 2477 2478 void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) { 2479 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2480 EmitOptionalRex32(a, b); 2481 EmitUint8(0x0F); 2482 EmitUint8(0x2F); 2483 EmitXmmRegisterOperand(a.LowBits(), b); 2484 } 2485 2486 2487 void X86_64Assembler::comiss(XmmRegister a, const Address& b) { 2488 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2489 EmitOptionalRex32(a, b); 2490 EmitUint8(0x0F); 2491 EmitUint8(0x2F); 2492 EmitOperand(a.LowBits(), b); 2493 } 2494 2495 2496 void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) { 2497 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2498 EmitUint8(0x66); 2499 
EmitOptionalRex32(a, b); 2500 EmitUint8(0x0F); 2501 EmitUint8(0x2F); 2502 EmitXmmRegisterOperand(a.LowBits(), b); 2503 } 2504 2505 2506 void X86_64Assembler::comisd(XmmRegister a, const Address& b) { 2507 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2508 EmitUint8(0x66); 2509 EmitOptionalRex32(a, b); 2510 EmitUint8(0x0F); 2511 EmitUint8(0x2F); 2512 EmitOperand(a.LowBits(), b); 2513 } 2514 2515 2516 void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) { 2517 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2518 EmitOptionalRex32(a, b); 2519 EmitUint8(0x0F); 2520 EmitUint8(0x2E); 2521 EmitXmmRegisterOperand(a.LowBits(), b); 2522 } 2523 2524 2525 void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) { 2526 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2527 EmitOptionalRex32(a, b); 2528 EmitUint8(0x0F); 2529 EmitUint8(0x2E); 2530 EmitOperand(a.LowBits(), b); 2531 } 2532 2533 2534 void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) { 2535 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2536 EmitUint8(0x66); 2537 EmitOptionalRex32(a, b); 2538 EmitUint8(0x0F); 2539 EmitUint8(0x2E); 2540 EmitXmmRegisterOperand(a.LowBits(), b); 2541 } 2542 2543 2544 void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) { 2545 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2546 EmitUint8(0x66); 2547 EmitOptionalRex32(a, b); 2548 EmitUint8(0x0F); 2549 EmitUint8(0x2E); 2550 EmitOperand(a.LowBits(), b); 2551 } 2552 2553 2554 void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) { 2555 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2556 EmitUint8(0x66); 2557 EmitOptionalRex32(dst, src); 2558 EmitUint8(0x0F); 2559 EmitUint8(0x3A); 2560 EmitUint8(0x0B); 2561 EmitXmmRegisterOperand(dst.LowBits(), src); 2562 EmitUint8(imm.value()); 2563 } 2564 2565 2566 void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) { 2567 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2568 
EmitUint8(0x66); 2569 EmitOptionalRex32(dst, src); 2570 EmitUint8(0x0F); 2571 EmitUint8(0x3A); 2572 EmitUint8(0x0A); 2573 EmitXmmRegisterOperand(dst.LowBits(), src); 2574 EmitUint8(imm.value()); 2575 } 2576 2577 2578 void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) { 2579 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2580 EmitUint8(0xF2); 2581 EmitOptionalRex32(dst, src); 2582 EmitUint8(0x0F); 2583 EmitUint8(0x51); 2584 EmitXmmRegisterOperand(dst.LowBits(), src); 2585 } 2586 2587 2588 void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) { 2589 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2590 EmitUint8(0xF3); 2591 EmitOptionalRex32(dst, src); 2592 EmitUint8(0x0F); 2593 EmitUint8(0x51); 2594 EmitXmmRegisterOperand(dst.LowBits(), src); 2595 } 2596 2597 2598 void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) { 2599 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2600 EmitUint8(0x66); 2601 EmitOptionalRex32(dst, src); 2602 EmitUint8(0x0F); 2603 EmitUint8(0x57); 2604 EmitOperand(dst.LowBits(), src); 2605 } 2606 2607 2608 void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) { 2609 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2610 EmitUint8(0x66); 2611 EmitOptionalRex32(dst, src); 2612 EmitUint8(0x0F); 2613 EmitUint8(0x57); 2614 EmitXmmRegisterOperand(dst.LowBits(), src); 2615 } 2616 2617 2618 void X86_64Assembler::xorps(XmmRegister dst, const Address& src) { 2619 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2620 EmitOptionalRex32(dst, src); 2621 EmitUint8(0x0F); 2622 EmitUint8(0x57); 2623 EmitOperand(dst.LowBits(), src); 2624 } 2625 2626 2627 void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) { 2628 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2629 EmitOptionalRex32(dst, src); 2630 EmitUint8(0x0F); 2631 EmitUint8(0x57); 2632 EmitXmmRegisterOperand(dst.LowBits(), src); 2633 } 2634 2635 void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) { 2636 AssemblerBuffer::EnsureCapacity 
ensured(&buffer_); 2637 EmitUint8(0x66); 2638 EmitOptionalRex32(dst, src); 2639 EmitUint8(0x0F); 2640 EmitUint8(0xEF); 2641 EmitXmmRegisterOperand(dst.LowBits(), src); 2642 } 2643 2644 /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */ 2645 void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2646 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2647 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2648 bool is_twobyte_form = false; 2649 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2650 if (!src2.NeedsRex()) { 2651 is_twobyte_form = true; 2652 } 2653 X86_64ManagedRegister vvvv_reg = 2654 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2655 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2656 if (is_twobyte_form) { 2657 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2658 } else { 2659 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2660 /*X=*/ false, 2661 src2.NeedsRex(), 2662 SET_VEX_M_0F); 2663 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2664 } 2665 EmitUint8(ByteZero); 2666 EmitUint8(ByteOne); 2667 if (!is_twobyte_form) { 2668 EmitUint8(ByteTwo); 2669 } 2670 EmitUint8(0xEF); 2671 EmitXmmRegisterOperand(dst.LowBits(), src2); 2672 } 2673 2674 /* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */ 2675 void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2676 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2677 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2678 bool is_twobyte_form = false; 2679 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2680 if (!src2.NeedsRex()) { 2681 is_twobyte_form = true; 2682 } 2683 X86_64ManagedRegister vvvv_reg = 2684 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2685 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2686 if (is_twobyte_form) { 2687 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); 2688 } 
else { 2689 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2690 /*X=*/ false, 2691 src2.NeedsRex(), 2692 SET_VEX_M_0F); 2693 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); 2694 } 2695 EmitUint8(ByteZero); 2696 EmitUint8(ByteOne); 2697 if (!is_twobyte_form) { 2698 EmitUint8(ByteTwo); 2699 } 2700 EmitUint8(0x57); 2701 EmitXmmRegisterOperand(dst.LowBits(), src2); 2702 } 2703 2704 /* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */ 2705 void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2706 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2707 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2708 bool is_twobyte_form = false; 2709 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2710 if (!src2.NeedsRex()) { 2711 is_twobyte_form = true; 2712 } 2713 X86_64ManagedRegister vvvv_reg = 2714 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2715 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2716 if (is_twobyte_form) { 2717 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2718 } else { 2719 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2720 /*X=*/ false, 2721 src2.NeedsRex(), 2722 SET_VEX_M_0F); 2723 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2724 } 2725 EmitUint8(ByteZero); 2726 EmitUint8(ByteOne); 2727 if (!is_twobyte_form) { 2728 EmitUint8(ByteTwo); 2729 } 2730 EmitUint8(0x57); 2731 EmitXmmRegisterOperand(dst.LowBits(), src2); 2732 } 2733 2734 void X86_64Assembler::andpd(XmmRegister dst, const Address& src) { 2735 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2736 EmitUint8(0x66); 2737 EmitOptionalRex32(dst, src); 2738 EmitUint8(0x0F); 2739 EmitUint8(0x54); 2740 EmitOperand(dst.LowBits(), src); 2741 } 2742 2743 void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) { 2744 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2745 EmitUint8(0x66); 2746 EmitOptionalRex32(dst, src); 2747 
EmitUint8(0x0F); 2748 EmitUint8(0x54); 2749 EmitXmmRegisterOperand(dst.LowBits(), src); 2750 } 2751 2752 void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) { 2753 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2754 EmitOptionalRex32(dst, src); 2755 EmitUint8(0x0F); 2756 EmitUint8(0x54); 2757 EmitXmmRegisterOperand(dst.LowBits(), src); 2758 } 2759 2760 void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) { 2761 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2762 EmitUint8(0x66); 2763 EmitOptionalRex32(dst, src); 2764 EmitUint8(0x0F); 2765 EmitUint8(0xDB); 2766 EmitXmmRegisterOperand(dst.LowBits(), src); 2767 } 2768 2769 /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */ 2770 void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2771 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2772 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2773 bool is_twobyte_form = false; 2774 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2775 if (!src2.NeedsRex()) { 2776 is_twobyte_form = true; 2777 } 2778 X86_64ManagedRegister vvvv_reg = 2779 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2780 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2781 if (is_twobyte_form) { 2782 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2783 } else { 2784 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2785 /*X=*/ false, 2786 src2.NeedsRex(), 2787 SET_VEX_M_0F); 2788 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2789 } 2790 EmitUint8(ByteZero); 2791 EmitUint8(ByteOne); 2792 if (!is_twobyte_form) { 2793 EmitUint8(ByteTwo); 2794 } 2795 EmitUint8(0xDB); 2796 EmitXmmRegisterOperand(dst.LowBits(), src2); 2797 } 2798 2799 /* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */ 2800 void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2801 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2802 AssemblerBuffer::EnsureCapacity 
ensured(&buffer_); 2803 bool is_twobyte_form = false; 2804 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2805 if (!src2.NeedsRex()) { 2806 is_twobyte_form = true; 2807 } 2808 X86_64ManagedRegister vvvv_reg = 2809 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2810 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2811 if (is_twobyte_form) { 2812 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); 2813 } else { 2814 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2815 /*X=*/ false, 2816 src2.NeedsRex(), 2817 SET_VEX_M_0F); 2818 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); 2819 } 2820 EmitUint8(ByteZero); 2821 EmitUint8(ByteOne); 2822 if (!is_twobyte_form) { 2823 EmitUint8(ByteTwo); 2824 } 2825 EmitUint8(0x54); 2826 EmitXmmRegisterOperand(dst.LowBits(), src2); 2827 } 2828 2829 /* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */ 2830 void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2831 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2832 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2833 bool is_twobyte_form = false; 2834 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2835 if (!src2.NeedsRex()) { 2836 is_twobyte_form = true; 2837 } 2838 X86_64ManagedRegister vvvv_reg = 2839 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2840 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2841 if (is_twobyte_form) { 2842 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2843 } else { 2844 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2845 /*X=*/ false, 2846 src2.NeedsRex(), 2847 SET_VEX_M_0F); 2848 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2849 } 2850 EmitUint8(ByteZero); 2851 EmitUint8(ByteOne); 2852 if (!is_twobyte_form) { 2853 EmitUint8(ByteTwo); 2854 } 2855 EmitUint8(0x54); 2856 EmitXmmRegisterOperand(dst.LowBits(), src2); 2857 } 
2858 2859 void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) { 2860 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2861 uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false); 2862 uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), 2863 /*X=*/ false, 2864 src2.NeedsRex(), 2865 SET_VEX_M_0F_38); 2866 uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true, 2867 X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()), 2868 SET_VEX_L_128, 2869 SET_VEX_PP_NONE); 2870 EmitUint8(byte_zero); 2871 EmitUint8(byte_one); 2872 EmitUint8(byte_two); 2873 // Opcode field 2874 EmitUint8(0xF2); 2875 EmitRegisterOperand(dst.LowBits(), src2.LowBits()); 2876 } 2877 2878 void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) { 2879 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2880 EmitUint8(0x66); 2881 EmitOptionalRex32(dst, src); 2882 EmitUint8(0x0F); 2883 EmitUint8(0x55); 2884 EmitXmmRegisterOperand(dst.LowBits(), src); 2885 } 2886 2887 void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) { 2888 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2889 EmitOptionalRex32(dst, src); 2890 EmitUint8(0x0F); 2891 EmitUint8(0x55); 2892 EmitXmmRegisterOperand(dst.LowBits(), src); 2893 } 2894 2895 void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) { 2896 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2897 EmitUint8(0x66); 2898 EmitOptionalRex32(dst, src); 2899 EmitUint8(0x0F); 2900 EmitUint8(0xDF); 2901 EmitXmmRegisterOperand(dst.LowBits(), src); 2902 } 2903 2904 /* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */ 2905 void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2906 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2907 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2908 bool is_twobyte_form = false; 2909 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2910 if (!src2.NeedsRex()) { 2911 is_twobyte_form = true; 2912 } 2913 X86_64ManagedRegister vvvv_reg = 2914 
X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2915 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2916 if (is_twobyte_form) { 2917 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2918 } else { 2919 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2920 /*X=*/ false, 2921 src2.NeedsRex(), 2922 SET_VEX_M_0F); 2923 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2924 } 2925 EmitUint8(ByteZero); 2926 EmitUint8(ByteOne); 2927 if (!is_twobyte_form) { 2928 EmitUint8(ByteTwo); 2929 } 2930 EmitUint8(0xDF); 2931 EmitXmmRegisterOperand(dst.LowBits(), src2); 2932 } 2933 2934 /* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */ 2935 void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2936 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2937 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2938 bool is_twobyte_form = false; 2939 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2940 if (!src2.NeedsRex()) { 2941 is_twobyte_form = true; 2942 } 2943 X86_64ManagedRegister vvvv_reg = 2944 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2945 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2946 if (is_twobyte_form) { 2947 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); 2948 } else { 2949 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2950 /*X=*/ false, 2951 src2.NeedsRex(), 2952 SET_VEX_M_0F); 2953 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE); 2954 } 2955 EmitUint8(ByteZero); 2956 EmitUint8(ByteOne); 2957 if (!is_twobyte_form) { 2958 EmitUint8(ByteTwo); 2959 } 2960 EmitUint8(0x55); 2961 EmitXmmRegisterOperand(dst.LowBits(), src2); 2962 } 2963 2964 /* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */ 2965 void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 2966 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 2967 
AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2968 bool is_twobyte_form = false; 2969 uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00; 2970 if (!src2.NeedsRex()) { 2971 is_twobyte_form = true; 2972 } 2973 X86_64ManagedRegister vvvv_reg = 2974 X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister()); 2975 ByteZero = EmitVexPrefixByteZero(is_twobyte_form); 2976 if (is_twobyte_form) { 2977 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2978 } else { 2979 ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), 2980 /*X=*/ false, 2981 src2.NeedsRex(), 2982 SET_VEX_M_0F); 2983 ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66); 2984 } 2985 EmitUint8(ByteZero); 2986 EmitUint8(ByteOne); 2987 if (!is_twobyte_form) { 2988 EmitUint8(ByteTwo); 2989 } 2990 EmitUint8(0x55); 2991 EmitXmmRegisterOperand(dst.LowBits(), src2); 2992 } 2993 2994 void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) { 2995 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 2996 EmitUint8(0x66); 2997 EmitOptionalRex32(dst, src); 2998 EmitUint8(0x0F); 2999 EmitUint8(0x56); 3000 EmitXmmRegisterOperand(dst.LowBits(), src); 3001 } 3002 3003 void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) { 3004 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 3005 EmitOptionalRex32(dst, src); 3006 EmitUint8(0x0F); 3007 EmitUint8(0x56); 3008 EmitXmmRegisterOperand(dst.LowBits(), src); 3009 } 3010 3011 void X86_64Assembler::por(XmmRegister dst, XmmRegister src) { 3012 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 3013 EmitUint8(0x66); 3014 EmitOptionalRex32(dst, src); 3015 EmitUint8(0x0F); 3016 EmitUint8(0xEB); 3017 EmitXmmRegisterOperand(dst.LowBits(), src); 3018 } 3019 3020 /* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */ 3021 void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) { 3022 DCHECK(CpuHasAVXorAVX2FeatureFlag()); 3023 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // The short 2-byte VEX prefix (0xC5) can only be used when the r/m register
  // does not need the REX.B extension bit.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  // VEX.vvvv encodes the first (non-destructive) source operand.
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xEB);  // Opcode byte.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

/* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
// Three-operand AVX bitwise OR of packed single-precision values.
void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  // 2-byte VEX form is only available when VEX.B is not required.
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // Opcode byte.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

/* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
// Three-operand AVX bitwise OR of packed double-precision values.
void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0x56);  // Opcode byte.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

// 66 0F E0 /r: PAVGB — average packed unsigned bytes.
void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE0);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F E3 /r: PAVGW — average packed unsigned words.
void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xE3);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F F6 /r: PSADBW — sum of absolute differences of packed unsigned bytes.
void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F F5 /r: PMADDWD — multiply packed words, add adjacent dword pairs.
void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xF5);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// VEX.128.66.0F F5 /r: VPMADDWD — three-operand AVX form of PMADDWD.
void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
  DCHECK(CpuHasAVXorAVX2FeatureFlag());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  bool is_twobyte_form = false;
  uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
  if (!src2.NeedsRex()) {
    is_twobyte_form = true;
  }
  ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
  X86_64ManagedRegister vvvv_reg =
      X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
  if (is_twobyte_form) {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  } else {
    ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
                                   /*X=*/ false,
                                   src2.NeedsRex(),
                                   SET_VEX_M_0F);
    ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
  }
  EmitUint8(ByteZero);
  EmitUint8(ByteOne);
  if (!is_twobyte_form) {
    EmitUint8(ByteTwo);
  }
  EmitUint8(0xF5);  // Opcode byte.
  EmitXmmRegisterOperand(dst.LowBits(), src2);
}

// 66 0F 38 01 /r: PHADDW — horizontal add of packed words.
void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x01);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 02 /r: PHADDD — horizontal add of packed dwords.
void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x02);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// F2 0F 7C /r: HADDPS — horizontal add of packed single-precision floats.
void X86_64Assembler::haddps(XmmRegister
    dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 7C /r: HADDPD — horizontal add of packed double-precision floats.
void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 05 /r: PHSUBW — horizontal subtract of packed words.
void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x05);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 06 /r: PHSUBD — horizontal subtract of packed dwords.
void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x06);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// F2 0F 7D /r: HSUBPS — horizontal subtract of packed single floats.
void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xF2);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 7D /r: HSUBPD — horizontal subtract of packed double floats.
void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x7D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 38 /r: PMINSB — packed minimum of signed bytes.
void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x38);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 3C /r: PMAXSB — packed maximum of signed bytes.
void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F EA /r: PMINSW — packed minimum of signed words.
void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F EE /r: PMAXSW — packed maximum of signed words.
void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xEE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 39 /r: PMINSD — packed minimum of signed dwords.
void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x39);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 3D /r: PMAXSD — packed maximum of signed dwords.
void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F DA /r: PMINUB — packed minimum of unsigned bytes.
void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDA);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F DE /r: PMAXUB — packed maximum of unsigned bytes.
void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xDE);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 3A /r: PMINUW — packed minimum of unsigned words.
void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 3E /r: PMAXUW — packed maximum of unsigned words.
void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3E);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 3B /r: PMINUD — packed minimum of unsigned dwords.
void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3B);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 3F /r: PMAXUD — packed maximum of unsigned dwords.
void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x3F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 0F 5D /r: MINPS — packed minimum of single-precision floats.
void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 0F 5F /r: MAXPS — packed maximum of single-precision floats.
void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 5D /r: MINPD — packed minimum of double-precision floats.
void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 5F /r: MAXPD — packed maximum of double-precision floats.
void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x5F);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 74 /r: PCMPEQB — compare packed bytes for equality.
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x74);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 75 /r: PCMPEQW — compare packed words for equality.
void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x75);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 76 /r: PCMPEQD — compare packed dwords for equality.
void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x76);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 29 /r: PCMPEQQ — compare packed qwords for equality.
void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x29);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 64 /r: PCMPGTB — compare packed signed bytes for greater-than.
void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x64);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 65 /r: PCMPGTW — compare packed signed words for greater-than.
void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x65);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 66 /r: PCMPGTD — compare packed signed dwords for greater-than.
void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x66);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F 38 37 /r: PCMPGTQ — compare packed signed qwords for greater-than.
void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x38);
  EmitUint8(0x37);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}

// 66 0F C6 /r ib: SHUFPD — shuffle packed doubles by immediate control.
void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Shuffle-control immediate byte.
}


// 0F C6 /r ib: SHUFPS — shuffle packed singles by immediate control.
void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0xC6);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Shuffle-control immediate byte.
}


// 66 0F 70 /r ib: PSHUFD — shuffle packed dwords by immediate control.
void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x70);
  EmitXmmRegisterOperand(dst.LowBits(), src);
  EmitUint8(imm.value());  // Shuffle-control immediate byte.
}


// 66 0F 60 /r: PUNPCKLBW — interleave low bytes.
void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x60);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// 66 0F 61 /r: PUNPCKLWD — interleave low words.
void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x61);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// 66 0F 62 /r: PUNPCKLDQ — interleave low dwords.
void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x62);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// 66 0F 6C /r: PUNPCKLQDQ — interleave low qwords.
void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6C);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// 66 0F 68 /r: PUNPCKHBW — interleave high bytes.
void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x68);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// 66 0F 69 /r: PUNPCKHWD — interleave high words.
void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x69);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// 66 0F 6A /r: PUNPCKHDQ — interleave high dwords.
void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6A);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// 66 0F 6D /r: PUNPCKHQDQ — interleave high qwords.
void
X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0F);
  EmitUint8(0x6D);
  EmitXmmRegisterOperand(dst.LowBits(), src);
}


// 66 0F 71 /6 ib: PSLLW — shift packed words left by immediate.
void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension in ModRM.reg.
  EmitUint8(shift_count.value());
}


// 66 0F 72 /6 ib: PSLLD — shift packed dwords left by immediate.
void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension in ModRM.reg.
  EmitUint8(shift_count.value());
}


// 66 0F 73 /6 ib: PSLLQ — shift packed qwords left by immediate.
void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(6, reg);  // /6 opcode extension in ModRM.reg.
  EmitUint8(shift_count.value());
}


// 66 0F 71 /4 ib: PSRAW — arithmetic shift packed words right by immediate.
void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(4, reg);  // /4 opcode extension in ModRM.reg.
  EmitUint8(shift_count.value());
}


// 66 0F 72 /4 ib: PSRAD — arithmetic shift packed dwords right by immediate.
void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(4, reg);  // /4 opcode extension in ModRM.reg.
  EmitUint8(shift_count.value());
}


// 66 0F 71 /2 ib: PSRLW — logical shift packed words right by immediate.
void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x71);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension in ModRM.reg.
  EmitUint8(shift_count.value());
}


// 66 0F 72 /2 ib: PSRLD — logical shift packed dwords right by immediate.
void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x72);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension in ModRM.reg.
  EmitUint8(shift_count.value());
}


// 66 0F 73 /2 ib: PSRLQ — logical shift packed qwords right by immediate.
void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(2, reg);  // /2 opcode extension in ModRM.reg.
  EmitUint8(shift_count.value());
}


// 66 0F 73 /3 ib: PSRLDQ — byte shift the whole register right by immediate.
void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
  DCHECK(shift_count.is_uint8());
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x66);
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
  EmitUint8(0x0F);
  EmitUint8(0x73);
  EmitXmmRegisterOperand(3, reg);  // /3 opcode extension in ModRM.reg.
  EmitUint8(shift_count.value());
}


// DD /0: FLD m64 — push a 64-bit float onto the x87 stack.
void X86_64Assembler::fldl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(0, src);
}


// DD /2: FST m64 — store ST(0) to memory without popping.
void X86_64Assembler::fstl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(2, dst);
}


// DD /3: FSTP m64 — store ST(0) to memory and pop.
void X86_64Assembler::fstpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitOperand(3, dst);
}


// 9B DF E0: FSTSW AX — store x87 status word into AX (with WAIT prefix).
void X86_64Assembler::fstsw() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0x9B);
  EmitUint8(0xDF);
  EmitUint8(0xE0);
}


// D9 /7: FNSTCW m16 — store x87 control word (no wait).
void X86_64Assembler::fnstcw(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(7, dst);
}


// D9 /5: FLDCW m16 — load x87 control word.
void X86_64Assembler::fldcw(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitOperand(5, src);
}


// DF /7: FISTP m64int — store ST(0) as 64-bit integer and pop.
void X86_64Assembler::fistpl(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(7, dst);
}


// DB /3: FISTP m32int — store ST(0) as 32-bit integer and pop.
void X86_64Assembler::fistps(const Address& dst) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(3, dst);
}


// DF /5: FILD m64int — push a 64-bit integer onto the x87 stack.
void X86_64Assembler::fildl(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDF);
  EmitOperand(5, src);
}


// DB /0: FILD m32int — push a 32-bit integer onto the x87 stack.
void X86_64Assembler::filds(const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDB);
  EmitOperand(0, src);
}


// D9 F7: FINCSTP — increment the x87 stack-top pointer.
void X86_64Assembler::fincstp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF7);
}


// DD C0+i: FFREE ST(i) — mark an x87 register as empty.
// NOTE(review): x87 has ST(0)..ST(7); CHECK_LT(..., 7) rejects index 7 —
// confirm whether excluding ST(7) is intentional.
void X86_64Assembler::ffree(const Immediate& index) {
  CHECK_LT(index.value(), 7);
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDD);
  EmitUint8(0xC0 +
            index.value());
}


// D9 FE: FSIN — sine of ST(0).
void X86_64Assembler::fsin() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFE);
}


// D9 FF: FCOS — cosine of ST(0).
void X86_64Assembler::fcos() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xFF);
}


// D9 F2: FPTAN — partial tangent of ST(0).
void X86_64Assembler::fptan() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF2);
}

// DA E9: FUCOMPP — unordered compare ST(0) with ST(1), pop both.
void X86_64Assembler::fucompp() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xDA);
  EmitUint8(0xE9);
}


// D9 F8: FPREM — partial remainder of ST(0) / ST(1).
void X86_64Assembler::fprem() {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitUint8(0xD9);
  EmitUint8(0xF8);
}


// 32-bit register-register exchange; uses the 1-byte 90+r form when one
// operand is RAX, otherwise 87 /r.
void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is a short version for rax.
  // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
  // work.
  const bool src_rax = src.AsRegister() == RAX;
  const bool dst_rax = dst.AsRegister() == RAX;
  if (src_rax || dst_rax) {
    // NOTE(review): xchgl(RAX, RAX) emits 0x90, which decodes as NOP rather
    // than an operation that zeroes the upper 32 bits — confirm no caller
    // relies on the zero-extension a real xchg eax,eax (87 C0) would give.
    EmitOptionalRex32(src_rax ? dst : src);
    EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
    return;
  }

  // General case.
  EmitOptionalRex32(src, dst);
  EmitUint8(0x87);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// 64-bit register-register exchange; short 90+r form with REX.W when one
// operand is RAX, otherwise REX.W 87 /r.
void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // There is a short version for rax.
  // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
  // work.
  const bool src_rax = src.AsRegister() == RAX;
  const bool dst_rax = dst.AsRegister() == RAX;
  if (src_rax || dst_rax) {
    // If src == target, emit a nop instead.
    if (src_rax && dst_rax) {
      EmitUint8(0x90);
    } else {
      EmitRex64(src_rax ? dst : src);
      EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
    }
    return;
  }

  // General case.
  EmitRex64(src, dst);
  EmitUint8(0x87);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// 87 /r: XCHG r32, m32 — exchange register with memory (implicitly locked).
void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x87);
  EmitOperand(reg.LowBits(), address);
}


// 80 /7 ib: CMP m8, imm8 — only the low byte of imm is emitted.
void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitUint8(0x80);
  EmitOperand(7, address);
  EmitUint8(imm.value() & 0xFF);
}


// 66-prefixed CMP m16, imm — /7 extension via EmitComplex.
void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOperandSizeOverride();
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm, /* is_16_op= */ true);
}


// CMP r32, imm32 — /7 extension via EmitComplex.
void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(reg);
  EmitComplex(7, Operand(reg), imm);
}


// 3B /r: CMP r32, r/m32.
void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// 3B /r: CMP r32, m32.
void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// 39 /r: CMP m32, r32.
void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x39);
  EmitOperand(reg.LowBits(), address);
}


// CMP m32, imm32 — /7 extension via EmitComplex.
void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());
  EmitOptionalRex32(address);
  EmitComplex(7, address, imm);
}


// REX.W 3B /r: CMP r64, r/m64.
void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg0, reg1);
  EmitUint8(0x3B);
  EmitOperand(reg0.LowBits(), Operand(reg1));
}


// REX.W CMP r64, imm — immediate is sign-extended, so only int32 allowed.
void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(7, Operand(reg), imm);
}


// REX.W 3B /r: CMP r64, m64.
void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x3B);
  EmitOperand(reg.LowBits(), address);
}


// REX.W CMP m64, imm — immediate is sign-extended, so only int32 allowed.
void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
  CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(address);
  EmitComplex(7, address, imm);
}


// 03 /r: ADD r32, r/m32.
void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x03);
  EmitRegisterOperand(dst.LowBits(), src.LowBits());
}


// 03 /r: ADD r32, m32.
void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x03);
  EmitOperand(reg.LowBits(), address);
}


// 85 /r: TEST r/m32, r32 — AND the operands, set flags, discard result.
void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// 85 /r: TEST m32, r32.
void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


// TEST r32, imm — picks the shortest of the byte form (A8/F6), the RAX
// short form (A9), or the general F7 /0 form.
void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
  // we only test the byte CpuRegister to keep the encoding short.
  if (immediate.is_uint8() && reg.AsRegister() < 4) {
    // Use zero-extended 8-bit immediate.
    if (reg.AsRegister() == RAX) {
      EmitUint8(0xA8);  // TEST AL, imm8.
    } else {
      EmitUint8(0xF6);  // TEST r/m8, imm8 (/0).
      EmitUint8(0xC0 + reg.AsRegister());
    }
    EmitUint8(immediate.value() & 0xFF);
  } else if (reg.AsRegister() == RAX) {
    // Use short form if the destination is RAX.
    EmitUint8(0xA9);  // TEST EAX, imm32.
    EmitImmediate(immediate);
  } else {
    EmitOptionalRex32(reg);
    EmitUint8(0xF7);  // TEST r/m32, imm32 (/0).
    EmitOperand(0, Operand(reg));
    EmitImmediate(immediate);
  }
}


// REX.W 85 /r: TEST r/m64, r64.
void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg1, reg2);
  EmitUint8(0x85);
  EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
}


// REX.W 85 /r: TEST m64, r64.
void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(reg, address);
  EmitUint8(0x85);
  EmitOperand(reg.LowBits(), address);
}


// F6 /0 ib: TEST m8, imm8 — Register::RAX (0) is the /0 ModRM extension.
// NOTE(review): the imm range CHECK runs after the opcode/operand bytes are
// already emitted; safe only because CHECK aborts the process.
void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF6);
  EmitOperand(Register::RAX, dst);
  CHECK(imm.is_int8());
  EmitUint8(imm.value() & 0xFF);
}


// F7 /0 id: TEST m32, imm32.
void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitUint8(0xF7);
  EmitOperand(0, dst);
  EmitImmediate(imm);
}


// 23 /r: AND r32, r/m32.
void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 23 /r: AND r32, m32.
void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x23);
  EmitOperand(reg.LowBits(), address);
}


// AND r32, imm — /4 extension via EmitComplex.
void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(4, Operand(dst), imm);
}


// REX.W AND r64, imm — immediate is sign-extended, so only int32 allowed.
void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // andq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(4, Operand(reg), imm);
}


// REX.W 23 /r: AND r64, r/m64.
void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), Operand(src));
}


// REX.W 23 /r: AND r64, m64.
void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x23);
  EmitOperand(dst.LowBits(), src);
}


// 0B /r: OR r32, r/m32.
void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 0B /r: OR r32, m32.
void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x0B);
  EmitOperand(reg.LowBits(), address);
}


// OR r32, imm — /1 extension via EmitComplex.
void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(1, Operand(dst), imm);
}


// REX.W OR r64, imm — immediate is sign-extended, so only int32 allowed.
void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // orq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(1, Operand(dst), imm);
}


// REX.W 0B /r: OR r64, r/m64.
void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), Operand(src));
}


// REX.W 0B /r: OR r64, m64.
void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x0B);
  EmitOperand(dst.LowBits(), src);
}


// 33 /r: XOR r32, r/m32.
void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// 33 /r: XOR r32, m32.
void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x33);
  EmitOperand(reg.LowBits(), address);
}


// XOR r32, imm — /6 extension via EmitComplex.
void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst);
  EmitComplex(6, Operand(dst), imm);
}


// REX.W 33 /r: XOR r64, r/m64.
void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), Operand(src));
}


// REX.W XOR r64, imm — immediate is sign-extended, so only int32 allowed.
void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // xorq only supports 32b immediate.
  EmitRex64(dst);
  EmitComplex(6, Operand(dst), imm);
}

// REX.W 33 /r: XOR r64, m64.
void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, src);
  EmitUint8(0x33);
  EmitOperand(dst.LowBits(), src);
}


// Disabled legacy REX helpers, kept for reference only (never compiled).
// NOTE(review): rex_reg_mem below calls mem->rex() on a const Address& —
// this dead code would not compile as written if re-enabled.
#if 0
void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *r = static_cast<Register>(*r - 8);
  }
  if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
    rex |= 0x42;  // REX.00X0
    *x = static_cast<Register>(*x - 8);
  }
  if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
    rex |= 0x41;  // REX.000B
    *b = static_cast<Register>(*b - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = mem->rex();
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
    rex |= 0x44;  // REX.0R00
    *dst = static_cast<Register>(*dst - 8);
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
#endif

// ADD r32, imm — /0 extension via EmitComplex.
void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg);
  EmitComplex(0, Operand(reg), imm);
}


// REX.W ADD r64, imm — immediate is sign-extended, so only int32 allowed.
void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int32());  // addq only supports 32b immediate.
  EmitRex64(reg);
  EmitComplex(0, Operand(reg), imm);
}


// REX.W 03 /r: ADD r64, m64.
void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitRex64(dst, address);
  EmitUint8(0x03);
  EmitOperand(dst.LowBits(), address);
}


// REX.W 01 /r: ADD r/m64, r64.
void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
  EmitRex64(src, dst);
  EmitUint8(0x01);
  EmitRegisterOperand(src.LowBits(), dst.LowBits());
}


// 01 /r: ADD m32, r32.
void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(reg, address);
  EmitUint8(0x01);
  EmitOperand(reg.LowBits(), address);
}


// ADD m32, imm — /0 extension via EmitComplex.
void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm);
}


// 66-prefixed ADD m16, imm — /0 extension, 16-bit operand size.
void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
  EmitUint8(0x66);
  EmitOptionalRex32(address);
  EmitComplex(0, address, imm, /* is_16_op= */ true);
}


// 2B /r: SUB r32, r/m32.
void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  EmitOptionalRex32(dst, src);
  EmitUint8(0x2B);
  EmitOperand(dst.LowBits(), Operand(src));
}


void
X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) { 4310 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4311 EmitOptionalRex32(reg); 4312 EmitComplex(5, Operand(reg), imm); 4313 } 4314 4315 4316 void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) { 4317 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4318 CHECK(imm.is_int32()); // subq only supports 32b immediate. 4319 EmitRex64(reg); 4320 EmitComplex(5, Operand(reg), imm); 4321 } 4322 4323 4324 void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) { 4325 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4326 EmitRex64(dst, src); 4327 EmitUint8(0x2B); 4328 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 4329 } 4330 4331 4332 void X86_64Assembler::subq(CpuRegister reg, const Address& address) { 4333 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4334 EmitRex64(reg, address); 4335 EmitUint8(0x2B); 4336 EmitOperand(reg.LowBits() & 7, address); 4337 } 4338 4339 4340 void X86_64Assembler::subl(CpuRegister reg, const Address& address) { 4341 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4342 EmitOptionalRex32(reg, address); 4343 EmitUint8(0x2B); 4344 EmitOperand(reg.LowBits(), address); 4345 } 4346 4347 4348 void X86_64Assembler::cdq() { 4349 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4350 EmitUint8(0x99); 4351 } 4352 4353 4354 void X86_64Assembler::cqo() { 4355 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4356 EmitRex64(); 4357 EmitUint8(0x99); 4358 } 4359 4360 4361 void X86_64Assembler::idivl(CpuRegister reg) { 4362 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4363 EmitOptionalRex32(reg); 4364 EmitUint8(0xF7); 4365 EmitUint8(0xF8 | reg.LowBits()); 4366 } 4367 4368 4369 void X86_64Assembler::idivq(CpuRegister reg) { 4370 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4371 EmitRex64(reg); 4372 EmitUint8(0xF7); 4373 EmitUint8(0xF8 | reg.LowBits()); 4374 } 4375 4376 4377 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) { 4378 
AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4379 EmitOptionalRex32(dst, src); 4380 EmitUint8(0x0F); 4381 EmitUint8(0xAF); 4382 EmitOperand(dst.LowBits(), Operand(src)); 4383 } 4384 4385 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) { 4386 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4387 CHECK(imm.is_int32()); // imull only supports 32b immediate. 4388 4389 EmitOptionalRex32(dst, src); 4390 4391 // See whether imm can be represented as a sign-extended 8bit value. 4392 int32_t v32 = static_cast<int32_t>(imm.value()); 4393 if (IsInt<8>(v32)) { 4394 // Sign-extension works. 4395 EmitUint8(0x6B); 4396 EmitOperand(dst.LowBits(), Operand(src)); 4397 EmitUint8(static_cast<uint8_t>(v32 & 0xFF)); 4398 } else { 4399 // Not representable, use full immediate. 4400 EmitUint8(0x69); 4401 EmitOperand(dst.LowBits(), Operand(src)); 4402 EmitImmediate(imm); 4403 } 4404 } 4405 4406 4407 void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) { 4408 imull(reg, reg, imm); 4409 } 4410 4411 4412 void X86_64Assembler::imull(CpuRegister reg, const Address& address) { 4413 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4414 EmitOptionalRex32(reg, address); 4415 EmitUint8(0x0F); 4416 EmitUint8(0xAF); 4417 EmitOperand(reg.LowBits(), address); 4418 } 4419 4420 4421 void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) { 4422 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4423 EmitRex64(dst, src); 4424 EmitUint8(0x0F); 4425 EmitUint8(0xAF); 4426 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 4427 } 4428 4429 4430 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) { 4431 imulq(reg, reg, imm); 4432 } 4433 4434 void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) { 4435 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4436 CHECK(imm.is_int32()); // imulq only supports 32b immediate. 
4437 4438 EmitRex64(dst, reg); 4439 4440 // See whether imm can be represented as a sign-extended 8bit value. 4441 int64_t v64 = imm.value(); 4442 if (IsInt<8>(v64)) { 4443 // Sign-extension works. 4444 EmitUint8(0x6B); 4445 EmitOperand(dst.LowBits(), Operand(reg)); 4446 EmitUint8(static_cast<uint8_t>(v64 & 0xFF)); 4447 } else { 4448 // Not representable, use full immediate. 4449 EmitUint8(0x69); 4450 EmitOperand(dst.LowBits(), Operand(reg)); 4451 EmitImmediate(imm); 4452 } 4453 } 4454 4455 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) { 4456 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4457 EmitRex64(reg, address); 4458 EmitUint8(0x0F); 4459 EmitUint8(0xAF); 4460 EmitOperand(reg.LowBits(), address); 4461 } 4462 4463 4464 void X86_64Assembler::imull(CpuRegister reg) { 4465 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4466 EmitOptionalRex32(reg); 4467 EmitUint8(0xF7); 4468 EmitOperand(5, Operand(reg)); 4469 } 4470 4471 4472 void X86_64Assembler::imulq(CpuRegister reg) { 4473 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4474 EmitRex64(reg); 4475 EmitUint8(0xF7); 4476 EmitOperand(5, Operand(reg)); 4477 } 4478 4479 4480 void X86_64Assembler::imull(const Address& address) { 4481 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4482 EmitOptionalRex32(address); 4483 EmitUint8(0xF7); 4484 EmitOperand(5, address); 4485 } 4486 4487 4488 void X86_64Assembler::mull(CpuRegister reg) { 4489 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4490 EmitOptionalRex32(reg); 4491 EmitUint8(0xF7); 4492 EmitOperand(4, Operand(reg)); 4493 } 4494 4495 4496 void X86_64Assembler::mull(const Address& address) { 4497 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4498 EmitOptionalRex32(address); 4499 EmitUint8(0xF7); 4500 EmitOperand(4, address); 4501 } 4502 4503 4504 void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) { 4505 EmitGenericShift(false, 4, reg, imm); 4506 } 4507 4508 4509 void X86_64Assembler::shlq(CpuRegister reg, 
const Immediate& imm) { 4510 EmitGenericShift(true, 4, reg, imm); 4511 } 4512 4513 4514 void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) { 4515 EmitGenericShift(false, 4, operand, shifter); 4516 } 4517 4518 4519 void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) { 4520 EmitGenericShift(true, 4, operand, shifter); 4521 } 4522 4523 4524 void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) { 4525 EmitGenericShift(false, 5, reg, imm); 4526 } 4527 4528 4529 void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) { 4530 EmitGenericShift(true, 5, reg, imm); 4531 } 4532 4533 4534 void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) { 4535 EmitGenericShift(false, 5, operand, shifter); 4536 } 4537 4538 4539 void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) { 4540 EmitGenericShift(true, 5, operand, shifter); 4541 } 4542 4543 4544 void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) { 4545 EmitGenericShift(false, 7, reg, imm); 4546 } 4547 4548 4549 void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) { 4550 EmitGenericShift(false, 7, operand, shifter); 4551 } 4552 4553 4554 void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) { 4555 EmitGenericShift(true, 7, reg, imm); 4556 } 4557 4558 4559 void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) { 4560 EmitGenericShift(true, 7, operand, shifter); 4561 } 4562 4563 4564 void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) { 4565 EmitGenericShift(false, 0, reg, imm); 4566 } 4567 4568 4569 void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) { 4570 EmitGenericShift(false, 0, operand, shifter); 4571 } 4572 4573 4574 void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) { 4575 EmitGenericShift(false, 1, reg, imm); 4576 } 4577 4578 4579 void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) { 4580 EmitGenericShift(false, 1, 
operand, shifter); 4581 } 4582 4583 4584 void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) { 4585 EmitGenericShift(true, 0, reg, imm); 4586 } 4587 4588 4589 void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) { 4590 EmitGenericShift(true, 0, operand, shifter); 4591 } 4592 4593 4594 void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) { 4595 EmitGenericShift(true, 1, reg, imm); 4596 } 4597 4598 4599 void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) { 4600 EmitGenericShift(true, 1, operand, shifter); 4601 } 4602 4603 4604 void X86_64Assembler::negl(CpuRegister reg) { 4605 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4606 EmitOptionalRex32(reg); 4607 EmitUint8(0xF7); 4608 EmitOperand(3, Operand(reg)); 4609 } 4610 4611 4612 void X86_64Assembler::negq(CpuRegister reg) { 4613 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4614 EmitRex64(reg); 4615 EmitUint8(0xF7); 4616 EmitOperand(3, Operand(reg)); 4617 } 4618 4619 4620 void X86_64Assembler::notl(CpuRegister reg) { 4621 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4622 EmitOptionalRex32(reg); 4623 EmitUint8(0xF7); 4624 EmitUint8(0xD0 | reg.LowBits()); 4625 } 4626 4627 4628 void X86_64Assembler::notq(CpuRegister reg) { 4629 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4630 EmitRex64(reg); 4631 EmitUint8(0xF7); 4632 EmitOperand(2, Operand(reg)); 4633 } 4634 4635 4636 void X86_64Assembler::enter(const Immediate& imm) { 4637 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4638 EmitUint8(0xC8); 4639 CHECK(imm.is_uint16()) << imm.value(); 4640 EmitUint8(imm.value() & 0xFF); 4641 EmitUint8((imm.value() >> 8) & 0xFF); 4642 EmitUint8(0x00); 4643 } 4644 4645 4646 void X86_64Assembler::leave() { 4647 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4648 EmitUint8(0xC9); 4649 } 4650 4651 4652 void X86_64Assembler::ret() { 4653 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4654 EmitUint8(0xC3); 4655 } 4656 4657 4658 void 
X86_64Assembler::ret(const Immediate& imm) { 4659 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4660 EmitUint8(0xC2); 4661 CHECK(imm.is_uint16()); 4662 EmitUint8(imm.value() & 0xFF); 4663 EmitUint8((imm.value() >> 8) & 0xFF); 4664 } 4665 4666 4667 4668 void X86_64Assembler::nop() { 4669 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4670 EmitUint8(0x90); 4671 } 4672 4673 4674 void X86_64Assembler::int3() { 4675 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4676 EmitUint8(0xCC); 4677 } 4678 4679 4680 void X86_64Assembler::hlt() { 4681 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4682 EmitUint8(0xF4); 4683 } 4684 4685 4686 void X86_64Assembler::j(Condition condition, Label* label) { 4687 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4688 if (label->IsBound()) { 4689 static const int kShortSize = 2; 4690 static const int kLongSize = 6; 4691 int offset = label->Position() - buffer_.Size(); 4692 CHECK_LE(offset, 0); 4693 if (IsInt<8>(offset - kShortSize)) { 4694 EmitUint8(0x70 + condition); 4695 EmitUint8((offset - kShortSize) & 0xFF); 4696 } else { 4697 EmitUint8(0x0F); 4698 EmitUint8(0x80 + condition); 4699 EmitInt32(offset - kLongSize); 4700 } 4701 } else { 4702 EmitUint8(0x0F); 4703 EmitUint8(0x80 + condition); 4704 EmitLabelLink(label); 4705 } 4706 } 4707 4708 4709 void X86_64Assembler::j(Condition condition, NearLabel* label) { 4710 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4711 if (label->IsBound()) { 4712 static const int kShortSize = 2; 4713 int offset = label->Position() - buffer_.Size(); 4714 CHECK_LE(offset, 0); 4715 CHECK(IsInt<8>(offset - kShortSize)); 4716 EmitUint8(0x70 + condition); 4717 EmitUint8((offset - kShortSize) & 0xFF); 4718 } else { 4719 EmitUint8(0x70 + condition); 4720 EmitLabelLink(label); 4721 } 4722 } 4723 4724 4725 void X86_64Assembler::jrcxz(NearLabel* label) { 4726 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4727 if (label->IsBound()) { 4728 static const int kShortSize = 2; 4729 int offset = 
label->Position() - buffer_.Size(); 4730 CHECK_LE(offset, 0); 4731 CHECK(IsInt<8>(offset - kShortSize)); 4732 EmitUint8(0xE3); 4733 EmitUint8((offset - kShortSize) & 0xFF); 4734 } else { 4735 EmitUint8(0xE3); 4736 EmitLabelLink(label); 4737 } 4738 } 4739 4740 4741 void X86_64Assembler::jmp(CpuRegister reg) { 4742 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4743 EmitOptionalRex32(reg); 4744 EmitUint8(0xFF); 4745 EmitRegisterOperand(4, reg.LowBits()); 4746 } 4747 4748 void X86_64Assembler::jmp(const Address& address) { 4749 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4750 EmitOptionalRex32(address); 4751 EmitUint8(0xFF); 4752 EmitOperand(4, address); 4753 } 4754 4755 void X86_64Assembler::jmp(Label* label) { 4756 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4757 if (label->IsBound()) { 4758 static const int kShortSize = 2; 4759 static const int kLongSize = 5; 4760 int offset = label->Position() - buffer_.Size(); 4761 CHECK_LE(offset, 0); 4762 if (IsInt<8>(offset - kShortSize)) { 4763 EmitUint8(0xEB); 4764 EmitUint8((offset - kShortSize) & 0xFF); 4765 } else { 4766 EmitUint8(0xE9); 4767 EmitInt32(offset - kLongSize); 4768 } 4769 } else { 4770 EmitUint8(0xE9); 4771 EmitLabelLink(label); 4772 } 4773 } 4774 4775 4776 void X86_64Assembler::jmp(NearLabel* label) { 4777 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4778 if (label->IsBound()) { 4779 static const int kShortSize = 2; 4780 int offset = label->Position() - buffer_.Size(); 4781 CHECK_LE(offset, 0); 4782 CHECK(IsInt<8>(offset - kShortSize)); 4783 EmitUint8(0xEB); 4784 EmitUint8((offset - kShortSize) & 0xFF); 4785 } else { 4786 EmitUint8(0xEB); 4787 EmitLabelLink(label); 4788 } 4789 } 4790 4791 4792 void X86_64Assembler::rep_movsw() { 4793 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4794 EmitUint8(0x66); 4795 EmitUint8(0xF3); 4796 EmitUint8(0xA5); 4797 } 4798 4799 4800 X86_64Assembler* X86_64Assembler::lock() { 4801 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4802 
EmitUint8(0xF0); 4803 return this; 4804 } 4805 4806 4807 void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) { 4808 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4809 EmitOptionalRex32(reg, address); 4810 EmitUint8(0x0F); 4811 EmitUint8(0xB1); 4812 EmitOperand(reg.LowBits(), address); 4813 } 4814 4815 4816 void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) { 4817 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4818 EmitRex64(reg, address); 4819 EmitUint8(0x0F); 4820 EmitUint8(0xB1); 4821 EmitOperand(reg.LowBits(), address); 4822 } 4823 4824 4825 void X86_64Assembler::mfence() { 4826 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4827 EmitUint8(0x0F); 4828 EmitUint8(0xAE); 4829 EmitUint8(0xF0); 4830 } 4831 4832 4833 X86_64Assembler* X86_64Assembler::gs() { 4834 // TODO: gs is a prefix and not an instruction 4835 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4836 EmitUint8(0x65); 4837 return this; 4838 } 4839 4840 4841 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) { 4842 int value = imm.value(); 4843 if (value != 0) { 4844 if (value > 0) { 4845 addl(reg, imm); 4846 } else { 4847 subl(reg, Immediate(value)); 4848 } 4849 } 4850 } 4851 4852 4853 void X86_64Assembler::setcc(Condition condition, CpuRegister dst) { 4854 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4855 // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh). 
4856 if (dst.NeedsRex() || dst.AsRegister() > 3) { 4857 EmitOptionalRex(true, false, false, false, dst.NeedsRex()); 4858 } 4859 EmitUint8(0x0F); 4860 EmitUint8(0x90 + condition); 4861 EmitUint8(0xC0 + dst.LowBits()); 4862 } 4863 4864 void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) { 4865 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4866 uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false); 4867 uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false, 4868 /*X=*/ false, 4869 src.NeedsRex(), 4870 SET_VEX_M_0F_38); 4871 uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true, 4872 X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()), 4873 SET_VEX_L_128, 4874 SET_VEX_PP_NONE); 4875 EmitUint8(byte_zero); 4876 EmitUint8(byte_one); 4877 EmitUint8(byte_two); 4878 EmitUint8(0xF3); 4879 EmitRegisterOperand(3, src.LowBits()); 4880 } 4881 4882 void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) { 4883 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4884 uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false); 4885 uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false, 4886 /*X=*/ false, 4887 src.NeedsRex(), 4888 SET_VEX_M_0F_38); 4889 uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true, 4890 X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()), 4891 SET_VEX_L_128, 4892 SET_VEX_PP_NONE); 4893 EmitUint8(byte_zero); 4894 EmitUint8(byte_one); 4895 EmitUint8(byte_two); 4896 EmitUint8(0xF3); 4897 EmitRegisterOperand(2, src.LowBits()); 4898 } 4899 4900 void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) { 4901 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4902 uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false); 4903 uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false, 4904 /*X=*/ false, 4905 src.NeedsRex(), 4906 SET_VEX_M_0F_38); 4907 uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true, 4908 X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()), 4909 SET_VEX_L_128, 4910 
SET_VEX_PP_NONE); 4911 EmitUint8(byte_zero); 4912 EmitUint8(byte_one); 4913 EmitUint8(byte_two); 4914 EmitUint8(0xF3); 4915 EmitRegisterOperand(1, src.LowBits()); 4916 } 4917 4918 void X86_64Assembler::bswapl(CpuRegister dst) { 4919 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4920 EmitOptionalRex(false, false, false, false, dst.NeedsRex()); 4921 EmitUint8(0x0F); 4922 EmitUint8(0xC8 + dst.LowBits()); 4923 } 4924 4925 void X86_64Assembler::bswapq(CpuRegister dst) { 4926 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4927 EmitOptionalRex(false, true, false, false, dst.NeedsRex()); 4928 EmitUint8(0x0F); 4929 EmitUint8(0xC8 + dst.LowBits()); 4930 } 4931 4932 void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) { 4933 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4934 EmitOptionalRex32(dst, src); 4935 EmitUint8(0x0F); 4936 EmitUint8(0xBC); 4937 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 4938 } 4939 4940 void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) { 4941 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4942 EmitOptionalRex32(dst, src); 4943 EmitUint8(0x0F); 4944 EmitUint8(0xBC); 4945 EmitOperand(dst.LowBits(), src); 4946 } 4947 4948 void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) { 4949 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4950 EmitRex64(dst, src); 4951 EmitUint8(0x0F); 4952 EmitUint8(0xBC); 4953 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 4954 } 4955 4956 void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) { 4957 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4958 EmitRex64(dst, src); 4959 EmitUint8(0x0F); 4960 EmitUint8(0xBC); 4961 EmitOperand(dst.LowBits(), src); 4962 } 4963 4964 void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) { 4965 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4966 EmitOptionalRex32(dst, src); 4967 EmitUint8(0x0F); 4968 EmitUint8(0xBD); 4969 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 4970 } 4971 4972 void 
X86_64Assembler::bsrl(CpuRegister dst, const Address& src) { 4973 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4974 EmitOptionalRex32(dst, src); 4975 EmitUint8(0x0F); 4976 EmitUint8(0xBD); 4977 EmitOperand(dst.LowBits(), src); 4978 } 4979 4980 void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) { 4981 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4982 EmitRex64(dst, src); 4983 EmitUint8(0x0F); 4984 EmitUint8(0xBD); 4985 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 4986 } 4987 4988 void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) { 4989 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4990 EmitRex64(dst, src); 4991 EmitUint8(0x0F); 4992 EmitUint8(0xBD); 4993 EmitOperand(dst.LowBits(), src); 4994 } 4995 4996 void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) { 4997 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 4998 EmitUint8(0xF3); 4999 EmitOptionalRex32(dst, src); 5000 EmitUint8(0x0F); 5001 EmitUint8(0xB8); 5002 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 5003 } 5004 5005 void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) { 5006 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 5007 EmitUint8(0xF3); 5008 EmitOptionalRex32(dst, src); 5009 EmitUint8(0x0F); 5010 EmitUint8(0xB8); 5011 EmitOperand(dst.LowBits(), src); 5012 } 5013 5014 void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) { 5015 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 5016 EmitUint8(0xF3); 5017 EmitRex64(dst, src); 5018 EmitUint8(0x0F); 5019 EmitUint8(0xB8); 5020 EmitRegisterOperand(dst.LowBits(), src.LowBits()); 5021 } 5022 5023 void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) { 5024 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 5025 EmitUint8(0xF3); 5026 EmitRex64(dst, src); 5027 EmitUint8(0x0F); 5028 EmitUint8(0xB8); 5029 EmitOperand(dst.LowBits(), src); 5030 } 5031 5032 void X86_64Assembler::repne_scasb() { 5033 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 5034 
EmitUint8(0xF2); 5035 EmitUint8(0xAE); 5036 } 5037 5038 void X86_64Assembler::repne_scasw() { 5039 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 5040 EmitUint8(0x66); 5041 EmitUint8(0xF2); 5042 EmitUint8(0xAF); 5043 } 5044 5045 void X86_64Assembler::repe_cmpsw() { 5046 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 5047 EmitUint8(0x66); 5048 EmitUint8(0xF3); 5049 EmitUint8(0xA7); 5050 } 5051 5052 5053 void X86_64Assembler::repe_cmpsl() { 5054 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 5055 EmitUint8(0xF3); 5056 EmitUint8(0xA7); 5057 } 5058 5059 5060 void X86_64Assembler::repe_cmpsq() { 5061 AssemblerBuffer::EnsureCapacity ensured(&buffer_); 5062 EmitUint8(0xF3); 5063 EmitRex64(); 5064 EmitUint8(0xA7); 5065 } 5066 5067 5068 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) { 5069 // TODO: Need to have a code constants table. 5070 int64_t constant = bit_cast<int64_t, double>(value); 5071 pushq(Immediate(High32Bits(constant))); 5072 pushq(Immediate(Low32Bits(constant))); 5073 movsd(dst, Address(CpuRegister(RSP), 0)); 5074 addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t))); 5075 } 5076 5077 5078 void X86_64Assembler::Align(int alignment, int offset) { 5079 CHECK(IsPowerOfTwo(alignment)); 5080 // Emit nop instruction until the real position is aligned. 5081 while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) { 5082 nop(); 5083 } 5084 } 5085 5086 5087 void X86_64Assembler::Bind(Label* label) { 5088 int bound = buffer_.Size(); 5089 CHECK(!label->IsBound()); // Labels can only be bound once. 5090 while (label->IsLinked()) { 5091 int position = label->LinkPosition(); 5092 int next = buffer_.Load<int32_t>(position); 5093 buffer_.Store<int32_t>(position, bound - (position + 4)); 5094 label->position_ = next; 5095 } 5096 label->BindTo(bound); 5097 } 5098 5099 5100 void X86_64Assembler::Bind(NearLabel* label) { 5101 int bound = buffer_.Size(); 5102 CHECK(!label->IsBound()); // Labels can only be bound once. 
5103 while (label->IsLinked()) { 5104 int position = label->LinkPosition(); 5105 uint8_t delta = buffer_.Load<uint8_t>(position); 5106 int offset = bound - (position + 1); 5107 CHECK(IsInt<8>(offset)); 5108 buffer_.Store<int8_t>(position, offset); 5109 label->position_ = delta != 0u ? label->position_ - delta : 0; 5110 } 5111 label->BindTo(bound); 5112 } 5113 5114 5115 void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) { 5116 CHECK_GE(reg_or_opcode, 0); 5117 CHECK_LT(reg_or_opcode, 8); 5118 const int length = operand.length_; 5119 CHECK_GT(length, 0); 5120 // Emit the ModRM byte updated with the given reg value. 5121 CHECK_EQ(operand.encoding_[0] & 0x38, 0); 5122 EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3)); 5123 // Emit the rest of the encoded operand. 5124 for (int i = 1; i < length; i++) { 5125 EmitUint8(operand.encoding_[i]); 5126 } 5127 AssemblerFixup* fixup = operand.GetFixup(); 5128 if (fixup != nullptr) { 5129 EmitFixup(fixup); 5130 } 5131 } 5132 5133 5134 void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) { 5135 if (is_16_op) { 5136 EmitUint8(imm.value() & 0xFF); 5137 EmitUint8(imm.value() >> 8); 5138 } else if (imm.is_int32()) { 5139 EmitInt32(static_cast<int32_t>(imm.value())); 5140 } else { 5141 EmitInt64(imm.value()); 5142 } 5143 } 5144 5145 5146 void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode, 5147 const Operand& operand, 5148 const Immediate& immediate, 5149 bool is_16_op) { 5150 CHECK_GE(reg_or_opcode, 0); 5151 CHECK_LT(reg_or_opcode, 8); 5152 if (immediate.is_int8()) { 5153 // Use sign-extended 8-bit immediate. 5154 EmitUint8(0x83); 5155 EmitOperand(reg_or_opcode, operand); 5156 EmitUint8(immediate.value() & 0xFF); 5157 } else if (operand.IsRegister(CpuRegister(RAX))) { 5158 // Use short form if the destination is eax. 
  // NOTE(review): continuation of a function whose beginning lies outside this chunk
  // (an immediate-form arithmetic emitter; `reg_or_opcode`, `immediate`, `is_16_op`
  // and `operand` are its parameters/locals). Reproduced unchanged.
      EmitUint8(0x05 + (reg_or_opcode << 3));
      EmitImmediate(immediate, is_16_op);
    } else {
      EmitUint8(0x81);
      EmitOperand(reg_or_opcode, operand);
      EmitImmediate(immediate, is_16_op);
    }
  }
}


// Emits the 32-bit displacement for a branch/call to `label`. If the label is
// already bound, emits the (negative) pc-relative offset directly, adjusted by
// `instruction_size` because the displacement is relative to the end of the
// instruction. Otherwise links this site into the label's fixup chain.
void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
  if (label->IsBound()) {
    int offset = label->Position() - buffer_.Size();
    CHECK_LE(offset, 0);  // Bound labels always precede their uses here.
    EmitInt32(offset - instruction_size);
  } else {
    EmitLabelLink(label);
  }
}


// Links an unbound label: emits the previous link position as a placeholder
// int32 (forming a chain through the instruction stream) and records this
// site as the new head of the chain.
void X86_64Assembler::EmitLabelLink(Label* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  EmitInt32(label->position_);
  label->LinkTo(position);
}


// Links an unbound NearLabel (8-bit displacement form). The single byte holds
// the delta to the previous link site, so the chain can be walked backwards
// when the label is bound.
void X86_64Assembler::EmitLabelLink(NearLabel* label) {
  CHECK(!label->IsBound());
  int position = buffer_.Size();
  if (label->IsLinked()) {
    // Save the delta in the byte that we have to play with.
    uint32_t delta = position - label->LinkPosition();
    CHECK(IsUint<8>(delta));  // The delta must fit in the 8-bit displacement.
    EmitUint8(delta & 0xFF);
  } else {
    EmitUint8(0);  // End-of-chain marker.
  }
  label->LinkTo(position);
}


// Emits a shift/rotate of `reg` by immediate `imm`. `reg_or_opcode` selects the
// operation via the ModRM reg field (e.g. SHL/SHR/SAR). `wide` selects the
// 64-bit (REX.W) form. Uses the shorter 0xD1 encoding for shift-by-1.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister reg,
                                       const Immediate& imm) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK(imm.is_int8());  // Shift counts are encoded in a single byte.
  if (wide) {
    EmitRex64(reg);
  } else {
    EmitOptionalRex32(reg);
  }
  if (imm.value() == 1) {
    EmitUint8(0xD1);  // Shift-by-1 short form.
    EmitOperand(reg_or_opcode, Operand(reg));
  } else {
    EmitUint8(0xC1);  // Shift-by-imm8 form.
    EmitOperand(reg_or_opcode, Operand(reg));
    EmitUint8(imm.value() & 0xFF);
  }
}


// Emits a shift/rotate of `operand` by a register count. The x86 ISA only
// allows CL as the shift-count register, hence the CHECK on RCX.
void X86_64Assembler::EmitGenericShift(bool wide,
                                       int reg_or_opcode,
                                       CpuRegister operand,
                                       CpuRegister shifter) {
  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
  CHECK_EQ(shifter.AsRegister(), RCX);  // Count must be in CL.
  if (wide) {
    EmitRex64(operand);
  } else {
    EmitOptionalRex32(operand);
  }
  EmitUint8(0xD3);  // Shift-by-CL form.
  EmitOperand(reg_or_opcode, Operand(operand));
}

// Emits a REX prefix byte built from the requested feature bits. The byte is
// only emitted when non-zero, unless `force` requests a bare 0x40 prefix
// (needed e.g. to access SPL/BPL/SIL/DIL as byte registers).
void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
  // REX.WRXB
  // W - 64-bit operand
  // R - MODRM.reg
  // X - SIB.index
  // B - MODRM.rm/SIB.base
  uint8_t rex = force ? 0x40 : 0;
  if (w) {
    rex |= 0x48;  // REX.W000
  }
  if (r) {
    rex |= 0x44;  // REX.0R00
  }
  if (x) {
    rex |= 0x42;  // REX.00X0
  }
  if (b) {
    rex |= 0x41;  // REX.000B
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// REX for a single ModRM.rm register operand (B bit only).
void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
  EmitOptionalRex(false, false, false, false, reg.NeedsRex());
}

// REX for reg/rm register pair: dst goes in ModRM.reg (R), src in ModRM.rm (B).
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
}

// REX for a memory operand: the operand itself carries its X/B requirements.
void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
  uint8_t rex = operand.rex();
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// REX for reg + memory operand: merge the operand's X/B bits with dst's R bit.
void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// REX.W with no register extension bits (64-bit operand size only).
void X86_64Assembler::EmitRex64() {
  EmitOptionalRex(false, true, false, false, false);
}

void X86_64Assembler::EmitRex64(CpuRegister reg) {
  EmitOptionalRex(false, true, false, false, reg.NeedsRex());
}

// REX.W merged with a memory operand's X/B bits; always emitted since W is set.
void X86_64Assembler::EmitRex64(const Operand& operand) {
  uint8_t rex = operand.rex();
  rex |= 0x48;  // REX.W000
  EmitUint8(rex);
}

void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
  EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}

void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
  uint8_t rex = 0x48 | operand.rex();  // REX.W000
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  EmitUint8(rex);
}

// REX for byte-register operations: registers 4-7 (SP/BP/SI/DI) need an
// explicit (possibly bare 0x40) REX prefix to be addressed as SPL/BPL/SIL/DIL
// rather than AH/CH/DH/BH.
void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
  // For src, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = src.AsRegister() > 3;
  EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
}

void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
  uint8_t rex = operand.rex();
  // For dst, SPL, BPL, SIL, DIL need the rex prefix.
  bool force = dst.AsRegister() > 3;
  if (force) {
    rex |= 0x40;  // REX.0000
  }
  if (dst.NeedsRex()) {
    rex |= 0x44;  // REX.0R00
  }
  if (rex != 0) {
    EmitUint8(rex);
  }
}

// Appends the accumulated constant pool to the end of the instruction stream.
void X86_64Assembler::AddConstantArea() {
  ArrayRef<const int32_t> area = constant_area_.GetBuffer();
  for (size_t i = 0, e = area.size(); i < e; i++) {
    AssemblerBuffer::EnsureCapacity ensured(&buffer_);
    EmitInt32(area[i]);
  }
}

// Unconditionally appends `v` and returns its byte offset in the area.
size_t ConstantArea::AppendInt32(int32_t v) {
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v);
  return result;
}

// Returns the byte offset of `v`, deduplicating against existing entries
// (linear scan) before appending.
size_t ConstantArea::AddInt32(int32_t v) {
  // Look for an existing match.
  for (size_t i = 0, e = buffer_.size(); i < e; i++) {
    if (v == buffer_[i]) {
      return i * elem_size_;
    }
  }

  // Didn't match anything.
  return AppendInt32(v);
}

// Returns the byte offset of 64-bit `v`, stored as two consecutive 32-bit
// slots (low word first), deduplicating against any existing adjacent pair.
size_t ConstantArea::AddInt64(int64_t v) {
  int32_t v_low = v;
  int32_t v_high = v >> 32;
  if (buffer_.size() > 1) {
    // Ensure we don't pass the end of the buffer.
    for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
      if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
        return i * elem_size_;
      }
    }
  }

  // Didn't match anything.
  size_t result = buffer_.size() * elem_size_;
  buffer_.push_back(v_low);
  buffer_.push_back(v_high);
  return result;
}

size_t ConstantArea::AddDouble(double v) {
  // Treat the value as a 64-bit integer value.
  return AddInt64(bit_cast<int64_t, double>(v));
}

size_t ConstantArea::AddFloat(float v) {
  // Treat the value as a 32-bit integer value.
  return AddInt32(bit_cast<int32_t, float>(v));
}

// VEX byte 0: selects the 2-byte (0xC5) or 3-byte (0xC4) VEX prefix form.
uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
  // Vex Byte 0,
  // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
  // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
  uint8_t vex_prefix = 0xC0;
  if (is_twobyte_form) {
    vex_prefix |= TWO_BYTE_VEX;  // 2-Byte Vex
  } else {
    vex_prefix |= THREE_BYTE_VEX;  // 3-Byte Vex
  }
  return vex_prefix;
}

// VEX byte 1 for the 3-byte form: R/X/B extension bits (stored inverted, i.e.
// the bit is SET when the extension is NOT used) plus the opcode-map field.
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] - inverted R. This bit needs to be set to '1' when R is not used,
      otherwise the instruction is LES or LDS. */
  if (!R) {
    vex_prefix |= SET_VEX_R;
  }
  /** Bit[6] - inverted X. This bit needs to be set to '1' when X is not used,
      otherwise the instruction is LES or LDS. */
  if (!X) {
    vex_prefix |= SET_VEX_X;
  }
  /** Bit[5] - inverted B. This bit needs to be set to '1' when B is not used. */
  if (!B) {
    vex_prefix |= SET_VEX_B;
  }
  /** Bits[4:0] - "mmmmm" opcode map, based on the instruction documentation. */
  vex_prefix |= SET_VEX_M;
  return vex_prefix;
}

// VEX byte 1 for the 2-byte form: inverted R bit, the inverted "vvvv"
// additional-operand register specifier, vector length L, and the "pp"
// implied-prefix field.
uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 1,
  uint8_t vex_prefix = VEX_INIT;
  /** Bit[7] - inverted R. This bit needs to be set to '1' when R is not used,
      otherwise the instruction is LES or LDS. */
  if (!R) {
    vex_prefix |= SET_VEX_R;
  }
  /** Bits[6:3] - 'vvvv' the source or dest register specifier,
      encoded in one's complement form (0b1111 = no register). */
  if (operand.IsNoRegister()) {
    vex_prefix |= 0x78;  // vvvv = 0b1111, i.e. unused.
  } else if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
      VEX.L = 0 indicates 128 bit vector operation. */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] - "pp" implied SIMD prefix.
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}

// VEX byte 2 for the 3-byte form: W bit, inverted "vvvv" register specifier,
// vector length L, and the "pp" implied-prefix field.
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              X86_64ManagedRegister operand,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] - "W". When using the C4H form of the VEX prefix,
      the REX.W value is ignored in favor of this bit. */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  // Bits[6:3] - 'vvvv' the source or dest register specifier
  // (one's complement encoding).
  if (operand.IsXmmRegister()) {
    XmmRegister vvvv = operand.AsXmmRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  } else if (operand.IsCpuRegister()) {
    CpuRegister vvvv = operand.AsCpuRegister();
    int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
    uint8_t reg = static_cast<uint8_t>(inverted_reg);
    vex_prefix |= ((reg & 0x0F) << 3);
  }
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
      VEX.L = 0 indicates 128 bit vector operation. */
  vex_prefix |= SET_VEX_L;
  // Bits[1:0] - "pp" implied SIMD prefix.
  vex_prefix |= SET_VEX_PP;
  return vex_prefix;
}

// VEX byte 2 for the 3-byte form when no extra "vvvv" operand is needed:
// vvvv is set to the all-ones "unused" encoding.
uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
                                              int SET_VEX_L,
                                              int SET_VEX_PP) {
  // Vex Byte 2,
  uint8_t vex_prefix = VEX_INIT;

  /** Bit[7] - "W". When using the C4H form of the VEX prefix,
      the REX.W value is ignored in favor of this bit. */
  if (W) {
    vex_prefix |= SET_VEX_W;
  }
  /** Bits[6:3] - 'vvvv' the source or dest register specifier;
      0b1111 means no register is used. */
  vex_prefix |= (0x0F << 3);
  /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
      VEX.L = 0 indicates 128 bit vector operation. */
  vex_prefix |= SET_VEX_L;

  // Bits[1:0] - "pp" implied SIMD prefix.
  if (SET_VEX_PP != SET_VEX_PP_NONE) {
    vex_prefix |= SET_VEX_PP;
  }
  return vex_prefix;
}

}  // namespace x86_64
}  // namespace art