1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "assembler_x86_64.h"
18 
19 #include "base/casts.h"
20 #include "base/memory_region.h"
21 #include "entrypoints/quick/quick_entrypoints.h"
22 #include "thread.h"
23 
24 namespace art {
25 namespace x86_64 {
26 
27 std::ostream& operator<<(std::ostream& os, const CpuRegister& reg) {
28   return os << reg.AsRegister();
29 }
30 
31 std::ostream& operator<<(std::ostream& os, const XmmRegister& reg) {
32   return os << reg.AsFloatRegister();
33 }
34 
35 std::ostream& operator<<(std::ostream& os, const X87Register& reg) {
36   return os << "ST" << static_cast<int>(reg);
37 }
38 
39 std::ostream& operator<<(std::ostream& os, const Address& addr) {
40   switch (addr.mod()) {
41     case 0:
42       if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
43         return os << "(%" << addr.cpu_rm() << ")";
44       } else if (addr.base() == RBP) {
45         return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
46                   << "," << (1 << addr.scale()) << ")";
47       }
48       return os << "(%" << addr.cpu_base() << ",%"
49                 << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
50     case 1:
51       if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
52         return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
53       }
54       return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
55                 << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
56     case 2:
57       if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
58         return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
59       }
60       return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
61                 << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
62     default:
63       return os << "<address?>";
64   }
65 }
66 
67 bool X86_64Assembler::CpuHasAVXorAVX2FeatureFlag() {
68   if (has_AVX_ || has_AVX2_) {
69     return true;
70   }
71   return false;
72 }
73 
74 
75 void X86_64Assembler::call(CpuRegister reg) {
76   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
77   EmitOptionalRex32(reg);
78   EmitUint8(0xFF);
79   EmitRegisterOperand(2, reg.LowBits());
80 }
81 
82 
83 void X86_64Assembler::call(const Address& address) {
84   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
85   EmitOptionalRex32(address);
86   EmitUint8(0xFF);
87   EmitOperand(2, address);
88 }
89 
90 
91 void X86_64Assembler::call(Label* label) {
92   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
93   EmitUint8(0xE8);
94   static const int kSize = 5;
95   // Offset by one because we already have emitted the opcode.
96   EmitLabel(label, kSize - 1);
97 }
98 
99 void X86_64Assembler::pushq(CpuRegister reg) {
100   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
101   EmitOptionalRex32(reg);
102   EmitUint8(0x50 + reg.LowBits());
103 }
104 
105 
106 void X86_64Assembler::pushq(const Address& address) {
107   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
108   EmitOptionalRex32(address);
109   EmitUint8(0xFF);
110   EmitOperand(6, address);
111 }
112 
113 
114 void X86_64Assembler::pushq(const Immediate& imm) {
115   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
116   CHECK(imm.is_int32());  // pushq only supports 32b immediate.
117   if (imm.is_int8()) {
118     EmitUint8(0x6A);
119     EmitUint8(imm.value() & 0xFF);
120   } else {
121     EmitUint8(0x68);
122     EmitImmediate(imm);
123   }
124 }
125 
126 
127 void X86_64Assembler::popq(CpuRegister reg) {
128   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
129   EmitOptionalRex32(reg);
130   EmitUint8(0x58 + reg.LowBits());
131 }
132 
133 
134 void X86_64Assembler::popq(const Address& address) {
135   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
136   EmitOptionalRex32(address);
137   EmitUint8(0x8F);
138   EmitOperand(0, address);
139 }
140 
141 
142 void X86_64Assembler::movq(CpuRegister dst, const Immediate& imm) {
143   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
144   if (imm.is_int32()) {
145     // 32 bit. Note: sign-extends.
146     EmitRex64(dst);
147     EmitUint8(0xC7);
148     EmitRegisterOperand(0, dst.LowBits());
149     EmitInt32(static_cast<int32_t>(imm.value()));
150   } else {
151     EmitRex64(dst);
152     EmitUint8(0xB8 + dst.LowBits());
153     EmitInt64(imm.value());
154   }
155 }
156 
157 
158 void X86_64Assembler::movl(CpuRegister dst, const Immediate& imm) {
159   CHECK(imm.is_int32());
160   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
161   EmitOptionalRex32(dst);
162   EmitUint8(0xB8 + dst.LowBits());
163   EmitImmediate(imm);
164 }
165 
166 
167 void X86_64Assembler::movq(const Address& dst, const Immediate& imm) {
168   CHECK(imm.is_int32());
169   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
170   EmitRex64(dst);
171   EmitUint8(0xC7);
172   EmitOperand(0, dst);
173   EmitImmediate(imm);
174 }
175 
176 
177 void X86_64Assembler::movq(CpuRegister dst, CpuRegister src) {
178   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
179   // 0x89 is movq r/m64 <- r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
180   EmitRex64(src, dst);
181   EmitUint8(0x89);
182   EmitRegisterOperand(src.LowBits(), dst.LowBits());
183 }
184 
185 
186 void X86_64Assembler::movl(CpuRegister dst, CpuRegister src) {
187   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
188   EmitOptionalRex32(dst, src);
189   EmitUint8(0x8B);
190   EmitRegisterOperand(dst.LowBits(), src.LowBits());
191 }
192 
193 
194 void X86_64Assembler::movq(CpuRegister dst, const Address& src) {
195   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
196   EmitRex64(dst, src);
197   EmitUint8(0x8B);
198   EmitOperand(dst.LowBits(), src);
199 }
200 
201 
202 void X86_64Assembler::movl(CpuRegister dst, const Address& src) {
203   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
204   EmitOptionalRex32(dst, src);
205   EmitUint8(0x8B);
206   EmitOperand(dst.LowBits(), src);
207 }
208 
209 
210 void X86_64Assembler::movq(const Address& dst, CpuRegister src) {
211   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
212   EmitRex64(src, dst);
213   EmitUint8(0x89);
214   EmitOperand(src.LowBits(), dst);
215 }
216 
217 
218 void X86_64Assembler::movl(const Address& dst, CpuRegister src) {
219   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
220   EmitOptionalRex32(src, dst);
221   EmitUint8(0x89);
222   EmitOperand(src.LowBits(), dst);
223 }
224 
225 void X86_64Assembler::movl(const Address& dst, const Immediate& imm) {
226   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
227   EmitOptionalRex32(dst);
228   EmitUint8(0xC7);
229   EmitOperand(0, dst);
230   EmitImmediate(imm);
231 }
232 
233 void X86_64Assembler::movntl(const Address& dst, CpuRegister src) {
234   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
235   EmitOptionalRex32(src, dst);
236   EmitUint8(0x0F);
237   EmitUint8(0xC3);
238   EmitOperand(src.LowBits(), dst);
239 }
240 
241 void X86_64Assembler::movntq(const Address& dst, CpuRegister src) {
242   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
243   EmitRex64(src, dst);
244   EmitUint8(0x0F);
245   EmitUint8(0xC3);
246   EmitOperand(src.LowBits(), dst);
247 }
248 
249 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src) {
250   cmov(c, dst, src, true);
251 }
252 
253 void X86_64Assembler::cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit) {
254   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
255   EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
256   EmitUint8(0x0F);
257   EmitUint8(0x40 + c);
258   EmitRegisterOperand(dst.LowBits(), src.LowBits());
259 }
260 
261 
262 void X86_64Assembler::cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit) {
263   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
264   if (is64bit) {
265     EmitRex64(dst, src);
266   } else {
267     EmitOptionalRex32(dst, src);
268   }
269   EmitUint8(0x0F);
270   EmitUint8(0x40 + c);
271   EmitOperand(dst.LowBits(), src);
272 }
273 
274 
275 void X86_64Assembler::movzxb(CpuRegister dst, CpuRegister src) {
276   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
277   EmitOptionalByteRegNormalizingRex32(dst, src);
278   EmitUint8(0x0F);
279   EmitUint8(0xB6);
280   EmitRegisterOperand(dst.LowBits(), src.LowBits());
281 }
282 
283 
284 void X86_64Assembler::movzxb(CpuRegister dst, const Address& src) {
285   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
286   // Byte register is only in the source register form, so we don't use
287   // EmitOptionalByteRegNormalizingRex32(dst, src);
288   EmitOptionalRex32(dst, src);
289   EmitUint8(0x0F);
290   EmitUint8(0xB6);
291   EmitOperand(dst.LowBits(), src);
292 }
293 
294 
295 void X86_64Assembler::movsxb(CpuRegister dst, CpuRegister src) {
296   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
297   EmitOptionalByteRegNormalizingRex32(dst, src);
298   EmitUint8(0x0F);
299   EmitUint8(0xBE);
300   EmitRegisterOperand(dst.LowBits(), src.LowBits());
301 }
302 
303 
304 void X86_64Assembler::movsxb(CpuRegister dst, const Address& src) {
305   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
306   // Byte register is only in the source register form, so we don't use
307   // EmitOptionalByteRegNormalizingRex32(dst, src);
308   EmitOptionalRex32(dst, src);
309   EmitUint8(0x0F);
310   EmitUint8(0xBE);
311   EmitOperand(dst.LowBits(), src);
312 }
313 
314 
315 void X86_64Assembler::movb(CpuRegister /*dst*/, const Address& /*src*/) {
316   LOG(FATAL) << "Use movzxb or movsxb instead.";
317 }
318 
319 
320 void X86_64Assembler::movb(const Address& dst, CpuRegister src) {
321   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
322   EmitOptionalByteRegNormalizingRex32(src, dst);
323   EmitUint8(0x88);
324   EmitOperand(src.LowBits(), dst);
325 }
326 
327 
328 void X86_64Assembler::movb(const Address& dst, const Immediate& imm) {
329   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
330   EmitOptionalRex32(dst);
331   EmitUint8(0xC6);
332   EmitOperand(Register::RAX, dst);
333   CHECK(imm.is_int8());
334   EmitUint8(imm.value() & 0xFF);
335 }
336 
337 
338 void X86_64Assembler::movzxw(CpuRegister dst, CpuRegister src) {
339   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
340   EmitOptionalRex32(dst, src);
341   EmitUint8(0x0F);
342   EmitUint8(0xB7);
343   EmitRegisterOperand(dst.LowBits(), src.LowBits());
344 }
345 
346 
347 void X86_64Assembler::movzxw(CpuRegister dst, const Address& src) {
348   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
349   EmitOptionalRex32(dst, src);
350   EmitUint8(0x0F);
351   EmitUint8(0xB7);
352   EmitOperand(dst.LowBits(), src);
353 }
354 
355 
356 void X86_64Assembler::movsxw(CpuRegister dst, CpuRegister src) {
357   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
358   EmitOptionalRex32(dst, src);
359   EmitUint8(0x0F);
360   EmitUint8(0xBF);
361   EmitRegisterOperand(dst.LowBits(), src.LowBits());
362 }
363 
364 
365 void X86_64Assembler::movsxw(CpuRegister dst, const Address& src) {
366   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
367   EmitOptionalRex32(dst, src);
368   EmitUint8(0x0F);
369   EmitUint8(0xBF);
370   EmitOperand(dst.LowBits(), src);
371 }
372 
373 
374 void X86_64Assembler::movw(CpuRegister /*dst*/, const Address& /*src*/) {
375   LOG(FATAL) << "Use movzxw or movsxw instead.";
376 }
377 
378 
379 void X86_64Assembler::movw(const Address& dst, CpuRegister src) {
380   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
381   EmitOperandSizeOverride();
382   EmitOptionalRex32(src, dst);
383   EmitUint8(0x89);
384   EmitOperand(src.LowBits(), dst);
385 }
386 
387 
388 void X86_64Assembler::movw(const Address& dst, const Immediate& imm) {
389   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
390   EmitOperandSizeOverride();
391   EmitOptionalRex32(dst);
392   EmitUint8(0xC7);
393   EmitOperand(Register::RAX, dst);
394   CHECK(imm.is_uint16() || imm.is_int16());
395   EmitUint8(imm.value() & 0xFF);
396   EmitUint8(imm.value() >> 8);
397 }
398 
399 
400 void X86_64Assembler::leaq(CpuRegister dst, const Address& src) {
401   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
402   EmitRex64(dst, src);
403   EmitUint8(0x8D);
404   EmitOperand(dst.LowBits(), src);
405 }
406 
407 
408 void X86_64Assembler::leal(CpuRegister dst, const Address& src) {
409   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
410   EmitOptionalRex32(dst, src);
411   EmitUint8(0x8D);
412   EmitOperand(dst.LowBits(), src);
413 }
414 
415 
416 void X86_64Assembler::movaps(XmmRegister dst, XmmRegister src) {
417   if (CpuHasAVXorAVX2FeatureFlag()) {
418     vmovaps(dst, src);
419     return;
420   }
421   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
422   EmitOptionalRex32(dst, src);
423   EmitUint8(0x0F);
424   EmitUint8(0x28);
425   EmitXmmRegisterOperand(dst.LowBits(), src);
426 }
427 
428 
429 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, xmm2 */
430 void X86_64Assembler::vmovaps(XmmRegister dst, XmmRegister src) {
431   DCHECK(CpuHasAVXorAVX2FeatureFlag());
432   uint8_t byte_zero, byte_one, byte_two;
433   bool is_twobyte_form = true;
434   bool load = dst.NeedsRex();
435   bool store = !load;
436 
437   if (src.NeedsRex()&& dst.NeedsRex()) {
438     is_twobyte_form = false;
439   }
440   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
441   // Instruction VEX Prefix
442   byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
443   X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
444   if (is_twobyte_form) {
445     bool rex_bit = (load) ? dst.NeedsRex() : src.NeedsRex();
446     byte_one = EmitVexPrefixByteOne(rex_bit,
447                                     vvvv_reg,
448                                     SET_VEX_L_128,
449                                     SET_VEX_PP_NONE);
450   } else {
451     byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
452                                     /*X=*/ false,
453                                     src.NeedsRex(),
454                                     SET_VEX_M_0F);
455     byte_two = EmitVexPrefixByteTwo(/*W=*/ false,
456                                     SET_VEX_L_128,
457                                     SET_VEX_PP_NONE);
458   }
459   EmitUint8(byte_zero);
460   EmitUint8(byte_one);
461   if (!is_twobyte_form) {
462     EmitUint8(byte_two);
463   }
464   // Instruction Opcode
465   if (is_twobyte_form && store) {
466     EmitUint8(0x29);
467   } else {
468     EmitUint8(0x28);
469   }
470   // Instruction Operands
471   if (is_twobyte_form && store) {
472     EmitXmmRegisterOperand(src.LowBits(), dst);
473   } else {
474     EmitXmmRegisterOperand(dst.LowBits(), src);
475   }
476 }
477 
478 void X86_64Assembler::movaps(XmmRegister dst, const Address& src) {
479   if (CpuHasAVXorAVX2FeatureFlag()) {
480     vmovaps(dst, src);
481     return;
482   }
483   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
484   EmitOptionalRex32(dst, src);
485   EmitUint8(0x0F);
486   EmitUint8(0x28);
487   EmitOperand(dst.LowBits(), src);
488 }
489 
490 /**VEX.128.0F.WIG 28 /r VMOVAPS xmm1, m128 */
491 void X86_64Assembler::vmovaps(XmmRegister dst, const Address& src) {
492   DCHECK(CpuHasAVXorAVX2FeatureFlag());
493   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
494   uint8_t ByteZero, ByteOne, ByteTwo;
495   bool is_twobyte_form = false;
496   // Instruction VEX Prefix
497   uint8_t rex = src.rex();
498   bool Rex_x = rex & GET_REX_X;
499   bool Rex_b = rex & GET_REX_B;
500   if (!Rex_b && !Rex_x) {
501     is_twobyte_form = true;
502   }
503   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
504   if (is_twobyte_form) {
505     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
506     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
507                                    vvvv_reg,
508                                    SET_VEX_L_128,
509                                    SET_VEX_PP_NONE);
510   } else {
511     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
512                                    Rex_x,
513                                    Rex_b,
514                                    SET_VEX_M_0F);
515     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
516                                    SET_VEX_L_128,
517                                    SET_VEX_PP_NONE);
518   }
519   EmitUint8(ByteZero);
520   EmitUint8(ByteOne);
521   if (!is_twobyte_form) {
522     EmitUint8(ByteTwo);
523   }
524   // Instruction Opcode
525   EmitUint8(0x28);
526   // Instruction Operands
527   EmitOperand(dst.LowBits(), src);
528 }
529 
530 void X86_64Assembler::movups(XmmRegister dst, const Address& src) {
531   if (CpuHasAVXorAVX2FeatureFlag()) {
532     vmovups(dst, src);
533     return;
534   }
535   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
536   EmitOptionalRex32(dst, src);
537   EmitUint8(0x0F);
538   EmitUint8(0x10);
539   EmitOperand(dst.LowBits(), src);
540 }
541 
542 /** VEX.128.0F.WIG 10 /r VMOVUPS xmm1, m128 */
543 void X86_64Assembler::vmovups(XmmRegister dst, const Address& src) {
544   DCHECK(CpuHasAVXorAVX2FeatureFlag());
545   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
546   uint8_t ByteZero, ByteOne, ByteTwo;
547   bool is_twobyte_form = false;
548   // Instruction VEX Prefix
549   uint8_t rex = src.rex();
550   bool Rex_x = rex & GET_REX_X;
551   bool Rex_b = rex & GET_REX_B;
552   if (!Rex_x && !Rex_b) {
553     is_twobyte_form = true;
554   }
555   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
556   if (is_twobyte_form) {
557     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
558     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
559                                    vvvv_reg,
560                                    SET_VEX_L_128,
561                                    SET_VEX_PP_NONE);
562   } else {
563     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
564                                    Rex_x,
565                                    Rex_b,
566                                    SET_VEX_M_0F);
567     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
568                                    SET_VEX_L_128,
569                                    SET_VEX_PP_NONE);
570   }
571   EmitUint8(ByteZero);
572   EmitUint8(ByteOne);
573   if (!is_twobyte_form) {
574     EmitUint8(ByteTwo);
575   }
576   // Instruction Opcode
577   EmitUint8(0x10);
578   // Instruction Operands
579   EmitOperand(dst.LowBits(), src);
580 }
581 
582 
583 void X86_64Assembler::movaps(const Address& dst, XmmRegister src) {
584   if (CpuHasAVXorAVX2FeatureFlag()) {
585     vmovaps(dst, src);
586     return;
587   }
588   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
589   EmitOptionalRex32(src, dst);
590   EmitUint8(0x0F);
591   EmitUint8(0x29);
592   EmitOperand(src.LowBits(), dst);
593 }
594 
595 /** VEX.128.0F.WIG 29 /r VMOVAPS m128, xmm1 */
596 void X86_64Assembler::vmovaps(const Address& dst, XmmRegister src) {
597   DCHECK(CpuHasAVXorAVX2FeatureFlag());
598   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
599   uint8_t ByteZero, ByteOne, ByteTwo;
600   bool is_twobyte_form = false;
601 
602   // Instruction VEX Prefix
603   uint8_t rex = dst.rex();
604   bool Rex_x = rex & GET_REX_X;
605   bool Rex_b = rex & GET_REX_B;
606   if (!Rex_b && !Rex_x) {
607     is_twobyte_form = true;
608   }
609   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
610   if (is_twobyte_form) {
611     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
612     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
613                                    vvvv_reg,
614                                    SET_VEX_L_128,
615                                    SET_VEX_PP_NONE);
616   } else {
617     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
618                                    Rex_x,
619                                    Rex_b,
620                                    SET_VEX_M_0F);
621     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
622                                    SET_VEX_L_128,
623                                    SET_VEX_PP_NONE);
624   }
625   EmitUint8(ByteZero);
626   EmitUint8(ByteOne);
627   if (!is_twobyte_form) {
628     EmitUint8(ByteTwo);
629   }
630   // Instruction Opcode
631   EmitUint8(0x29);
632   // Instruction Operands
633   EmitOperand(src.LowBits(), dst);
634 }
635 
636 void X86_64Assembler::movups(const Address& dst, XmmRegister src) {
637   if (CpuHasAVXorAVX2FeatureFlag()) {
638     vmovups(dst, src);
639     return;
640   }
641   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
642   EmitOptionalRex32(src, dst);
643   EmitUint8(0x0F);
644   EmitUint8(0x11);
645   EmitOperand(src.LowBits(), dst);
646 }
647 
648 /** VEX.128.0F.WIG 11 /r VMOVUPS m128, xmm1 */
649 void X86_64Assembler::vmovups(const Address& dst, XmmRegister src) {
650   DCHECK(CpuHasAVXorAVX2FeatureFlag());
651   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
652   uint8_t ByteZero, ByteOne, ByteTwo;
653   bool is_twobyte_form = false;
654 
655   // Instruction VEX Prefix
656   uint8_t rex = dst.rex();
657   bool Rex_x = rex & GET_REX_X;
658   bool Rex_b = rex & GET_REX_B;
659   if (!Rex_b && !Rex_x) {
660     is_twobyte_form = true;
661   }
662   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
663   if (is_twobyte_form) {
664     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
665     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
666                                    vvvv_reg,
667                                    SET_VEX_L_128,
668                                    SET_VEX_PP_NONE);
669   } else {
670     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
671                                    Rex_x,
672                                    Rex_b,
673                                    SET_VEX_M_0F);
674     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
675                                    SET_VEX_L_128,
676                                    SET_VEX_PP_NONE);
677   }
678   EmitUint8(ByteZero);
679   EmitUint8(ByteOne);
680   if (!is_twobyte_form) {
681     EmitUint8(ByteTwo);
682   }
683   // Instruction Opcode
684   EmitUint8(0x11);
685   // Instruction Operands
686   EmitOperand(src.LowBits(), dst);
687 }
688 
689 
690 void X86_64Assembler::movss(XmmRegister dst, const Address& src) {
691   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
692   EmitUint8(0xF3);
693   EmitOptionalRex32(dst, src);
694   EmitUint8(0x0F);
695   EmitUint8(0x10);
696   EmitOperand(dst.LowBits(), src);
697 }
698 
699 
700 void X86_64Assembler::movss(const Address& dst, XmmRegister src) {
701   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
702   EmitUint8(0xF3);
703   EmitOptionalRex32(src, dst);
704   EmitUint8(0x0F);
705   EmitUint8(0x11);
706   EmitOperand(src.LowBits(), dst);
707 }
708 
709 
710 void X86_64Assembler::movss(XmmRegister dst, XmmRegister src) {
711   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
712   EmitUint8(0xF3);
713   EmitOptionalRex32(src, dst);  // Movss is MR encoding instead of the usual RM.
714   EmitUint8(0x0F);
715   EmitUint8(0x11);
716   EmitXmmRegisterOperand(src.LowBits(), dst);
717 }
718 
719 
720 void X86_64Assembler::movsxd(CpuRegister dst, CpuRegister src) {
721   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
722   EmitRex64(dst, src);
723   EmitUint8(0x63);
724   EmitRegisterOperand(dst.LowBits(), src.LowBits());
725 }
726 
727 
728 void X86_64Assembler::movsxd(CpuRegister dst, const Address& src) {
729   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
730   EmitRex64(dst, src);
731   EmitUint8(0x63);
732   EmitOperand(dst.LowBits(), src);
733 }
734 
735 
736 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src) {
737   movd(dst, src, true);
738 }
739 
740 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src) {
741   movd(dst, src, true);
742 }
743 
744 void X86_64Assembler::movd(XmmRegister dst, CpuRegister src, bool is64bit) {
745   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
746   EmitUint8(0x66);
747   EmitOptionalRex(false, is64bit, dst.NeedsRex(), false, src.NeedsRex());
748   EmitUint8(0x0F);
749   EmitUint8(0x6E);
750   EmitOperand(dst.LowBits(), Operand(src));
751 }
752 
753 void X86_64Assembler::movd(CpuRegister dst, XmmRegister src, bool is64bit) {
754   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
755   EmitUint8(0x66);
756   EmitOptionalRex(false, is64bit, src.NeedsRex(), false, dst.NeedsRex());
757   EmitUint8(0x0F);
758   EmitUint8(0x7E);
759   EmitOperand(src.LowBits(), Operand(dst));
760 }
761 
762 void X86_64Assembler::addss(XmmRegister dst, XmmRegister src) {
763   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
764   EmitUint8(0xF3);
765   EmitOptionalRex32(dst, src);
766   EmitUint8(0x0F);
767   EmitUint8(0x58);
768   EmitXmmRegisterOperand(dst.LowBits(), src);
769 }
770 
771 void X86_64Assembler::addss(XmmRegister dst, const Address& src) {
772   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
773   EmitUint8(0xF3);
774   EmitOptionalRex32(dst, src);
775   EmitUint8(0x0F);
776   EmitUint8(0x58);
777   EmitOperand(dst.LowBits(), src);
778 }
779 
780 
781 void X86_64Assembler::subss(XmmRegister dst, XmmRegister src) {
782   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
783   EmitUint8(0xF3);
784   EmitOptionalRex32(dst, src);
785   EmitUint8(0x0F);
786   EmitUint8(0x5C);
787   EmitXmmRegisterOperand(dst.LowBits(), src);
788 }
789 
790 
791 void X86_64Assembler::subss(XmmRegister dst, const Address& src) {
792   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
793   EmitUint8(0xF3);
794   EmitOptionalRex32(dst, src);
795   EmitUint8(0x0F);
796   EmitUint8(0x5C);
797   EmitOperand(dst.LowBits(), src);
798 }
799 
800 
801 void X86_64Assembler::mulss(XmmRegister dst, XmmRegister src) {
802   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
803   EmitUint8(0xF3);
804   EmitOptionalRex32(dst, src);
805   EmitUint8(0x0F);
806   EmitUint8(0x59);
807   EmitXmmRegisterOperand(dst.LowBits(), src);
808 }
809 
810 
811 void X86_64Assembler::mulss(XmmRegister dst, const Address& src) {
812   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
813   EmitUint8(0xF3);
814   EmitOptionalRex32(dst, src);
815   EmitUint8(0x0F);
816   EmitUint8(0x59);
817   EmitOperand(dst.LowBits(), src);
818 }
819 
820 
821 void X86_64Assembler::divss(XmmRegister dst, XmmRegister src) {
822   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
823   EmitUint8(0xF3);
824   EmitOptionalRex32(dst, src);
825   EmitUint8(0x0F);
826   EmitUint8(0x5E);
827   EmitXmmRegisterOperand(dst.LowBits(), src);
828 }
829 
830 
831 void X86_64Assembler::divss(XmmRegister dst, const Address& src) {
832   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
833   EmitUint8(0xF3);
834   EmitOptionalRex32(dst, src);
835   EmitUint8(0x0F);
836   EmitUint8(0x5E);
837   EmitOperand(dst.LowBits(), src);
838 }
839 
840 
841 void X86_64Assembler::addps(XmmRegister dst, XmmRegister src) {
842   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
843   EmitOptionalRex32(dst, src);
844   EmitUint8(0x0F);
845   EmitUint8(0x58);
846   EmitXmmRegisterOperand(dst.LowBits(), src);
847 }
848 
849 
850 void X86_64Assembler::subps(XmmRegister dst, XmmRegister src) {
851   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
852   EmitOptionalRex32(dst, src);
853   EmitUint8(0x0F);
854   EmitUint8(0x5C);
855   EmitXmmRegisterOperand(dst.LowBits(), src);
856 }
857 
858 void X86_64Assembler::vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
859   DCHECK(CpuHasAVXorAVX2FeatureFlag());
860   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
861   bool is_twobyte_form = false;
862   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
863   if (!add_right.NeedsRex()) {
864     is_twobyte_form = true;
865   }
866   X86_64ManagedRegister vvvv_reg =
867       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
868   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
869   if (is_twobyte_form) {
870     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
871   } else {
872     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
873                                    /*X=*/ false,
874                                    add_right.NeedsRex(),
875                                    SET_VEX_M_0F);
876     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
877   }
878   EmitUint8(ByteZero);
879   EmitUint8(ByteOne);
880   if (!is_twobyte_form) {
881     EmitUint8(ByteTwo);
882   }
883   EmitUint8(0x58);
884   EmitXmmRegisterOperand(dst.LowBits(), add_right);
885 }
886 
887 void X86_64Assembler::vsubps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
888   DCHECK(CpuHasAVXorAVX2FeatureFlag());
889   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
890   bool is_twobyte_form = false;
891   uint8_t byte_zero = 0x00, byte_one = 0x00, byte_two = 0x00;
892   if (!src2.NeedsRex()) {
893     is_twobyte_form = true;
894   }
895   byte_zero = EmitVexPrefixByteZero(is_twobyte_form);
896   X86_64ManagedRegister vvvv_reg = X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
897   if (is_twobyte_form) {
898     byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
899   } else {
900     byte_one = EmitVexPrefixByteOne(dst.NeedsRex(), /*X=*/ false, src2.NeedsRex(), SET_VEX_M_0F);
901     byte_two = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
902   }
903   EmitUint8(byte_zero);
904   EmitUint8(byte_one);
905   if (!is_twobyte_form) {
906     EmitUint8(byte_two);
907   }
908   EmitUint8(0x5C);
909   EmitXmmRegisterOperand(dst.LowBits(), src2);
910 }
911 
912 
913 void X86_64Assembler::mulps(XmmRegister dst, XmmRegister src) {
914   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
915   EmitOptionalRex32(dst, src);
916   EmitUint8(0x0F);
917   EmitUint8(0x59);
918   EmitXmmRegisterOperand(dst.LowBits(), src);
919 }
920 
921 void X86_64Assembler::vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
922   DCHECK(CpuHasAVXorAVX2FeatureFlag());
923   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
924   bool is_twobyte_form = false;
925   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
926   if (!src2.NeedsRex()) {
927     is_twobyte_form = true;
928   }
929   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
930   X86_64ManagedRegister vvvv_reg =
931       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
932   if (is_twobyte_form) {
933     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
934   } else {
935     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
936                                    /*X=*/ false,
937                                    src2.NeedsRex(),
938                                    SET_VEX_M_0F);
939     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
940   }
941   EmitUint8(ByteZero);
942   EmitUint8(ByteOne);
943   if (!is_twobyte_form) {
944     EmitUint8(ByteTwo);
945   }
946   EmitUint8(0x59);
947   EmitXmmRegisterOperand(dst.LowBits(), src2);
948 }
949 
950 void X86_64Assembler::divps(XmmRegister dst, XmmRegister src) {
951   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
952   EmitOptionalRex32(dst, src);
953   EmitUint8(0x0F);
954   EmitUint8(0x5E);
955   EmitXmmRegisterOperand(dst.LowBits(), src);
956 }
957 
958 void X86_64Assembler::vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
959   DCHECK(CpuHasAVXorAVX2FeatureFlag());
960   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
961   bool is_twobyte_form = false;
962   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
963   if (!src2.NeedsRex()) {
964     is_twobyte_form = true;
965   }
966   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
967   X86_64ManagedRegister vvvv_reg =
968       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
969   if (is_twobyte_form) {
970     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
971   } else {
972     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
973                                    /*X=*/ false,
974                                    src2.NeedsRex(),
975                                    SET_VEX_M_0F);
976     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
977   }
978   EmitUint8(ByteZero);
979   EmitUint8(ByteOne);
980   if (!is_twobyte_form) {
981     EmitUint8(ByteTwo);
982   }
983   EmitUint8(0x5E);
984   EmitXmmRegisterOperand(dst.LowBits(), src2);
985 }
986 
987 void X86_64Assembler::flds(const Address& src) {
988   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
989   EmitUint8(0xD9);
990   EmitOperand(0, src);
991 }
992 
993 
994 void X86_64Assembler::fsts(const Address& dst) {
995   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
996   EmitUint8(0xD9);
997   EmitOperand(2, dst);
998 }
999 
1000 
1001 void X86_64Assembler::fstps(const Address& dst) {
1002   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1003   EmitUint8(0xD9);
1004   EmitOperand(3, dst);
1005 }
1006 
1007 
1008 void X86_64Assembler::movapd(XmmRegister dst, XmmRegister src) {
1009   if (CpuHasAVXorAVX2FeatureFlag()) {
1010     vmovapd(dst, src);
1011     return;
1012   }
1013   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1014   EmitUint8(0x66);
1015   EmitOptionalRex32(dst, src);
1016   EmitUint8(0x0F);
1017   EmitUint8(0x28);
1018   EmitXmmRegisterOperand(dst.LowBits(), src);
1019 }
1020 
1021 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, xmm2 */
1022 void X86_64Assembler::vmovapd(XmmRegister dst, XmmRegister src) {
1023   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1024   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1025   uint8_t ByteZero, ByteOne, ByteTwo;
1026   bool is_twobyte_form = true;
1027 
1028   if (src.NeedsRex() && dst.NeedsRex()) {
1029     is_twobyte_form = false;
1030   }
1031   // Instruction VEX Prefix
1032   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1033   bool load = dst.NeedsRex();
1034   if (is_twobyte_form) {
1035     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1036     bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
1037     ByteOne = EmitVexPrefixByteOne(rex_bit,
1038                                    vvvv_reg,
1039                                    SET_VEX_L_128,
1040                                    SET_VEX_PP_66);
1041   } else {
1042     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1043                                    /*X=*/ false,
1044                                    src.NeedsRex(),
1045                                    SET_VEX_M_0F);
1046     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1047                                    SET_VEX_L_128,
1048                                    SET_VEX_PP_66);
1049   }
1050   EmitUint8(ByteZero);
1051   EmitUint8(ByteOne);
1052   if (!is_twobyte_form) {
1053     EmitUint8(ByteTwo);
1054   }
1055   // Instruction Opcode
1056   if (is_twobyte_form && !load) {
1057     EmitUint8(0x29);
1058   } else {
1059     EmitUint8(0x28);
1060   }
1061   // Instruction Operands
1062   if (is_twobyte_form && !load) {
1063     EmitXmmRegisterOperand(src.LowBits(), dst);
1064   } else {
1065     EmitXmmRegisterOperand(dst.LowBits(), src);
1066   }
1067 }
1068 
1069 void X86_64Assembler::movapd(XmmRegister dst, const Address& src) {
1070   if (CpuHasAVXorAVX2FeatureFlag()) {
1071     vmovapd(dst, src);
1072     return;
1073   }
1074   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1075   EmitUint8(0x66);
1076   EmitOptionalRex32(dst, src);
1077   EmitUint8(0x0F);
1078   EmitUint8(0x28);
1079   EmitOperand(dst.LowBits(), src);
1080 }
1081 
1082 /** VEX.128.66.0F.WIG 28 /r VMOVAPD xmm1, m128 */
1083 void X86_64Assembler::vmovapd(XmmRegister dst, const Address& src) {
1084   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1085   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1086   uint8_t ByteZero, ByteOne, ByteTwo;
1087   bool is_twobyte_form = false;
1088 
1089   // Instruction VEX Prefix
1090   uint8_t rex = src.rex();
1091   bool Rex_x = rex & GET_REX_X;
1092   bool Rex_b = rex & GET_REX_B;
1093   if (!Rex_b && !Rex_x) {
1094     is_twobyte_form = true;
1095   }
1096   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1097   if (is_twobyte_form) {
1098     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1099     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1100                                    vvvv_reg,
1101                                    SET_VEX_L_128,
1102                                    SET_VEX_PP_66);
1103   } else {
1104     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1105                                    Rex_x,
1106                                    Rex_b,
1107                                    SET_VEX_M_0F);
1108     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1109                                    SET_VEX_L_128,
1110                                    SET_VEX_PP_66);
1111   }
1112   EmitUint8(ByteZero);
1113   EmitUint8(ByteOne);
1114   if (!is_twobyte_form) {
1115     EmitUint8(ByteTwo);
1116   }
1117   // Instruction Opcode
1118   EmitUint8(0x28);
1119   // Instruction Operands
1120   EmitOperand(dst.LowBits(), src);
1121 }
1122 
1123 void X86_64Assembler::movupd(XmmRegister dst, const Address& src) {
1124   if (CpuHasAVXorAVX2FeatureFlag()) {
1125     vmovupd(dst, src);
1126     return;
1127   }
1128   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1129   EmitUint8(0x66);
1130   EmitOptionalRex32(dst, src);
1131   EmitUint8(0x0F);
1132   EmitUint8(0x10);
1133   EmitOperand(dst.LowBits(), src);
1134 }
1135 
1136 /** VEX.128.66.0F.WIG 10 /r VMOVUPD xmm1, m128 */
1137 void X86_64Assembler::vmovupd(XmmRegister dst, const Address& src) {
1138   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1139   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1140   bool is_twobyte_form = false;
1141   uint8_t ByteZero, ByteOne, ByteTwo;
1142 
1143   // Instruction VEX Prefix
1144   uint8_t rex = src.rex();
1145   bool Rex_x = rex & GET_REX_X;
1146   bool Rex_b = rex & GET_REX_B;
1147   if (!Rex_b && !Rex_x) {
1148     is_twobyte_form = true;
1149   }
1150   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1151   if (is_twobyte_form) {
1152     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1153     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1154                                    vvvv_reg,
1155                                    SET_VEX_L_128,
1156                                    SET_VEX_PP_66);
1157   } else {
1158     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1159                                    Rex_x,
1160                                    Rex_b,
1161                                    SET_VEX_M_0F);
1162     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1163                                    SET_VEX_L_128,
1164                                    SET_VEX_PP_66);
1165   }
1166   EmitUint8(ByteZero);
1167   EmitUint8(ByteOne);
1168   if (!is_twobyte_form)
1169   EmitUint8(ByteTwo);
1170   // Instruction Opcode
1171   EmitUint8(0x10);
1172   // Instruction Operands
1173   EmitOperand(dst.LowBits(), src);
1174 }
1175 
1176 void X86_64Assembler::movapd(const Address& dst, XmmRegister src) {
1177   if (CpuHasAVXorAVX2FeatureFlag()) {
1178     vmovapd(dst, src);
1179     return;
1180   }
1181   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1182   EmitUint8(0x66);
1183   EmitOptionalRex32(src, dst);
1184   EmitUint8(0x0F);
1185   EmitUint8(0x29);
1186   EmitOperand(src.LowBits(), dst);
1187 }
1188 
1189 /** VEX.128.66.0F.WIG 29 /r VMOVAPD m128, xmm1 */
1190 void X86_64Assembler::vmovapd(const Address& dst, XmmRegister src) {
1191   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1192   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1193   bool is_twobyte_form = false;
1194   uint8_t ByteZero, ByteOne, ByteTwo;
1195   // Instruction VEX Prefix
1196   uint8_t rex = dst.rex();
1197   bool Rex_x = rex & GET_REX_X;
1198   bool Rex_b = rex & GET_REX_B;
1199   if (!Rex_x && !Rex_b) {
1200     is_twobyte_form = true;
1201   }
1202   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1203   if (is_twobyte_form) {
1204     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1205     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1206                                    vvvv_reg,
1207                                    SET_VEX_L_128,
1208                                    SET_VEX_PP_66);
1209   } else {
1210     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1211                                    Rex_x,
1212                                    Rex_b,
1213                                    SET_VEX_M_0F);
1214     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1215                                    SET_VEX_L_128,
1216                                    SET_VEX_PP_66);
1217   }
1218   EmitUint8(ByteZero);
1219   EmitUint8(ByteOne);
1220   if (!is_twobyte_form) {
1221     EmitUint8(ByteTwo);
1222   }
1223   // Instruction Opcode
1224   EmitUint8(0x29);
1225   // Instruction Operands
1226   EmitOperand(src.LowBits(), dst);
1227 }
1228 
1229 void X86_64Assembler::movupd(const Address& dst, XmmRegister src) {
1230   if (CpuHasAVXorAVX2FeatureFlag()) {
1231     vmovupd(dst, src);
1232     return;
1233   }
1234   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1235   EmitUint8(0x66);
1236   EmitOptionalRex32(src, dst);
1237   EmitUint8(0x0F);
1238   EmitUint8(0x11);
1239   EmitOperand(src.LowBits(), dst);
1240 }
1241 
1242 /** VEX.128.66.0F.WIG 11 /r VMOVUPD m128, xmm1 */
1243 void X86_64Assembler::vmovupd(const Address& dst, XmmRegister src) {
1244   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1245   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1246   bool is_twobyte_form = false;
1247   uint8_t ByteZero, ByteOne, ByteTwo;
1248 
1249   // Instruction VEX Prefix
1250   uint8_t rex = dst.rex();
1251   bool Rex_x = rex & GET_REX_X;
1252   bool Rex_b = rex & GET_REX_B;
1253   if (!Rex_x && !Rex_b) {
1254     is_twobyte_form = true;
1255   }
1256   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1257   if (is_twobyte_form) {
1258     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1259     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1260                                    vvvv_reg,
1261                                    SET_VEX_L_128,
1262                                    SET_VEX_PP_66);
1263   } else {
1264     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1265                                    Rex_x,
1266                                    Rex_b,
1267                                    SET_VEX_M_0F);
1268     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1269                                    SET_VEX_L_128,
1270                                    SET_VEX_PP_66);
1271   }
1272   EmitUint8(ByteZero);
1273   EmitUint8(ByteOne);
1274   if (!is_twobyte_form) {
1275     EmitUint8(ByteTwo);
1276   }
1277   // Instruction Opcode
1278   EmitUint8(0x11);
1279   // Instruction Operands
1280   EmitOperand(src.LowBits(), dst);
1281 }
1282 
1283 
1284 void X86_64Assembler::movsd(XmmRegister dst, const Address& src) {
1285   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1286   EmitUint8(0xF2);
1287   EmitOptionalRex32(dst, src);
1288   EmitUint8(0x0F);
1289   EmitUint8(0x10);
1290   EmitOperand(dst.LowBits(), src);
1291 }
1292 
1293 
1294 void X86_64Assembler::movsd(const Address& dst, XmmRegister src) {
1295   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1296   EmitUint8(0xF2);
1297   EmitOptionalRex32(src, dst);
1298   EmitUint8(0x0F);
1299   EmitUint8(0x11);
1300   EmitOperand(src.LowBits(), dst);
1301 }
1302 
1303 
1304 void X86_64Assembler::movsd(XmmRegister dst, XmmRegister src) {
1305   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1306   EmitUint8(0xF2);
1307   EmitOptionalRex32(src, dst);  // Movsd is MR encoding instead of the usual RM.
1308   EmitUint8(0x0F);
1309   EmitUint8(0x11);
1310   EmitXmmRegisterOperand(src.LowBits(), dst);
1311 }
1312 
1313 
1314 void X86_64Assembler::addsd(XmmRegister dst, XmmRegister src) {
1315   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1316   EmitUint8(0xF2);
1317   EmitOptionalRex32(dst, src);
1318   EmitUint8(0x0F);
1319   EmitUint8(0x58);
1320   EmitXmmRegisterOperand(dst.LowBits(), src);
1321 }
1322 
1323 
1324 void X86_64Assembler::addsd(XmmRegister dst, const Address& src) {
1325   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1326   EmitUint8(0xF2);
1327   EmitOptionalRex32(dst, src);
1328   EmitUint8(0x0F);
1329   EmitUint8(0x58);
1330   EmitOperand(dst.LowBits(), src);
1331 }
1332 
1333 
1334 void X86_64Assembler::subsd(XmmRegister dst, XmmRegister src) {
1335   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1336   EmitUint8(0xF2);
1337   EmitOptionalRex32(dst, src);
1338   EmitUint8(0x0F);
1339   EmitUint8(0x5C);
1340   EmitXmmRegisterOperand(dst.LowBits(), src);
1341 }
1342 
1343 
1344 void X86_64Assembler::subsd(XmmRegister dst, const Address& src) {
1345   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1346   EmitUint8(0xF2);
1347   EmitOptionalRex32(dst, src);
1348   EmitUint8(0x0F);
1349   EmitUint8(0x5C);
1350   EmitOperand(dst.LowBits(), src);
1351 }
1352 
1353 
1354 void X86_64Assembler::mulsd(XmmRegister dst, XmmRegister src) {
1355   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1356   EmitUint8(0xF2);
1357   EmitOptionalRex32(dst, src);
1358   EmitUint8(0x0F);
1359   EmitUint8(0x59);
1360   EmitXmmRegisterOperand(dst.LowBits(), src);
1361 }
1362 
1363 
1364 void X86_64Assembler::mulsd(XmmRegister dst, const Address& src) {
1365   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1366   EmitUint8(0xF2);
1367   EmitOptionalRex32(dst, src);
1368   EmitUint8(0x0F);
1369   EmitUint8(0x59);
1370   EmitOperand(dst.LowBits(), src);
1371 }
1372 
1373 
1374 void X86_64Assembler::divsd(XmmRegister dst, XmmRegister src) {
1375   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1376   EmitUint8(0xF2);
1377   EmitOptionalRex32(dst, src);
1378   EmitUint8(0x0F);
1379   EmitUint8(0x5E);
1380   EmitXmmRegisterOperand(dst.LowBits(), src);
1381 }
1382 
1383 
1384 void X86_64Assembler::divsd(XmmRegister dst, const Address& src) {
1385   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1386   EmitUint8(0xF2);
1387   EmitOptionalRex32(dst, src);
1388   EmitUint8(0x0F);
1389   EmitUint8(0x5E);
1390   EmitOperand(dst.LowBits(), src);
1391 }
1392 
1393 
1394 void X86_64Assembler::addpd(XmmRegister dst, XmmRegister src) {
1395   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1396   EmitUint8(0x66);
1397   EmitOptionalRex32(dst, src);
1398   EmitUint8(0x0F);
1399   EmitUint8(0x58);
1400   EmitXmmRegisterOperand(dst.LowBits(), src);
1401 }
1402 
1403 
1404 void X86_64Assembler::vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1405   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1406   bool is_twobyte_form = false;
1407   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1408   if (!add_right.NeedsRex()) {
1409     is_twobyte_form = true;
1410   }
1411   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1412   X86_64ManagedRegister vvvv_reg =
1413       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1414   if (is_twobyte_form) {
1415     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1416   } else {
1417     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1418                                    /*X=*/ false,
1419                                    add_right.NeedsRex(),
1420                                    SET_VEX_M_0F);
1421     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1422   }
1423   EmitUint8(ByteZero);
1424   EmitUint8(ByteOne);
1425   if (!is_twobyte_form) {
1426     EmitUint8(ByteTwo);
1427   }
1428   EmitUint8(0x58);
1429   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1430 }
1431 
1432 
1433 void X86_64Assembler::subpd(XmmRegister dst, XmmRegister src) {
1434   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1435   EmitUint8(0x66);
1436   EmitOptionalRex32(dst, src);
1437   EmitUint8(0x0F);
1438   EmitUint8(0x5C);
1439   EmitXmmRegisterOperand(dst.LowBits(), src);
1440 }
1441 
1442 
1443 void X86_64Assembler::vsubpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1444   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1445   bool is_twobyte_form = false;
1446   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1447   if (!src2.NeedsRex()) {
1448     is_twobyte_form = true;
1449   }
1450   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1451   X86_64ManagedRegister vvvv_reg =
1452       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1453   if (is_twobyte_form) {
1454     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1455   } else {
1456     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1457                                    /*X=*/ false,
1458                                    src2.NeedsRex(),
1459                                    SET_VEX_M_0F);
1460     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1461   }
1462   EmitUint8(ByteZero);
1463   EmitUint8(ByteOne);
1464   if (!is_twobyte_form) {
1465     EmitUint8(ByteTwo);
1466   }
1467   EmitUint8(0x5C);
1468   EmitXmmRegisterOperand(dst.LowBits(), src2);
1469 }
1470 
1471 
1472 void X86_64Assembler::mulpd(XmmRegister dst, XmmRegister src) {
1473   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1474   EmitUint8(0x66);
1475   EmitOptionalRex32(dst, src);
1476   EmitUint8(0x0F);
1477   EmitUint8(0x59);
1478   EmitXmmRegisterOperand(dst.LowBits(), src);
1479 }
1480 
1481 void X86_64Assembler::vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1482   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1483   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1484   bool is_twobyte_form = false;
1485   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1486   if (!src2.NeedsRex()) {
1487     is_twobyte_form = true;
1488   }
1489   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1490   X86_64ManagedRegister vvvv_reg =
1491       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1492   if (is_twobyte_form) {
1493     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1494   } else {
1495     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1496                                    /*X=*/ false,
1497                                    src2.NeedsRex(),
1498                                    SET_VEX_M_0F);
1499     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1500   }
1501   EmitUint8(ByteZero);
1502   EmitUint8(ByteOne);
1503   if (!is_twobyte_form) {
1504     EmitUint8(ByteTwo);
1505   }
1506   EmitUint8(0x59);
1507   EmitXmmRegisterOperand(dst.LowBits(), src2);
1508 }
1509 
1510 void X86_64Assembler::divpd(XmmRegister dst, XmmRegister src) {
1511   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1512   EmitUint8(0x66);
1513   EmitOptionalRex32(dst, src);
1514   EmitUint8(0x0F);
1515   EmitUint8(0x5E);
1516   EmitXmmRegisterOperand(dst.LowBits(), src);
1517 }
1518 
1519 
1520 void X86_64Assembler::vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1521   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1522   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1523   bool is_twobyte_form = false;
1524   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1525   if (!src2.NeedsRex()) {
1526     is_twobyte_form = true;
1527   }
1528   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1529   X86_64ManagedRegister vvvv_reg =
1530       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
1531   if (is_twobyte_form) {
1532     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1533   } else {
1534     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1535                                    /*X=*/ false,
1536                                    src2.NeedsRex(),
1537                                    SET_VEX_M_0F);
1538     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1539   }
1540   EmitUint8(ByteZero);
1541   EmitUint8(ByteOne);
1542   if (!is_twobyte_form) {
1543     EmitUint8(ByteTwo);
1544   }
1545   EmitUint8(0x5E);
1546   EmitXmmRegisterOperand(dst.LowBits(), src2);
1547 }
1548 
1549 
1550 void X86_64Assembler::movdqa(XmmRegister dst, XmmRegister src) {
1551   if (CpuHasAVXorAVX2FeatureFlag()) {
1552     vmovdqa(dst, src);
1553     return;
1554   }
1555   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1556   EmitUint8(0x66);
1557   EmitOptionalRex32(dst, src);
1558   EmitUint8(0x0F);
1559   EmitUint8(0x6F);
1560   EmitXmmRegisterOperand(dst.LowBits(), src);
1561 }
1562 
1563 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, xmm2 */
1564 void X86_64Assembler::vmovdqa(XmmRegister dst, XmmRegister src) {
1565   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1566   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1567   uint8_t ByteZero, ByteOne, ByteTwo;
1568   bool is_twobyte_form = true;
1569 
1570   // Instruction VEX Prefix
1571   if (src.NeedsRex() && dst.NeedsRex()) {
1572     is_twobyte_form = false;
1573   }
1574   bool load = dst.NeedsRex();
1575   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1576   if (is_twobyte_form) {
1577     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1578     bool rex_bit = load ? dst.NeedsRex() : src.NeedsRex();
1579     ByteOne = EmitVexPrefixByteOne(rex_bit,
1580                                    vvvv_reg,
1581                                    SET_VEX_L_128,
1582                                    SET_VEX_PP_66);
1583   } else {
1584     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1585                                    /*X=*/ false,
1586                                    src.NeedsRex(),
1587                                    SET_VEX_M_0F);
1588     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1589                                    SET_VEX_L_128,
1590                                    SET_VEX_PP_66);
1591   }
1592   EmitUint8(ByteZero);
1593   EmitUint8(ByteOne);
1594   if (!is_twobyte_form) {
1595     EmitUint8(ByteTwo);
1596   }
1597   // Instruction Opcode
1598   if (is_twobyte_form && !load) {
1599     EmitUint8(0x7F);
1600   } else {
1601     EmitUint8(0x6F);
1602   }
1603   // Instruction Operands
1604   if (is_twobyte_form && !load) {
1605     EmitXmmRegisterOperand(src.LowBits(), dst);
1606   } else {
1607     EmitXmmRegisterOperand(dst.LowBits(), src);
1608   }
1609 }
1610 
1611 void X86_64Assembler::movdqa(XmmRegister dst, const Address& src) {
1612   if (CpuHasAVXorAVX2FeatureFlag()) {
1613     vmovdqa(dst, src);
1614     return;
1615   }
1616   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1617   EmitUint8(0x66);
1618   EmitOptionalRex32(dst, src);
1619   EmitUint8(0x0F);
1620   EmitUint8(0x6F);
1621   EmitOperand(dst.LowBits(), src);
1622 }
1623 
1624 /** VEX.128.66.0F.WIG 6F /r VMOVDQA xmm1, m128 */
1625 void X86_64Assembler::vmovdqa(XmmRegister dst, const Address& src) {
1626   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1627   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1628   uint8_t  ByteZero, ByteOne, ByteTwo;
1629   bool is_twobyte_form = false;
1630 
1631   // Instruction VEX Prefix
1632   uint8_t rex = src.rex();
1633   bool Rex_x = rex & GET_REX_X;
1634   bool Rex_b = rex & GET_REX_B;
1635   if (!Rex_x && !Rex_b) {
1636     is_twobyte_form = true;
1637   }
1638   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1639   if (is_twobyte_form) {
1640     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1641     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1642                                    vvvv_reg,
1643                                    SET_VEX_L_128,
1644                                    SET_VEX_PP_66);
1645   } else {
1646     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1647                                    Rex_x,
1648                                    Rex_b,
1649                                    SET_VEX_M_0F);
1650     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1651                                    SET_VEX_L_128,
1652                                    SET_VEX_PP_66);
1653   }
1654   EmitUint8(ByteZero);
1655   EmitUint8(ByteOne);
1656   if (!is_twobyte_form) {
1657     EmitUint8(ByteTwo);
1658   }
1659   // Instruction Opcode
1660   EmitUint8(0x6F);
1661   // Instruction Operands
1662   EmitOperand(dst.LowBits(), src);
1663 }
1664 
1665 void X86_64Assembler::movdqu(XmmRegister dst, const Address& src) {
1666   if (CpuHasAVXorAVX2FeatureFlag()) {
1667     vmovdqu(dst, src);
1668     return;
1669   }
1670   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1671   EmitUint8(0xF3);
1672   EmitOptionalRex32(dst, src);
1673   EmitUint8(0x0F);
1674   EmitUint8(0x6F);
1675   EmitOperand(dst.LowBits(), src);
1676 }
1677 
1678 /** VEX.128.F3.0F.WIG 6F /r VMOVDQU xmm1, m128
1679 Load Unaligned */
1680 void X86_64Assembler::vmovdqu(XmmRegister dst, const Address& src) {
1681   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1682   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1683   uint8_t ByteZero, ByteOne, ByteTwo;
1684   bool is_twobyte_form = false;
1685 
1686   // Instruction VEX Prefix
1687   uint8_t rex = src.rex();
1688   bool Rex_x = rex & GET_REX_X;
1689   bool Rex_b = rex & GET_REX_B;
1690   if (!Rex_x && !Rex_b) {
1691     is_twobyte_form = true;
1692   }
1693   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1694   if (is_twobyte_form) {
1695     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1696     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1697                                    vvvv_reg,
1698                                    SET_VEX_L_128,
1699                                    SET_VEX_PP_F3);
1700   } else {
1701     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1702                                    Rex_x,
1703                                    Rex_b,
1704                                    SET_VEX_M_0F);
1705     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1706                                    SET_VEX_L_128,
1707                                    SET_VEX_PP_F3);
1708   }
1709   EmitUint8(ByteZero);
1710   EmitUint8(ByteOne);
1711   if (!is_twobyte_form) {
1712     EmitUint8(ByteTwo);
1713   }
1714   // Instruction Opcode
1715   EmitUint8(0x6F);
1716   // Instruction Operands
1717   EmitOperand(dst.LowBits(), src);
1718 }
1719 
1720 void X86_64Assembler::movdqa(const Address& dst, XmmRegister src) {
1721   if (CpuHasAVXorAVX2FeatureFlag()) {
1722     vmovdqa(dst, src);
1723     return;
1724   }
1725   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1726   EmitUint8(0x66);
1727   EmitOptionalRex32(src, dst);
1728   EmitUint8(0x0F);
1729   EmitUint8(0x7F);
1730   EmitOperand(src.LowBits(), dst);
1731 }
1732 
1733 /** VEX.128.66.0F.WIG 7F /r VMOVDQA m128, xmm1 */
1734 void X86_64Assembler::vmovdqa(const Address& dst, XmmRegister src) {
1735   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1736   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1737   bool is_twobyte_form = false;
1738   uint8_t ByteZero, ByteOne, ByteTwo;
1739   // Instruction VEX Prefix
1740   uint8_t rex = dst.rex();
1741   bool Rex_x = rex & GET_REX_X;
1742   bool Rex_b = rex & GET_REX_B;
1743   if (!Rex_x && !Rex_b) {
1744     is_twobyte_form = true;
1745   }
1746   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1747   if (is_twobyte_form) {
1748     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1749     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1750                                    vvvv_reg,
1751                                    SET_VEX_L_128,
1752                                    SET_VEX_PP_66);
1753   } else {
1754     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1755                                    Rex_x,
1756                                    Rex_b,
1757                                    SET_VEX_M_0F);
1758     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1759                                    SET_VEX_L_128,
1760                                    SET_VEX_PP_66);
1761   }
1762   EmitUint8(ByteZero);
1763   EmitUint8(ByteOne);
1764   if (!is_twobyte_form) {
1765     EmitUint8(ByteTwo);
1766   }
1767   // Instruction Opcode
1768   EmitUint8(0x7F);
1769   // Instruction Operands
1770   EmitOperand(src.LowBits(), dst);
1771 }
1772 
1773 void X86_64Assembler::movdqu(const Address& dst, XmmRegister src) {
1774   if (CpuHasAVXorAVX2FeatureFlag()) {
1775     vmovdqu(dst, src);
1776     return;
1777   }
1778   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1779   EmitUint8(0xF3);
1780   EmitOptionalRex32(src, dst);
1781   EmitUint8(0x0F);
1782   EmitUint8(0x7F);
1783   EmitOperand(src.LowBits(), dst);
1784 }
1785 
1786 /** VEX.128.F3.0F.WIG 7F /r VMOVDQU m128, xmm1 */
1787 void X86_64Assembler::vmovdqu(const Address& dst, XmmRegister src) {
1788   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1789   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1790   uint8_t ByteZero, ByteOne, ByteTwo;
1791   bool is_twobyte_form = false;
1792 
1793   // Instruction VEX Prefix
1794   uint8_t rex = dst.rex();
1795   bool Rex_x = rex & GET_REX_X;
1796   bool Rex_b = rex & GET_REX_B;
1797   if (!Rex_b && !Rex_x) {
1798     is_twobyte_form = true;
1799   }
1800   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1801   if (is_twobyte_form) {
1802     X86_64ManagedRegister vvvv_reg = ManagedRegister::NoRegister().AsX86_64();
1803     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1804                                    vvvv_reg,
1805                                    SET_VEX_L_128,
1806                                    SET_VEX_PP_F3);
1807   } else {
1808     ByteOne = EmitVexPrefixByteOne(src.NeedsRex(),
1809                                    Rex_x,
1810                                    Rex_b,
1811                                    SET_VEX_M_0F);
1812     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false,
1813                                    SET_VEX_L_128,
1814                                    SET_VEX_PP_F3);
1815   }
1816   EmitUint8(ByteZero);
1817   EmitUint8(ByteOne);
1818   if (!is_twobyte_form) {
1819     EmitUint8(ByteTwo);
1820   }
1821   // Instruction Opcode
1822   EmitUint8(0x7F);
1823   // Instruction Operands
1824   EmitOperand(src.LowBits(), dst);
1825 }
1826 
1827 void X86_64Assembler::paddb(XmmRegister dst, XmmRegister src) {
1828   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1829   EmitUint8(0x66);
1830   EmitOptionalRex32(dst, src);
1831   EmitUint8(0x0F);
1832   EmitUint8(0xFC);
1833   EmitXmmRegisterOperand(dst.LowBits(), src);
1834 }
1835 
1836 
1837 void X86_64Assembler::vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1838   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1839   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1840   uint8_t ByteOne = 0x00, ByteZero = 0x00, ByteTwo = 0x00;
1841   bool is_twobyte_form = true;
1842   if (add_right.NeedsRex()) {
1843     is_twobyte_form = false;
1844   }
1845   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1846   X86_64ManagedRegister vvvv_reg =
1847       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1848   if (is_twobyte_form) {
1849     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1850   } else {
1851     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1852                                    /*X=*/ false,
1853                                    add_right.NeedsRex(),
1854                                    SET_VEX_M_0F);
1855     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1856   }
1857   EmitUint8(ByteZero);
1858   EmitUint8(ByteOne);
1859   if (!is_twobyte_form) {
1860     EmitUint8(ByteTwo);
1861   }
1862   EmitUint8(0xFC);
1863   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1864 }
1865 
1866 
1867 void X86_64Assembler::psubb(XmmRegister dst, XmmRegister src) {
1868   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1869   EmitUint8(0x66);
1870   EmitOptionalRex32(dst, src);
1871   EmitUint8(0x0F);
1872   EmitUint8(0xF8);
1873   EmitXmmRegisterOperand(dst.LowBits(), src);
1874 }
1875 
1876 
1877 void X86_64Assembler::vpsubb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1878   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1879   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1880   bool is_twobyte_form = false;
1881   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1882   if (!add_right.NeedsRex()) {
1883     is_twobyte_form = true;
1884   }
1885   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1886   X86_64ManagedRegister vvvv_reg =
1887       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1888   if (is_twobyte_form) {
1889     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1890   } else {
1891     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1892                                    /*X=*/ false,
1893                                    add_right.NeedsRex(),
1894                                    SET_VEX_M_0F);
1895     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1896   }
1897   EmitUint8(ByteZero);
1898   EmitUint8(ByteOne);
1899   if (!is_twobyte_form) {
1900     EmitUint8(ByteTwo);
1901   }
1902   EmitUint8(0xF8);
1903   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1904 }
1905 
1906 
1907 void X86_64Assembler::paddw(XmmRegister dst, XmmRegister src) {
1908   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1909   EmitUint8(0x66);
1910   EmitOptionalRex32(dst, src);
1911   EmitUint8(0x0F);
1912   EmitUint8(0xFD);
1913   EmitXmmRegisterOperand(dst.LowBits(), src);
1914 }
1915 
1916 void X86_64Assembler::vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1917   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1918   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1919   bool is_twobyte_form = false;
1920   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1921   if (!add_right.NeedsRex()) {
1922     is_twobyte_form = true;
1923   }
1924   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1925   X86_64ManagedRegister vvvv_reg =
1926       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1927   if (is_twobyte_form) {
1928     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1929   } else {
1930     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1931                                    /*X=*/ false,
1932                                    add_right.NeedsRex(),
1933                                    SET_VEX_M_0F);
1934     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1935   }
1936   EmitUint8(ByteZero);
1937   EmitUint8(ByteOne);
1938   if (!is_twobyte_form) {
1939     EmitUint8(ByteTwo);
1940   }
1941   EmitUint8(0xFD);
1942   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1943 }
1944 
1945 
1946 void X86_64Assembler::psubw(XmmRegister dst, XmmRegister src) {
1947   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1948   EmitUint8(0x66);
1949   EmitOptionalRex32(dst, src);
1950   EmitUint8(0x0F);
1951   EmitUint8(0xF9);
1952   EmitXmmRegisterOperand(dst.LowBits(), src);
1953 }
1954 
1955 void X86_64Assembler::vpsubw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
1956   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1957   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1958   bool is_twobyte_form = false;
1959   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1960   if (!add_right.NeedsRex()) {
1961     is_twobyte_form = true;
1962   }
1963   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
1964   X86_64ManagedRegister vvvv_reg =
1965       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
1966   if (is_twobyte_form) {
1967     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1968   } else {
1969     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
1970                                    /*X=*/ false,
1971                                    add_right.NeedsRex(),
1972                                    SET_VEX_M_0F);
1973     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
1974   }
1975   EmitUint8(ByteZero);
1976   EmitUint8(ByteOne);
1977   if (!is_twobyte_form) {
1978     EmitUint8(ByteTwo);
1979   }
1980   EmitUint8(0xF9);
1981   EmitXmmRegisterOperand(dst.LowBits(), add_right);
1982 }
1983 
1984 
1985 void X86_64Assembler::pmullw(XmmRegister dst, XmmRegister src) {
1986   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1987   EmitUint8(0x66);
1988   EmitOptionalRex32(dst, src);
1989   EmitUint8(0x0F);
1990   EmitUint8(0xD5);
1991   EmitXmmRegisterOperand(dst.LowBits(), src);
1992 }
1993 
1994 void X86_64Assembler::vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
1995   DCHECK(CpuHasAVXorAVX2FeatureFlag());
1996   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
1997   bool is_twobyte_form = false;
1998   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
1999   if (!src2.NeedsRex()) {
2000     is_twobyte_form = true;
2001   }
2002   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2003   X86_64ManagedRegister vvvv_reg =
2004       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2005   if (is_twobyte_form) {
2006     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2007   } else {
2008     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2009                                    /*X=*/ false,
2010                                    src2.NeedsRex(),
2011                                    SET_VEX_M_0F);
2012     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2013   }
2014   EmitUint8(ByteZero);
2015   EmitUint8(ByteOne);
2016   if (!is_twobyte_form) {
2017     EmitUint8(ByteTwo);
2018   }
2019   EmitUint8(0xD5);
2020   EmitXmmRegisterOperand(dst.LowBits(), src2);
2021 }
2022 
2023 void X86_64Assembler::paddd(XmmRegister dst, XmmRegister src) {
2024   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2025   EmitUint8(0x66);
2026   EmitOptionalRex32(dst, src);
2027   EmitUint8(0x0F);
2028   EmitUint8(0xFE);
2029   EmitXmmRegisterOperand(dst.LowBits(), src);
2030 }
2031 
2032 void X86_64Assembler::vpaddd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2033   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2034   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2035   bool is_twobyte_form = false;
2036   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2037   if (!add_right.NeedsRex()) {
2038     is_twobyte_form = true;
2039   }
2040   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2041   X86_64ManagedRegister vvvv_reg =
2042       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2043   if (is_twobyte_form) {
2044     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2045   } else {
2046     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2047                                    /*X=*/ false,
2048                                    add_right.NeedsRex(),
2049                                    SET_VEX_M_0F);
2050     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2051   }
2052   EmitUint8(ByteZero);
2053   EmitUint8(ByteOne);
2054   if (!is_twobyte_form) {
2055     EmitUint8(ByteTwo);
2056   }
2057   EmitUint8(0xFE);
2058   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2059 }
2060 
2061 void X86_64Assembler::psubd(XmmRegister dst, XmmRegister src) {
2062   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2063   EmitUint8(0x66);
2064   EmitOptionalRex32(dst, src);
2065   EmitUint8(0x0F);
2066   EmitUint8(0xFA);
2067   EmitXmmRegisterOperand(dst.LowBits(), src);
2068 }
2069 
2070 
2071 void X86_64Assembler::pmulld(XmmRegister dst, XmmRegister src) {
2072   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2073   EmitUint8(0x66);
2074   EmitOptionalRex32(dst, src);
2075   EmitUint8(0x0F);
2076   EmitUint8(0x38);
2077   EmitUint8(0x40);
2078   EmitXmmRegisterOperand(dst.LowBits(), src);
2079 }
2080 
2081 void X86_64Assembler::vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2082   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2083   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2084   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2085   ByteZero = EmitVexPrefixByteZero(/*is_twobyte_form*/ false);
2086   X86_64ManagedRegister vvvv_reg =
2087       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2088   ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2089                                    /*X=*/ false,
2090                                    src2.NeedsRex(),
2091                                    SET_VEX_M_0F_38);
2092   ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2093   EmitUint8(ByteZero);
2094   EmitUint8(ByteOne);
2095   EmitUint8(ByteTwo);
2096   EmitUint8(0x40);
2097   EmitXmmRegisterOperand(dst.LowBits(), src2);
2098 }
2099 
2100 void X86_64Assembler::paddq(XmmRegister dst, XmmRegister src) {
2101   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2102   EmitUint8(0x66);
2103   EmitOptionalRex32(dst, src);
2104   EmitUint8(0x0F);
2105   EmitUint8(0xD4);
2106   EmitXmmRegisterOperand(dst.LowBits(), src);
2107 }
2108 
2109 
2110 void X86_64Assembler::vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2111   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2112   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2113   bool is_twobyte_form = false;
2114   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2115   if (!add_right.NeedsRex()) {
2116     is_twobyte_form = true;
2117   }
2118   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2119   X86_64ManagedRegister vvvv_reg =
2120       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2121   if (is_twobyte_form) {
2122     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2123   } else {
2124     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2125                                    /*X=*/ false,
2126                                    add_right.NeedsRex(),
2127                                    SET_VEX_M_0F);
2128     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2129   }
2130   EmitUint8(ByteZero);
2131   EmitUint8(ByteOne);
2132   if (!is_twobyte_form) {
2133     EmitUint8(ByteTwo);
2134   }
2135   EmitUint8(0xD4);
2136   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2137 }
2138 
2139 
2140 void X86_64Assembler::psubq(XmmRegister dst, XmmRegister src) {
2141   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2142   EmitUint8(0x66);
2143   EmitOptionalRex32(dst, src);
2144   EmitUint8(0x0F);
2145   EmitUint8(0xFB);
2146   EmitXmmRegisterOperand(dst.LowBits(), src);
2147 }
2148 
2149 void X86_64Assembler::vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2150   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2151   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2152   bool is_twobyte_form = false;
2153   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2154   if (!add_right.NeedsRex()) {
2155     is_twobyte_form = true;
2156   }
2157   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2158   X86_64ManagedRegister vvvv_reg =
2159       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2160   if (is_twobyte_form) {
2161     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2162   } else {
2163     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2164                                    /*X=*/ false,
2165                                    add_right.NeedsRex(),
2166                                    SET_VEX_M_0F);
2167     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2168   }
2169   EmitUint8(ByteZero);
2170   EmitUint8(ByteOne);
2171   if (!is_twobyte_form) {
2172     EmitUint8(ByteTwo);
2173   }
2174   EmitUint8(0xFB);
2175   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2176 }
2177 
2178 
2179 void X86_64Assembler::paddusb(XmmRegister dst, XmmRegister src) {
2180   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2181   EmitUint8(0x66);
2182   EmitOptionalRex32(dst, src);
2183   EmitUint8(0x0F);
2184   EmitUint8(0xDC);
2185   EmitXmmRegisterOperand(dst.LowBits(), src);
2186 }
2187 
2188 
2189 void X86_64Assembler::paddsb(XmmRegister dst, XmmRegister src) {
2190   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2191   EmitUint8(0x66);
2192   EmitOptionalRex32(dst, src);
2193   EmitUint8(0x0F);
2194   EmitUint8(0xEC);
2195   EmitXmmRegisterOperand(dst.LowBits(), src);
2196 }
2197 
2198 
2199 void X86_64Assembler::paddusw(XmmRegister dst, XmmRegister src) {
2200   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2201   EmitUint8(0x66);
2202   EmitOptionalRex32(dst, src);
2203   EmitUint8(0x0F);
2204   EmitUint8(0xDD);
2205   EmitXmmRegisterOperand(dst.LowBits(), src);
2206 }
2207 
2208 
2209 void X86_64Assembler::paddsw(XmmRegister dst, XmmRegister src) {
2210   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2211   EmitUint8(0x66);
2212   EmitOptionalRex32(dst, src);
2213   EmitUint8(0x0F);
2214   EmitUint8(0xED);
2215   EmitXmmRegisterOperand(dst.LowBits(), src);
2216 }
2217 
2218 
2219 void X86_64Assembler::psubusb(XmmRegister dst, XmmRegister src) {
2220   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2221   EmitUint8(0x66);
2222   EmitOptionalRex32(dst, src);
2223   EmitUint8(0x0F);
2224   EmitUint8(0xD8);
2225   EmitXmmRegisterOperand(dst.LowBits(), src);
2226 }
2227 
2228 
2229 void X86_64Assembler::psubsb(XmmRegister dst, XmmRegister src) {
2230   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2231   EmitUint8(0x66);
2232   EmitOptionalRex32(dst, src);
2233   EmitUint8(0x0F);
2234   EmitUint8(0xE8);
2235   EmitXmmRegisterOperand(dst.LowBits(), src);
2236 }
2237 
2238 
2239 void X86_64Assembler::vpsubd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right) {
2240   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2241   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2242   bool is_twobyte_form = false;
2243   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2244   if (!add_right.NeedsRex()) {
2245     is_twobyte_form = true;
2246   }
2247   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2248   X86_64ManagedRegister vvvv_reg =
2249       X86_64ManagedRegister::FromXmmRegister(add_left.AsFloatRegister());
2250   if (is_twobyte_form) {
2251     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2252   } else {
2253     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2254                                    /*X=*/ false,
2255                                    add_right.NeedsRex(),
2256                                    SET_VEX_M_0F);
2257     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2258   }
2259   EmitUint8(ByteZero);
2260   EmitUint8(ByteOne);
2261   if (!is_twobyte_form) {
2262     EmitUint8(ByteTwo);
2263   }
2264   EmitUint8(0xFA);
2265   EmitXmmRegisterOperand(dst.LowBits(), add_right);
2266 }
2267 
2268 
2269 void X86_64Assembler::psubusw(XmmRegister dst, XmmRegister src) {
2270   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2271   EmitUint8(0x66);
2272   EmitOptionalRex32(dst, src);
2273   EmitUint8(0x0F);
2274   EmitUint8(0xD9);
2275   EmitXmmRegisterOperand(dst.LowBits(), src);
2276 }
2277 
2278 
2279 void X86_64Assembler::psubsw(XmmRegister dst, XmmRegister src) {
2280   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2281   EmitUint8(0x66);
2282   EmitOptionalRex32(dst, src);
2283   EmitUint8(0x0F);
2284   EmitUint8(0xE9);
2285   EmitXmmRegisterOperand(dst.LowBits(), src);
2286 }
2287 
2288 
2289 void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src) {
2290   cvtsi2ss(dst, src, false);
2291 }
2292 
2293 
2294 void X86_64Assembler::cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit) {
2295   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2296   EmitUint8(0xF3);
2297   if (is64bit) {
2298     // Emit a REX.W prefix if the operand size is 64 bits.
2299     EmitRex64(dst, src);
2300   } else {
2301     EmitOptionalRex32(dst, src);
2302   }
2303   EmitUint8(0x0F);
2304   EmitUint8(0x2A);
2305   EmitOperand(dst.LowBits(), Operand(src));
2306 }
2307 
2308 
2309 void X86_64Assembler::cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit) {
2310   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2311   EmitUint8(0xF3);
2312   if (is64bit) {
2313     // Emit a REX.W prefix if the operand size is 64 bits.
2314     EmitRex64(dst, src);
2315   } else {
2316     EmitOptionalRex32(dst, src);
2317   }
2318   EmitUint8(0x0F);
2319   EmitUint8(0x2A);
2320   EmitOperand(dst.LowBits(), src);
2321 }
2322 
2323 
2324 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src) {
2325   cvtsi2sd(dst, src, false);
2326 }
2327 
2328 
2329 void X86_64Assembler::cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit) {
2330   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2331   EmitUint8(0xF2);
2332   if (is64bit) {
2333     // Emit a REX.W prefix if the operand size is 64 bits.
2334     EmitRex64(dst, src);
2335   } else {
2336     EmitOptionalRex32(dst, src);
2337   }
2338   EmitUint8(0x0F);
2339   EmitUint8(0x2A);
2340   EmitOperand(dst.LowBits(), Operand(src));
2341 }
2342 
2343 
2344 void X86_64Assembler::cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit) {
2345   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2346   EmitUint8(0xF2);
2347   if (is64bit) {
2348     // Emit a REX.W prefix if the operand size is 64 bits.
2349     EmitRex64(dst, src);
2350   } else {
2351     EmitOptionalRex32(dst, src);
2352   }
2353   EmitUint8(0x0F);
2354   EmitUint8(0x2A);
2355   EmitOperand(dst.LowBits(), src);
2356 }
2357 
2358 
2359 void X86_64Assembler::cvtss2si(CpuRegister dst, XmmRegister src) {
2360   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2361   EmitUint8(0xF3);
2362   EmitOptionalRex32(dst, src);
2363   EmitUint8(0x0F);
2364   EmitUint8(0x2D);
2365   EmitXmmRegisterOperand(dst.LowBits(), src);
2366 }
2367 
2368 
2369 void X86_64Assembler::cvtss2sd(XmmRegister dst, XmmRegister src) {
2370   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2371   EmitUint8(0xF3);
2372   EmitOptionalRex32(dst, src);
2373   EmitUint8(0x0F);
2374   EmitUint8(0x5A);
2375   EmitXmmRegisterOperand(dst.LowBits(), src);
2376 }
2377 
2378 
2379 void X86_64Assembler::cvtss2sd(XmmRegister dst, const Address& src) {
2380   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2381   EmitUint8(0xF3);
2382   EmitOptionalRex32(dst, src);
2383   EmitUint8(0x0F);
2384   EmitUint8(0x5A);
2385   EmitOperand(dst.LowBits(), src);
2386 }
2387 
2388 
2389 void X86_64Assembler::cvtsd2si(CpuRegister dst, XmmRegister src) {
2390   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2391   EmitUint8(0xF2);
2392   EmitOptionalRex32(dst, src);
2393   EmitUint8(0x0F);
2394   EmitUint8(0x2D);
2395   EmitXmmRegisterOperand(dst.LowBits(), src);
2396 }
2397 
2398 
2399 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src) {
2400   cvttss2si(dst, src, false);
2401 }
2402 
2403 
2404 void X86_64Assembler::cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit) {
2405   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2406   EmitUint8(0xF3);
2407   if (is64bit) {
2408     // Emit a REX.W prefix if the operand size is 64 bits.
2409     EmitRex64(dst, src);
2410   } else {
2411     EmitOptionalRex32(dst, src);
2412   }
2413   EmitUint8(0x0F);
2414   EmitUint8(0x2C);
2415   EmitXmmRegisterOperand(dst.LowBits(), src);
2416 }
2417 
2418 
2419 void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src) {
2420   cvttsd2si(dst, src, false);
2421 }
2422 
2423 
2424 void X86_64Assembler::cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit) {
2425   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2426   EmitUint8(0xF2);
2427   if (is64bit) {
2428     // Emit a REX.W prefix if the operand size is 64 bits.
2429     EmitRex64(dst, src);
2430   } else {
2431     EmitOptionalRex32(dst, src);
2432   }
2433   EmitUint8(0x0F);
2434   EmitUint8(0x2C);
2435   EmitXmmRegisterOperand(dst.LowBits(), src);
2436 }
2437 
2438 
2439 void X86_64Assembler::cvtsd2ss(XmmRegister dst, XmmRegister src) {
2440   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2441   EmitUint8(0xF2);
2442   EmitOptionalRex32(dst, src);
2443   EmitUint8(0x0F);
2444   EmitUint8(0x5A);
2445   EmitXmmRegisterOperand(dst.LowBits(), src);
2446 }
2447 
2448 
2449 void X86_64Assembler::cvtsd2ss(XmmRegister dst, const Address& src) {
2450   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2451   EmitUint8(0xF2);
2452   EmitOptionalRex32(dst, src);
2453   EmitUint8(0x0F);
2454   EmitUint8(0x5A);
2455   EmitOperand(dst.LowBits(), src);
2456 }
2457 
2458 
2459 void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
2460   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2461   EmitOptionalRex32(dst, src);
2462   EmitUint8(0x0F);
2463   EmitUint8(0x5B);
2464   EmitXmmRegisterOperand(dst.LowBits(), src);
2465 }
2466 
2467 
2468 void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
2469   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2470   EmitUint8(0xF3);
2471   EmitOptionalRex32(dst, src);
2472   EmitUint8(0x0F);
2473   EmitUint8(0xE6);
2474   EmitXmmRegisterOperand(dst.LowBits(), src);
2475 }
2476 
2477 
2478 void X86_64Assembler::comiss(XmmRegister a, XmmRegister b) {
2479   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2480   EmitOptionalRex32(a, b);
2481   EmitUint8(0x0F);
2482   EmitUint8(0x2F);
2483   EmitXmmRegisterOperand(a.LowBits(), b);
2484 }
2485 
2486 
2487 void X86_64Assembler::comiss(XmmRegister a, const Address& b) {
2488   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2489   EmitOptionalRex32(a, b);
2490   EmitUint8(0x0F);
2491   EmitUint8(0x2F);
2492   EmitOperand(a.LowBits(), b);
2493 }
2494 
2495 
2496 void X86_64Assembler::comisd(XmmRegister a, XmmRegister b) {
2497   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2498   EmitUint8(0x66);
2499   EmitOptionalRex32(a, b);
2500   EmitUint8(0x0F);
2501   EmitUint8(0x2F);
2502   EmitXmmRegisterOperand(a.LowBits(), b);
2503 }
2504 
2505 
2506 void X86_64Assembler::comisd(XmmRegister a, const Address& b) {
2507   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2508   EmitUint8(0x66);
2509   EmitOptionalRex32(a, b);
2510   EmitUint8(0x0F);
2511   EmitUint8(0x2F);
2512   EmitOperand(a.LowBits(), b);
2513 }
2514 
2515 
2516 void X86_64Assembler::ucomiss(XmmRegister a, XmmRegister b) {
2517   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2518   EmitOptionalRex32(a, b);
2519   EmitUint8(0x0F);
2520   EmitUint8(0x2E);
2521   EmitXmmRegisterOperand(a.LowBits(), b);
2522 }
2523 
2524 
2525 void X86_64Assembler::ucomiss(XmmRegister a, const Address& b) {
2526   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2527   EmitOptionalRex32(a, b);
2528   EmitUint8(0x0F);
2529   EmitUint8(0x2E);
2530   EmitOperand(a.LowBits(), b);
2531 }
2532 
2533 
2534 void X86_64Assembler::ucomisd(XmmRegister a, XmmRegister b) {
2535   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2536   EmitUint8(0x66);
2537   EmitOptionalRex32(a, b);
2538   EmitUint8(0x0F);
2539   EmitUint8(0x2E);
2540   EmitXmmRegisterOperand(a.LowBits(), b);
2541 }
2542 
2543 
2544 void X86_64Assembler::ucomisd(XmmRegister a, const Address& b) {
2545   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2546   EmitUint8(0x66);
2547   EmitOptionalRex32(a, b);
2548   EmitUint8(0x0F);
2549   EmitUint8(0x2E);
2550   EmitOperand(a.LowBits(), b);
2551 }
2552 
2553 
2554 void X86_64Assembler::roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
2555   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2556   EmitUint8(0x66);
2557   EmitOptionalRex32(dst, src);
2558   EmitUint8(0x0F);
2559   EmitUint8(0x3A);
2560   EmitUint8(0x0B);
2561   EmitXmmRegisterOperand(dst.LowBits(), src);
2562   EmitUint8(imm.value());
2563 }
2564 
2565 
2566 void X86_64Assembler::roundss(XmmRegister dst, XmmRegister src, const Immediate& imm) {
2567   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2568   EmitUint8(0x66);
2569   EmitOptionalRex32(dst, src);
2570   EmitUint8(0x0F);
2571   EmitUint8(0x3A);
2572   EmitUint8(0x0A);
2573   EmitXmmRegisterOperand(dst.LowBits(), src);
2574   EmitUint8(imm.value());
2575 }
2576 
2577 
2578 void X86_64Assembler::sqrtsd(XmmRegister dst, XmmRegister src) {
2579   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2580   EmitUint8(0xF2);
2581   EmitOptionalRex32(dst, src);
2582   EmitUint8(0x0F);
2583   EmitUint8(0x51);
2584   EmitXmmRegisterOperand(dst.LowBits(), src);
2585 }
2586 
2587 
2588 void X86_64Assembler::sqrtss(XmmRegister dst, XmmRegister src) {
2589   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2590   EmitUint8(0xF3);
2591   EmitOptionalRex32(dst, src);
2592   EmitUint8(0x0F);
2593   EmitUint8(0x51);
2594   EmitXmmRegisterOperand(dst.LowBits(), src);
2595 }
2596 
2597 
2598 void X86_64Assembler::xorpd(XmmRegister dst, const Address& src) {
2599   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2600   EmitUint8(0x66);
2601   EmitOptionalRex32(dst, src);
2602   EmitUint8(0x0F);
2603   EmitUint8(0x57);
2604   EmitOperand(dst.LowBits(), src);
2605 }
2606 
2607 
2608 void X86_64Assembler::xorpd(XmmRegister dst, XmmRegister src) {
2609   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2610   EmitUint8(0x66);
2611   EmitOptionalRex32(dst, src);
2612   EmitUint8(0x0F);
2613   EmitUint8(0x57);
2614   EmitXmmRegisterOperand(dst.LowBits(), src);
2615 }
2616 
2617 
2618 void X86_64Assembler::xorps(XmmRegister dst, const Address& src) {
2619   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2620   EmitOptionalRex32(dst, src);
2621   EmitUint8(0x0F);
2622   EmitUint8(0x57);
2623   EmitOperand(dst.LowBits(), src);
2624 }
2625 
2626 
2627 void X86_64Assembler::xorps(XmmRegister dst, XmmRegister src) {
2628   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2629   EmitOptionalRex32(dst, src);
2630   EmitUint8(0x0F);
2631   EmitUint8(0x57);
2632   EmitXmmRegisterOperand(dst.LowBits(), src);
2633 }
2634 
2635 void X86_64Assembler::pxor(XmmRegister dst, XmmRegister src) {
2636   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2637   EmitUint8(0x66);
2638   EmitOptionalRex32(dst, src);
2639   EmitUint8(0x0F);
2640   EmitUint8(0xEF);
2641   EmitXmmRegisterOperand(dst.LowBits(), src);
2642 }
2643 
2644 /* VEX.128.66.0F.WIG EF /r VPXOR xmm1, xmm2, xmm3/m128 */
2645 void X86_64Assembler::vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2646   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2647   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2648   bool is_twobyte_form = false;
2649   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2650   if (!src2.NeedsRex()) {
2651     is_twobyte_form = true;
2652   }
2653   X86_64ManagedRegister vvvv_reg =
2654       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2655   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2656   if (is_twobyte_form) {
2657     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2658   } else {
2659     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2660                                    /*X=*/ false,
2661                                    src2.NeedsRex(),
2662                                    SET_VEX_M_0F);
2663     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2664   }
2665   EmitUint8(ByteZero);
2666   EmitUint8(ByteOne);
2667   if (!is_twobyte_form) {
2668     EmitUint8(ByteTwo);
2669   }
2670   EmitUint8(0xEF);
2671   EmitXmmRegisterOperand(dst.LowBits(), src2);
2672 }
2673 
2674 /* VEX.128.0F.WIG 57 /r VXORPS xmm1,xmm2, xmm3/m128 */
2675 void X86_64Assembler::vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2676   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2677   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2678   bool is_twobyte_form = false;
2679   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2680   if (!src2.NeedsRex()) {
2681     is_twobyte_form = true;
2682   }
2683   X86_64ManagedRegister vvvv_reg =
2684       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2685   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2686   if (is_twobyte_form) {
2687     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2688   } else {
2689     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2690                                    /*X=*/ false,
2691                                    src2.NeedsRex(),
2692                                    SET_VEX_M_0F);
2693     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2694   }
2695   EmitUint8(ByteZero);
2696   EmitUint8(ByteOne);
2697   if (!is_twobyte_form) {
2698     EmitUint8(ByteTwo);
2699   }
2700   EmitUint8(0x57);
2701   EmitXmmRegisterOperand(dst.LowBits(), src2);
2702 }
2703 
2704 /* VEX.128.66.0F.WIG 57 /r VXORPD xmm1,xmm2, xmm3/m128 */
2705 void X86_64Assembler::vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2706   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2707   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2708   bool is_twobyte_form = false;
2709   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2710   if (!src2.NeedsRex()) {
2711     is_twobyte_form = true;
2712   }
2713   X86_64ManagedRegister vvvv_reg =
2714       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2715   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2716   if (is_twobyte_form) {
2717     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2718   } else {
2719     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2720                                    /*X=*/ false,
2721                                    src2.NeedsRex(),
2722                                    SET_VEX_M_0F);
2723     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2724   }
2725   EmitUint8(ByteZero);
2726   EmitUint8(ByteOne);
2727   if (!is_twobyte_form) {
2728     EmitUint8(ByteTwo);
2729   }
2730   EmitUint8(0x57);
2731   EmitXmmRegisterOperand(dst.LowBits(), src2);
2732 }
2733 
2734 void X86_64Assembler::andpd(XmmRegister dst, const Address& src) {
2735   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2736   EmitUint8(0x66);
2737   EmitOptionalRex32(dst, src);
2738   EmitUint8(0x0F);
2739   EmitUint8(0x54);
2740   EmitOperand(dst.LowBits(), src);
2741 }
2742 
2743 void X86_64Assembler::andpd(XmmRegister dst, XmmRegister src) {
2744   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2745   EmitUint8(0x66);
2746   EmitOptionalRex32(dst, src);
2747   EmitUint8(0x0F);
2748   EmitUint8(0x54);
2749   EmitXmmRegisterOperand(dst.LowBits(), src);
2750 }
2751 
2752 void X86_64Assembler::andps(XmmRegister dst, XmmRegister src) {
2753   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2754   EmitOptionalRex32(dst, src);
2755   EmitUint8(0x0F);
2756   EmitUint8(0x54);
2757   EmitXmmRegisterOperand(dst.LowBits(), src);
2758 }
2759 
2760 void X86_64Assembler::pand(XmmRegister dst, XmmRegister src) {
2761   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2762   EmitUint8(0x66);
2763   EmitOptionalRex32(dst, src);
2764   EmitUint8(0x0F);
2765   EmitUint8(0xDB);
2766   EmitXmmRegisterOperand(dst.LowBits(), src);
2767 }
2768 
2769 /* VEX.128.66.0F.WIG DB /r VPAND xmm1, xmm2, xmm3/m128 */
2770 void X86_64Assembler::vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2771   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2772   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2773   bool is_twobyte_form = false;
2774   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2775   if (!src2.NeedsRex()) {
2776     is_twobyte_form = true;
2777   }
2778   X86_64ManagedRegister vvvv_reg =
2779       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2780   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2781   if (is_twobyte_form) {
2782     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2783   } else {
2784     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2785                                    /*X=*/ false,
2786                                    src2.NeedsRex(),
2787                                    SET_VEX_M_0F);
2788     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2789   }
2790   EmitUint8(ByteZero);
2791   EmitUint8(ByteOne);
2792   if (!is_twobyte_form) {
2793     EmitUint8(ByteTwo);
2794   }
2795   EmitUint8(0xDB);
2796   EmitXmmRegisterOperand(dst.LowBits(), src2);
2797 }
2798 
2799 /* VEX.128.0F 54 /r VANDPS xmm1,xmm2, xmm3/m128 */
2800 void X86_64Assembler::vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2801   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2802   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2803   bool is_twobyte_form = false;
2804   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2805   if (!src2.NeedsRex()) {
2806     is_twobyte_form = true;
2807   }
2808   X86_64ManagedRegister vvvv_reg =
2809       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2810   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2811   if (is_twobyte_form) {
2812     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2813   } else {
2814     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2815                                    /*X=*/ false,
2816                                    src2.NeedsRex(),
2817                                    SET_VEX_M_0F);
2818     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2819   }
2820   EmitUint8(ByteZero);
2821   EmitUint8(ByteOne);
2822   if (!is_twobyte_form) {
2823     EmitUint8(ByteTwo);
2824   }
2825   EmitUint8(0x54);
2826   EmitXmmRegisterOperand(dst.LowBits(), src2);
2827 }
2828 
2829 /* VEX.128.66.0F 54 /r VANDPD xmm1, xmm2, xmm3/m128 */
2830 void X86_64Assembler::vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2831   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2832   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2833   bool is_twobyte_form = false;
2834   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2835   if (!src2.NeedsRex()) {
2836     is_twobyte_form = true;
2837   }
2838   X86_64ManagedRegister vvvv_reg =
2839       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2840   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2841   if (is_twobyte_form) {
2842     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2843   } else {
2844     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2845                                    /*X=*/ false,
2846                                    src2.NeedsRex(),
2847                                    SET_VEX_M_0F);
2848     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2849   }
2850   EmitUint8(ByteZero);
2851   EmitUint8(ByteOne);
2852   if (!is_twobyte_form) {
2853     EmitUint8(ByteTwo);
2854   }
2855   EmitUint8(0x54);
2856   EmitXmmRegisterOperand(dst.LowBits(), src2);
2857 }
2858 
2859 void X86_64Assembler::andn(CpuRegister dst, CpuRegister src1, CpuRegister src2) {
2860   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2861   uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
2862   uint8_t byte_one = EmitVexPrefixByteOne(dst.NeedsRex(),
2863                                           /*X=*/ false,
2864                                           src2.NeedsRex(),
2865                                           SET_VEX_M_0F_38);
2866   uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
2867                                           X86_64ManagedRegister::FromCpuRegister(src1.AsRegister()),
2868                                           SET_VEX_L_128,
2869                                           SET_VEX_PP_NONE);
2870   EmitUint8(byte_zero);
2871   EmitUint8(byte_one);
2872   EmitUint8(byte_two);
2873   // Opcode field
2874   EmitUint8(0xF2);
2875   EmitRegisterOperand(dst.LowBits(), src2.LowBits());
2876 }
2877 
2878 void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
2879   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2880   EmitUint8(0x66);
2881   EmitOptionalRex32(dst, src);
2882   EmitUint8(0x0F);
2883   EmitUint8(0x55);
2884   EmitXmmRegisterOperand(dst.LowBits(), src);
2885 }
2886 
2887 void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
2888   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2889   EmitOptionalRex32(dst, src);
2890   EmitUint8(0x0F);
2891   EmitUint8(0x55);
2892   EmitXmmRegisterOperand(dst.LowBits(), src);
2893 }
2894 
2895 void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
2896   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2897   EmitUint8(0x66);
2898   EmitOptionalRex32(dst, src);
2899   EmitUint8(0x0F);
2900   EmitUint8(0xDF);
2901   EmitXmmRegisterOperand(dst.LowBits(), src);
2902 }
2903 
2904 /* VEX.128.66.0F.WIG DF /r VPANDN xmm1, xmm2, xmm3/m128 */
2905 void X86_64Assembler::vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2906   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2907   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2908   bool is_twobyte_form = false;
2909   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2910   if (!src2.NeedsRex()) {
2911     is_twobyte_form = true;
2912   }
2913   X86_64ManagedRegister vvvv_reg =
2914       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2915   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2916   if (is_twobyte_form) {
2917     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2918   } else {
2919     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2920                                    /*X=*/ false,
2921                                    src2.NeedsRex(),
2922                                    SET_VEX_M_0F);
2923     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2924   }
2925   EmitUint8(ByteZero);
2926   EmitUint8(ByteOne);
2927   if (!is_twobyte_form) {
2928     EmitUint8(ByteTwo);
2929   }
2930   EmitUint8(0xDF);
2931   EmitXmmRegisterOperand(dst.LowBits(), src2);
2932 }
2933 
2934 /* VEX.128.0F 55 /r VANDNPS xmm1, xmm2, xmm3/m128 */
2935 void X86_64Assembler::vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2936   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2937   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2938   bool is_twobyte_form = false;
2939   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2940   if (!src2.NeedsRex()) {
2941     is_twobyte_form = true;
2942   }
2943   X86_64ManagedRegister vvvv_reg =
2944       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2945   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2946   if (is_twobyte_form) {
2947     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2948   } else {
2949     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2950                                    /*X=*/ false,
2951                                    src2.NeedsRex(),
2952                                    SET_VEX_M_0F);
2953     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
2954   }
2955   EmitUint8(ByteZero);
2956   EmitUint8(ByteOne);
2957   if (!is_twobyte_form) {
2958     EmitUint8(ByteTwo);
2959   }
2960   EmitUint8(0x55);
2961   EmitXmmRegisterOperand(dst.LowBits(), src2);
2962 }
2963 
2964 /* VEX.128.66.0F 55 /r VANDNPD xmm1, xmm2, xmm3/m128 */
2965 void X86_64Assembler::vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
2966   DCHECK(CpuHasAVXorAVX2FeatureFlag());
2967   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2968   bool is_twobyte_form = false;
2969   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
2970   if (!src2.NeedsRex()) {
2971     is_twobyte_form = true;
2972   }
2973   X86_64ManagedRegister vvvv_reg =
2974       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
2975   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
2976   if (is_twobyte_form) {
2977     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2978   } else {
2979     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
2980                                    /*X=*/ false,
2981                                    src2.NeedsRex(),
2982                                    SET_VEX_M_0F);
2983     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
2984   }
2985   EmitUint8(ByteZero);
2986   EmitUint8(ByteOne);
2987   if (!is_twobyte_form) {
2988     EmitUint8(ByteTwo);
2989   }
2990   EmitUint8(0x55);
2991   EmitXmmRegisterOperand(dst.LowBits(), src2);
2992 }
2993 
2994 void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
2995   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
2996   EmitUint8(0x66);
2997   EmitOptionalRex32(dst, src);
2998   EmitUint8(0x0F);
2999   EmitUint8(0x56);
3000   EmitXmmRegisterOperand(dst.LowBits(), src);
3001 }
3002 
3003 void X86_64Assembler::orps(XmmRegister dst, XmmRegister src) {
3004   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3005   EmitOptionalRex32(dst, src);
3006   EmitUint8(0x0F);
3007   EmitUint8(0x56);
3008   EmitXmmRegisterOperand(dst.LowBits(), src);
3009 }
3010 
3011 void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
3012   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3013   EmitUint8(0x66);
3014   EmitOptionalRex32(dst, src);
3015   EmitUint8(0x0F);
3016   EmitUint8(0xEB);
3017   EmitXmmRegisterOperand(dst.LowBits(), src);
3018 }
3019 
3020 /* VEX.128.66.0F.WIG EB /r VPOR xmm1, xmm2, xmm3/m128 */
3021 void X86_64Assembler::vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3022   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3023   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3024   bool is_twobyte_form = false;
3025   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3026   if (!src2.NeedsRex()) {
3027     is_twobyte_form = true;
3028   }
3029   X86_64ManagedRegister vvvv_reg =
3030       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3031   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3032   if (is_twobyte_form) {
3033     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3034   } else {
3035     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3036                                    /*X=*/ false,
3037                                    src2.NeedsRex(),
3038                                    SET_VEX_M_0F);
3039     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3040   }
3041   EmitUint8(ByteZero);
3042   EmitUint8(ByteOne);
3043   if (!is_twobyte_form) {
3044     EmitUint8(ByteTwo);
3045   }
3046   EmitUint8(0xEB);
3047   EmitXmmRegisterOperand(dst.LowBits(), src2);
3048 }
3049 
3050 /* VEX.128.0F 56 /r VORPS xmm1,xmm2, xmm3/m128 */
3051 void X86_64Assembler::vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3052   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3053   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3054   bool is_twobyte_form = false;
3055   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3056   if (!src2.NeedsRex()) {
3057     is_twobyte_form = true;
3058   }
3059   X86_64ManagedRegister vvvv_reg =
3060       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3061   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3062   if (is_twobyte_form) {
3063     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3064   } else {
3065     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3066                                    /*X=*/ false,
3067                                    src2.NeedsRex(),
3068                                    SET_VEX_M_0F);
3069     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_NONE);
3070   }
3071   EmitUint8(ByteZero);
3072   EmitUint8(ByteOne);
3073   if (!is_twobyte_form) {
3074     EmitUint8(ByteTwo);
3075   }
3076   EmitUint8(0x56);
3077   EmitXmmRegisterOperand(dst.LowBits(), src2);
3078 }
3079 
3080 /* VEX.128.66.0F 56 /r VORPD xmm1,xmm2, xmm3/m128 */
3081 void X86_64Assembler::vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3082   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3083   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3084   bool is_twobyte_form = false;
3085   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3086   if (!src2.NeedsRex()) {
3087     is_twobyte_form = true;
3088   }
3089   X86_64ManagedRegister vvvv_reg =
3090       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3091   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3092   if (is_twobyte_form) {
3093     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3094   } else {
3095     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3096                                    /*X=*/ false,
3097                                    src2.NeedsRex(),
3098                                    SET_VEX_M_0F);
3099     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3100   }
3101   EmitUint8(ByteZero);
3102   EmitUint8(ByteOne);
3103   if (!is_twobyte_form) {
3104     EmitUint8(ByteTwo);
3105   }
3106   EmitUint8(0x56);
3107   EmitXmmRegisterOperand(dst.LowBits(), src2);
3108 }
3109 
3110 void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
3111   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3112   EmitUint8(0x66);
3113   EmitOptionalRex32(dst, src);
3114   EmitUint8(0x0F);
3115   EmitUint8(0xE0);
3116   EmitXmmRegisterOperand(dst.LowBits(), src);
3117 }
3118 
3119 void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
3120   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3121   EmitUint8(0x66);
3122   EmitOptionalRex32(dst, src);
3123   EmitUint8(0x0F);
3124   EmitUint8(0xE3);
3125   EmitXmmRegisterOperand(dst.LowBits(), src);
3126 }
3127 
3128 void X86_64Assembler::psadbw(XmmRegister dst, XmmRegister src) {
3129   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3130   EmitUint8(0x66);
3131   EmitOptionalRex32(dst, src);
3132   EmitUint8(0x0F);
3133   EmitUint8(0xF6);
3134   EmitXmmRegisterOperand(dst.LowBits(), src);
3135 }
3136 
3137 void X86_64Assembler::pmaddwd(XmmRegister dst, XmmRegister src) {
3138   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3139   EmitUint8(0x66);
3140   EmitOptionalRex32(dst, src);
3141   EmitUint8(0x0F);
3142   EmitUint8(0xF5);
3143   EmitXmmRegisterOperand(dst.LowBits(), src);
3144 }
3145 
3146 void X86_64Assembler::vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2) {
3147   DCHECK(CpuHasAVXorAVX2FeatureFlag());
3148   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3149   bool is_twobyte_form = false;
3150   uint8_t ByteZero = 0x00, ByteOne = 0x00, ByteTwo = 0x00;
3151   if (!src2.NeedsRex()) {
3152     is_twobyte_form = true;
3153   }
3154   ByteZero = EmitVexPrefixByteZero(is_twobyte_form);
3155   X86_64ManagedRegister vvvv_reg =
3156       X86_64ManagedRegister::FromXmmRegister(src1.AsFloatRegister());
3157   if (is_twobyte_form) {
3158     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(), vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3159   } else {
3160     ByteOne = EmitVexPrefixByteOne(dst.NeedsRex(),
3161                                    /*X=*/ false,
3162                                    src2.NeedsRex(),
3163                                    SET_VEX_M_0F);
3164     ByteTwo = EmitVexPrefixByteTwo(/*W=*/ false, vvvv_reg, SET_VEX_L_128, SET_VEX_PP_66);
3165   }
3166   EmitUint8(ByteZero);
3167   EmitUint8(ByteOne);
3168   if (!is_twobyte_form) {
3169     EmitUint8(ByteTwo);
3170   }
3171   EmitUint8(0xF5);
3172   EmitXmmRegisterOperand(dst.LowBits(), src2);
3173 }
3174 
3175 void X86_64Assembler::phaddw(XmmRegister dst, XmmRegister src) {
3176   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3177   EmitUint8(0x66);
3178   EmitOptionalRex32(dst, src);
3179   EmitUint8(0x0F);
3180   EmitUint8(0x38);
3181   EmitUint8(0x01);
3182   EmitXmmRegisterOperand(dst.LowBits(), src);
3183 }
3184 
3185 void X86_64Assembler::phaddd(XmmRegister dst, XmmRegister src) {
3186   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3187   EmitUint8(0x66);
3188   EmitOptionalRex32(dst, src);
3189   EmitUint8(0x0F);
3190   EmitUint8(0x38);
3191   EmitUint8(0x02);
3192   EmitXmmRegisterOperand(dst.LowBits(), src);
3193 }
3194 
3195 void X86_64Assembler::haddps(XmmRegister dst, XmmRegister src) {
3196   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3197   EmitUint8(0xF2);
3198   EmitOptionalRex32(dst, src);
3199   EmitUint8(0x0F);
3200   EmitUint8(0x7C);
3201   EmitXmmRegisterOperand(dst.LowBits(), src);
3202 }
3203 
3204 void X86_64Assembler::haddpd(XmmRegister dst, XmmRegister src) {
3205   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3206   EmitUint8(0x66);
3207   EmitOptionalRex32(dst, src);
3208   EmitUint8(0x0F);
3209   EmitUint8(0x7C);
3210   EmitXmmRegisterOperand(dst.LowBits(), src);
3211 }
3212 
3213 void X86_64Assembler::phsubw(XmmRegister dst, XmmRegister src) {
3214   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3215   EmitUint8(0x66);
3216   EmitOptionalRex32(dst, src);
3217   EmitUint8(0x0F);
3218   EmitUint8(0x38);
3219   EmitUint8(0x05);
3220   EmitXmmRegisterOperand(dst.LowBits(), src);
3221 }
3222 
3223 void X86_64Assembler::phsubd(XmmRegister dst, XmmRegister src) {
3224   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3225   EmitUint8(0x66);
3226   EmitOptionalRex32(dst, src);
3227   EmitUint8(0x0F);
3228   EmitUint8(0x38);
3229   EmitUint8(0x06);
3230   EmitXmmRegisterOperand(dst.LowBits(), src);
3231 }
3232 
3233 void X86_64Assembler::hsubps(XmmRegister dst, XmmRegister src) {
3234   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3235   EmitUint8(0xF2);
3236   EmitOptionalRex32(dst, src);
3237   EmitUint8(0x0F);
3238   EmitUint8(0x7D);
3239   EmitXmmRegisterOperand(dst.LowBits(), src);
3240 }
3241 
3242 void X86_64Assembler::hsubpd(XmmRegister dst, XmmRegister src) {
3243   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3244   EmitUint8(0x66);
3245   EmitOptionalRex32(dst, src);
3246   EmitUint8(0x0F);
3247   EmitUint8(0x7D);
3248   EmitXmmRegisterOperand(dst.LowBits(), src);
3249 }
3250 
3251 void X86_64Assembler::pminsb(XmmRegister dst, XmmRegister src) {
3252   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3253   EmitUint8(0x66);
3254   EmitOptionalRex32(dst, src);
3255   EmitUint8(0x0F);
3256   EmitUint8(0x38);
3257   EmitUint8(0x38);
3258   EmitXmmRegisterOperand(dst.LowBits(), src);
3259 }
3260 
3261 void X86_64Assembler::pmaxsb(XmmRegister dst, XmmRegister src) {
3262   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3263   EmitUint8(0x66);
3264   EmitOptionalRex32(dst, src);
3265   EmitUint8(0x0F);
3266   EmitUint8(0x38);
3267   EmitUint8(0x3C);
3268   EmitXmmRegisterOperand(dst.LowBits(), src);
3269 }
3270 
3271 void X86_64Assembler::pminsw(XmmRegister dst, XmmRegister src) {
3272   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3273   EmitUint8(0x66);
3274   EmitOptionalRex32(dst, src);
3275   EmitUint8(0x0F);
3276   EmitUint8(0xEA);
3277   EmitXmmRegisterOperand(dst.LowBits(), src);
3278 }
3279 
3280 void X86_64Assembler::pmaxsw(XmmRegister dst, XmmRegister src) {
3281   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3282   EmitUint8(0x66);
3283   EmitOptionalRex32(dst, src);
3284   EmitUint8(0x0F);
3285   EmitUint8(0xEE);
3286   EmitXmmRegisterOperand(dst.LowBits(), src);
3287 }
3288 
3289 void X86_64Assembler::pminsd(XmmRegister dst, XmmRegister src) {
3290   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3291   EmitUint8(0x66);
3292   EmitOptionalRex32(dst, src);
3293   EmitUint8(0x0F);
3294   EmitUint8(0x38);
3295   EmitUint8(0x39);
3296   EmitXmmRegisterOperand(dst.LowBits(), src);
3297 }
3298 
3299 void X86_64Assembler::pmaxsd(XmmRegister dst, XmmRegister src) {
3300   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3301   EmitUint8(0x66);
3302   EmitOptionalRex32(dst, src);
3303   EmitUint8(0x0F);
3304   EmitUint8(0x38);
3305   EmitUint8(0x3D);
3306   EmitXmmRegisterOperand(dst.LowBits(), src);
3307 }
3308 
3309 void X86_64Assembler::pminub(XmmRegister dst, XmmRegister src) {
3310   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3311   EmitUint8(0x66);
3312   EmitOptionalRex32(dst, src);
3313   EmitUint8(0x0F);
3314   EmitUint8(0xDA);
3315   EmitXmmRegisterOperand(dst.LowBits(), src);
3316 }
3317 
3318 void X86_64Assembler::pmaxub(XmmRegister dst, XmmRegister src) {
3319   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3320   EmitUint8(0x66);
3321   EmitOptionalRex32(dst, src);
3322   EmitUint8(0x0F);
3323   EmitUint8(0xDE);
3324   EmitXmmRegisterOperand(dst.LowBits(), src);
3325 }
3326 
3327 void X86_64Assembler::pminuw(XmmRegister dst, XmmRegister src) {
3328   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3329   EmitUint8(0x66);
3330   EmitOptionalRex32(dst, src);
3331   EmitUint8(0x0F);
3332   EmitUint8(0x38);
3333   EmitUint8(0x3A);
3334   EmitXmmRegisterOperand(dst.LowBits(), src);
3335 }
3336 
3337 void X86_64Assembler::pmaxuw(XmmRegister dst, XmmRegister src) {
3338   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3339   EmitUint8(0x66);
3340   EmitOptionalRex32(dst, src);
3341   EmitUint8(0x0F);
3342   EmitUint8(0x38);
3343   EmitUint8(0x3E);
3344   EmitXmmRegisterOperand(dst.LowBits(), src);
3345 }
3346 
3347 void X86_64Assembler::pminud(XmmRegister dst, XmmRegister src) {
3348   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3349   EmitUint8(0x66);
3350   EmitOptionalRex32(dst, src);
3351   EmitUint8(0x0F);
3352   EmitUint8(0x38);
3353   EmitUint8(0x3B);
3354   EmitXmmRegisterOperand(dst.LowBits(), src);
3355 }
3356 
3357 void X86_64Assembler::pmaxud(XmmRegister dst, XmmRegister src) {
3358   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3359   EmitUint8(0x66);
3360   EmitOptionalRex32(dst, src);
3361   EmitUint8(0x0F);
3362   EmitUint8(0x38);
3363   EmitUint8(0x3F);
3364   EmitXmmRegisterOperand(dst.LowBits(), src);
3365 }
3366 
3367 void X86_64Assembler::minps(XmmRegister dst, XmmRegister src) {
3368   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3369   EmitOptionalRex32(dst, src);
3370   EmitUint8(0x0F);
3371   EmitUint8(0x5D);
3372   EmitXmmRegisterOperand(dst.LowBits(), src);
3373 }
3374 
3375 void X86_64Assembler::maxps(XmmRegister dst, XmmRegister src) {
3376   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3377   EmitOptionalRex32(dst, src);
3378   EmitUint8(0x0F);
3379   EmitUint8(0x5F);
3380   EmitXmmRegisterOperand(dst.LowBits(), src);
3381 }
3382 
3383 void X86_64Assembler::minpd(XmmRegister dst, XmmRegister src) {
3384   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3385   EmitUint8(0x66);
3386   EmitOptionalRex32(dst, src);
3387   EmitUint8(0x0F);
3388   EmitUint8(0x5D);
3389   EmitXmmRegisterOperand(dst.LowBits(), src);
3390 }
3391 
3392 void X86_64Assembler::maxpd(XmmRegister dst, XmmRegister src) {
3393   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3394   EmitUint8(0x66);
3395   EmitOptionalRex32(dst, src);
3396   EmitUint8(0x0F);
3397   EmitUint8(0x5F);
3398   EmitXmmRegisterOperand(dst.LowBits(), src);
3399 }
3400 
3401 void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
3402   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3403   EmitUint8(0x66);
3404   EmitOptionalRex32(dst, src);
3405   EmitUint8(0x0F);
3406   EmitUint8(0x74);
3407   EmitXmmRegisterOperand(dst.LowBits(), src);
3408 }
3409 
3410 void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
3411   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3412   EmitUint8(0x66);
3413   EmitOptionalRex32(dst, src);
3414   EmitUint8(0x0F);
3415   EmitUint8(0x75);
3416   EmitXmmRegisterOperand(dst.LowBits(), src);
3417 }
3418 
3419 void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
3420   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3421   EmitUint8(0x66);
3422   EmitOptionalRex32(dst, src);
3423   EmitUint8(0x0F);
3424   EmitUint8(0x76);
3425   EmitXmmRegisterOperand(dst.LowBits(), src);
3426 }
3427 
3428 void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
3429   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3430   EmitUint8(0x66);
3431   EmitOptionalRex32(dst, src);
3432   EmitUint8(0x0F);
3433   EmitUint8(0x38);
3434   EmitUint8(0x29);
3435   EmitXmmRegisterOperand(dst.LowBits(), src);
3436 }
3437 
3438 void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
3439   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3440   EmitUint8(0x66);
3441   EmitOptionalRex32(dst, src);
3442   EmitUint8(0x0F);
3443   EmitUint8(0x64);
3444   EmitXmmRegisterOperand(dst.LowBits(), src);
3445 }
3446 
3447 void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
3448   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3449   EmitUint8(0x66);
3450   EmitOptionalRex32(dst, src);
3451   EmitUint8(0x0F);
3452   EmitUint8(0x65);
3453   EmitXmmRegisterOperand(dst.LowBits(), src);
3454 }
3455 
3456 void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
3457   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3458   EmitUint8(0x66);
3459   EmitOptionalRex32(dst, src);
3460   EmitUint8(0x0F);
3461   EmitUint8(0x66);
3462   EmitXmmRegisterOperand(dst.LowBits(), src);
3463 }
3464 
3465 void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
3466   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3467   EmitUint8(0x66);
3468   EmitOptionalRex32(dst, src);
3469   EmitUint8(0x0F);
3470   EmitUint8(0x38);
3471   EmitUint8(0x37);
3472   EmitXmmRegisterOperand(dst.LowBits(), src);
3473 }
3474 
3475 void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
3476   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3477   EmitUint8(0x66);
3478   EmitOptionalRex32(dst, src);
3479   EmitUint8(0x0F);
3480   EmitUint8(0xC6);
3481   EmitXmmRegisterOperand(dst.LowBits(), src);
3482   EmitUint8(imm.value());
3483 }
3484 
3485 
3486 void X86_64Assembler::shufps(XmmRegister dst, XmmRegister src, const Immediate& imm) {
3487   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3488   EmitOptionalRex32(dst, src);
3489   EmitUint8(0x0F);
3490   EmitUint8(0xC6);
3491   EmitXmmRegisterOperand(dst.LowBits(), src);
3492   EmitUint8(imm.value());
3493 }
3494 
3495 
3496 void X86_64Assembler::pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
3497   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3498   EmitUint8(0x66);
3499   EmitOptionalRex32(dst, src);
3500   EmitUint8(0x0F);
3501   EmitUint8(0x70);
3502   EmitXmmRegisterOperand(dst.LowBits(), src);
3503   EmitUint8(imm.value());
3504 }
3505 
3506 
3507 void X86_64Assembler::punpcklbw(XmmRegister dst, XmmRegister src) {
3508   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3509   EmitUint8(0x66);
3510   EmitOptionalRex32(dst, src);
3511   EmitUint8(0x0F);
3512   EmitUint8(0x60);
3513   EmitXmmRegisterOperand(dst.LowBits(), src);
3514 }
3515 
3516 
3517 void X86_64Assembler::punpcklwd(XmmRegister dst, XmmRegister src) {
3518   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3519   EmitUint8(0x66);
3520   EmitOptionalRex32(dst, src);
3521   EmitUint8(0x0F);
3522   EmitUint8(0x61);
3523   EmitXmmRegisterOperand(dst.LowBits(), src);
3524 }
3525 
3526 
3527 void X86_64Assembler::punpckldq(XmmRegister dst, XmmRegister src) {
3528   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3529   EmitUint8(0x66);
3530   EmitOptionalRex32(dst, src);
3531   EmitUint8(0x0F);
3532   EmitUint8(0x62);
3533   EmitXmmRegisterOperand(dst.LowBits(), src);
3534 }
3535 
3536 
3537 void X86_64Assembler::punpcklqdq(XmmRegister dst, XmmRegister src) {
3538   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3539   EmitUint8(0x66);
3540   EmitOptionalRex32(dst, src);
3541   EmitUint8(0x0F);
3542   EmitUint8(0x6C);
3543   EmitXmmRegisterOperand(dst.LowBits(), src);
3544 }
3545 
3546 
3547 void X86_64Assembler::punpckhbw(XmmRegister dst, XmmRegister src) {
3548   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3549   EmitUint8(0x66);
3550   EmitOptionalRex32(dst, src);
3551   EmitUint8(0x0F);
3552   EmitUint8(0x68);
3553   EmitXmmRegisterOperand(dst.LowBits(), src);
3554 }
3555 
3556 
3557 void X86_64Assembler::punpckhwd(XmmRegister dst, XmmRegister src) {
3558   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3559   EmitUint8(0x66);
3560   EmitOptionalRex32(dst, src);
3561   EmitUint8(0x0F);
3562   EmitUint8(0x69);
3563   EmitXmmRegisterOperand(dst.LowBits(), src);
3564 }
3565 
3566 
3567 void X86_64Assembler::punpckhdq(XmmRegister dst, XmmRegister src) {
3568   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3569   EmitUint8(0x66);
3570   EmitOptionalRex32(dst, src);
3571   EmitUint8(0x0F);
3572   EmitUint8(0x6A);
3573   EmitXmmRegisterOperand(dst.LowBits(), src);
3574 }
3575 
3576 
3577 void X86_64Assembler::punpckhqdq(XmmRegister dst, XmmRegister src) {
3578   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3579   EmitUint8(0x66);
3580   EmitOptionalRex32(dst, src);
3581   EmitUint8(0x0F);
3582   EmitUint8(0x6D);
3583   EmitXmmRegisterOperand(dst.LowBits(), src);
3584 }
3585 
3586 
3587 void X86_64Assembler::psllw(XmmRegister reg, const Immediate& shift_count) {
3588   DCHECK(shift_count.is_uint8());
3589   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3590   EmitUint8(0x66);
3591   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3592   EmitUint8(0x0F);
3593   EmitUint8(0x71);
3594   EmitXmmRegisterOperand(6, reg);
3595   EmitUint8(shift_count.value());
3596 }
3597 
3598 
3599 void X86_64Assembler::pslld(XmmRegister reg, const Immediate& shift_count) {
3600   DCHECK(shift_count.is_uint8());
3601   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3602   EmitUint8(0x66);
3603   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3604   EmitUint8(0x0F);
3605   EmitUint8(0x72);
3606   EmitXmmRegisterOperand(6, reg);
3607   EmitUint8(shift_count.value());
3608 }
3609 
3610 
3611 void X86_64Assembler::psllq(XmmRegister reg, const Immediate& shift_count) {
3612   DCHECK(shift_count.is_uint8());
3613   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3614   EmitUint8(0x66);
3615   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3616   EmitUint8(0x0F);
3617   EmitUint8(0x73);
3618   EmitXmmRegisterOperand(6, reg);
3619   EmitUint8(shift_count.value());
3620 }
3621 
3622 
3623 void X86_64Assembler::psraw(XmmRegister reg, const Immediate& shift_count) {
3624   DCHECK(shift_count.is_uint8());
3625   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3626   EmitUint8(0x66);
3627   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3628   EmitUint8(0x0F);
3629   EmitUint8(0x71);
3630   EmitXmmRegisterOperand(4, reg);
3631   EmitUint8(shift_count.value());
3632 }
3633 
3634 
3635 void X86_64Assembler::psrad(XmmRegister reg, const Immediate& shift_count) {
3636   DCHECK(shift_count.is_uint8());
3637   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3638   EmitUint8(0x66);
3639   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3640   EmitUint8(0x0F);
3641   EmitUint8(0x72);
3642   EmitXmmRegisterOperand(4, reg);
3643   EmitUint8(shift_count.value());
3644 }
3645 
3646 
3647 void X86_64Assembler::psrlw(XmmRegister reg, const Immediate& shift_count) {
3648   DCHECK(shift_count.is_uint8());
3649   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3650   EmitUint8(0x66);
3651   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3652   EmitUint8(0x0F);
3653   EmitUint8(0x71);
3654   EmitXmmRegisterOperand(2, reg);
3655   EmitUint8(shift_count.value());
3656 }
3657 
3658 
3659 void X86_64Assembler::psrld(XmmRegister reg, const Immediate& shift_count) {
3660   DCHECK(shift_count.is_uint8());
3661   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3662   EmitUint8(0x66);
3663   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3664   EmitUint8(0x0F);
3665   EmitUint8(0x72);
3666   EmitXmmRegisterOperand(2, reg);
3667   EmitUint8(shift_count.value());
3668 }
3669 
3670 
3671 void X86_64Assembler::psrlq(XmmRegister reg, const Immediate& shift_count) {
3672   DCHECK(shift_count.is_uint8());
3673   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3674   EmitUint8(0x66);
3675   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3676   EmitUint8(0x0F);
3677   EmitUint8(0x73);
3678   EmitXmmRegisterOperand(2, reg);
3679   EmitUint8(shift_count.value());
3680 }
3681 
3682 
3683 void X86_64Assembler::psrldq(XmmRegister reg, const Immediate& shift_count) {
3684   DCHECK(shift_count.is_uint8());
3685   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3686   EmitUint8(0x66);
3687   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
3688   EmitUint8(0x0F);
3689   EmitUint8(0x73);
3690   EmitXmmRegisterOperand(3, reg);
3691   EmitUint8(shift_count.value());
3692 }
3693 
3694 
3695 void X86_64Assembler::fldl(const Address& src) {
3696   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3697   EmitUint8(0xDD);
3698   EmitOperand(0, src);
3699 }
3700 
3701 
3702 void X86_64Assembler::fstl(const Address& dst) {
3703   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3704   EmitUint8(0xDD);
3705   EmitOperand(2, dst);
3706 }
3707 
3708 
3709 void X86_64Assembler::fstpl(const Address& dst) {
3710   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3711   EmitUint8(0xDD);
3712   EmitOperand(3, dst);
3713 }
3714 
3715 
3716 void X86_64Assembler::fstsw() {
3717   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3718   EmitUint8(0x9B);
3719   EmitUint8(0xDF);
3720   EmitUint8(0xE0);
3721 }
3722 
3723 
3724 void X86_64Assembler::fnstcw(const Address& dst) {
3725   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3726   EmitUint8(0xD9);
3727   EmitOperand(7, dst);
3728 }
3729 
3730 
3731 void X86_64Assembler::fldcw(const Address& src) {
3732   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3733   EmitUint8(0xD9);
3734   EmitOperand(5, src);
3735 }
3736 
3737 
3738 void X86_64Assembler::fistpl(const Address& dst) {
3739   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3740   EmitUint8(0xDF);
3741   EmitOperand(7, dst);
3742 }
3743 
3744 
3745 void X86_64Assembler::fistps(const Address& dst) {
3746   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3747   EmitUint8(0xDB);
3748   EmitOperand(3, dst);
3749 }
3750 
3751 
3752 void X86_64Assembler::fildl(const Address& src) {
3753   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3754   EmitUint8(0xDF);
3755   EmitOperand(5, src);
3756 }
3757 
3758 
3759 void X86_64Assembler::filds(const Address& src) {
3760   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3761   EmitUint8(0xDB);
3762   EmitOperand(0, src);
3763 }
3764 
3765 
3766 void X86_64Assembler::fincstp() {
3767   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3768   EmitUint8(0xD9);
3769   EmitUint8(0xF7);
3770 }
3771 
3772 
3773 void X86_64Assembler::ffree(const Immediate& index) {
3774   CHECK_LT(index.value(), 7);
3775   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3776   EmitUint8(0xDD);
3777   EmitUint8(0xC0 + index.value());
3778 }
3779 
3780 
3781 void X86_64Assembler::fsin() {
3782   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3783   EmitUint8(0xD9);
3784   EmitUint8(0xFE);
3785 }
3786 
3787 
3788 void X86_64Assembler::fcos() {
3789   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3790   EmitUint8(0xD9);
3791   EmitUint8(0xFF);
3792 }
3793 
3794 
3795 void X86_64Assembler::fptan() {
3796   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3797   EmitUint8(0xD9);
3798   EmitUint8(0xF2);
3799 }
3800 
3801 void X86_64Assembler::fucompp() {
3802   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3803   EmitUint8(0xDA);
3804   EmitUint8(0xE9);
3805 }
3806 
3807 
3808 void X86_64Assembler::fprem() {
3809   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3810   EmitUint8(0xD9);
3811   EmitUint8(0xF8);
3812 }
3813 
3814 
3815 void X86_64Assembler::xchgl(CpuRegister dst, CpuRegister src) {
3816   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3817   // There is a short version for rax.
3818   // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
3819   // work.
3820   const bool src_rax = src.AsRegister() == RAX;
3821   const bool dst_rax = dst.AsRegister() == RAX;
3822   if (src_rax || dst_rax) {
3823     EmitOptionalRex32(src_rax ? dst : src);
3824     EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
3825     return;
3826   }
3827 
3828   // General case.
3829   EmitOptionalRex32(src, dst);
3830   EmitUint8(0x87);
3831   EmitRegisterOperand(src.LowBits(), dst.LowBits());
3832 }
3833 
3834 
3835 void X86_64Assembler::xchgq(CpuRegister dst, CpuRegister src) {
3836   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3837   // There is a short version for rax.
3838   // It's a bit awkward, as CpuRegister has a const field, so assignment and thus swapping doesn't
3839   // work.
3840   const bool src_rax = src.AsRegister() == RAX;
3841   const bool dst_rax = dst.AsRegister() == RAX;
3842   if (src_rax || dst_rax) {
3843     // If src == target, emit a nop instead.
3844     if (src_rax && dst_rax) {
3845       EmitUint8(0x90);
3846     } else {
3847       EmitRex64(src_rax ? dst : src);
3848       EmitUint8(0x90 + (src_rax ? dst.LowBits() : src.LowBits()));
3849     }
3850     return;
3851   }
3852 
3853   // General case.
3854   EmitRex64(src, dst);
3855   EmitUint8(0x87);
3856   EmitRegisterOperand(src.LowBits(), dst.LowBits());
3857 }
3858 
3859 
3860 void X86_64Assembler::xchgl(CpuRegister reg, const Address& address) {
3861   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3862   EmitOptionalRex32(reg, address);
3863   EmitUint8(0x87);
3864   EmitOperand(reg.LowBits(), address);
3865 }
3866 
3867 
3868 void X86_64Assembler::cmpb(const Address& address, const Immediate& imm) {
3869   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3870   CHECK(imm.is_int32());
3871   EmitOptionalRex32(address);
3872   EmitUint8(0x80);
3873   EmitOperand(7, address);
3874   EmitUint8(imm.value() & 0xFF);
3875 }
3876 
3877 
3878 void X86_64Assembler::cmpw(const Address& address, const Immediate& imm) {
3879   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3880   CHECK(imm.is_int32());
3881   EmitOperandSizeOverride();
3882   EmitOptionalRex32(address);
3883   EmitComplex(7, address, imm, /* is_16_op= */ true);
3884 }
3885 
3886 
3887 void X86_64Assembler::cmpl(CpuRegister reg, const Immediate& imm) {
3888   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3889   CHECK(imm.is_int32());
3890   EmitOptionalRex32(reg);
3891   EmitComplex(7, Operand(reg), imm);
3892 }
3893 
3894 
3895 void X86_64Assembler::cmpl(CpuRegister reg0, CpuRegister reg1) {
3896   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3897   EmitOptionalRex32(reg0, reg1);
3898   EmitUint8(0x3B);
3899   EmitOperand(reg0.LowBits(), Operand(reg1));
3900 }
3901 
3902 
3903 void X86_64Assembler::cmpl(CpuRegister reg, const Address& address) {
3904   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3905   EmitOptionalRex32(reg, address);
3906   EmitUint8(0x3B);
3907   EmitOperand(reg.LowBits(), address);
3908 }
3909 
3910 
3911 void X86_64Assembler::cmpl(const Address& address, CpuRegister reg) {
3912   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3913   EmitOptionalRex32(reg, address);
3914   EmitUint8(0x39);
3915   EmitOperand(reg.LowBits(), address);
3916 }
3917 
3918 
3919 void X86_64Assembler::cmpl(const Address& address, const Immediate& imm) {
3920   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3921   CHECK(imm.is_int32());
3922   EmitOptionalRex32(address);
3923   EmitComplex(7, address, imm);
3924 }
3925 
3926 
3927 void X86_64Assembler::cmpq(CpuRegister reg0, CpuRegister reg1) {
3928   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3929   EmitRex64(reg0, reg1);
3930   EmitUint8(0x3B);
3931   EmitOperand(reg0.LowBits(), Operand(reg1));
3932 }
3933 
3934 
3935 void X86_64Assembler::cmpq(CpuRegister reg, const Immediate& imm) {
3936   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3937   CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
3938   EmitRex64(reg);
3939   EmitComplex(7, Operand(reg), imm);
3940 }
3941 
3942 
3943 void X86_64Assembler::cmpq(CpuRegister reg, const Address& address) {
3944   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3945   EmitRex64(reg, address);
3946   EmitUint8(0x3B);
3947   EmitOperand(reg.LowBits(), address);
3948 }
3949 
3950 
3951 void X86_64Assembler::cmpq(const Address& address, const Immediate& imm) {
3952   CHECK(imm.is_int32());  // cmpq only supports 32b immediate.
3953   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3954   EmitRex64(address);
3955   EmitComplex(7, address, imm);
3956 }
3957 
3958 
3959 void X86_64Assembler::addl(CpuRegister dst, CpuRegister src) {
3960   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3961   EmitOptionalRex32(dst, src);
3962   EmitUint8(0x03);
3963   EmitRegisterOperand(dst.LowBits(), src.LowBits());
3964 }
3965 
3966 
3967 void X86_64Assembler::addl(CpuRegister reg, const Address& address) {
3968   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3969   EmitOptionalRex32(reg, address);
3970   EmitUint8(0x03);
3971   EmitOperand(reg.LowBits(), address);
3972 }
3973 
3974 
3975 void X86_64Assembler::testl(CpuRegister reg1, CpuRegister reg2) {
3976   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3977   EmitOptionalRex32(reg1, reg2);
3978   EmitUint8(0x85);
3979   EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
3980 }
3981 
3982 
3983 void X86_64Assembler::testl(CpuRegister reg, const Address& address) {
3984   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3985   EmitOptionalRex32(reg, address);
3986   EmitUint8(0x85);
3987   EmitOperand(reg.LowBits(), address);
3988 }
3989 
3990 
3991 void X86_64Assembler::testl(CpuRegister reg, const Immediate& immediate) {
3992   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
3993   // For registers that have a byte variant (RAX, RBX, RCX, and RDX)
3994   // we only test the byte CpuRegister to keep the encoding short.
3995   if (immediate.is_uint8() && reg.AsRegister() < 4) {
3996     // Use zero-extended 8-bit immediate.
3997     if (reg.AsRegister() == RAX) {
3998       EmitUint8(0xA8);
3999     } else {
4000       EmitUint8(0xF6);
4001       EmitUint8(0xC0 + reg.AsRegister());
4002     }
4003     EmitUint8(immediate.value() & 0xFF);
4004   } else if (reg.AsRegister() == RAX) {
4005     // Use short form if the destination is RAX.
4006     EmitUint8(0xA9);
4007     EmitImmediate(immediate);
4008   } else {
4009     EmitOptionalRex32(reg);
4010     EmitUint8(0xF7);
4011     EmitOperand(0, Operand(reg));
4012     EmitImmediate(immediate);
4013   }
4014 }
4015 
4016 
4017 void X86_64Assembler::testq(CpuRegister reg1, CpuRegister reg2) {
4018   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4019   EmitRex64(reg1, reg2);
4020   EmitUint8(0x85);
4021   EmitRegisterOperand(reg1.LowBits(), reg2.LowBits());
4022 }
4023 
4024 
4025 void X86_64Assembler::testq(CpuRegister reg, const Address& address) {
4026   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4027   EmitRex64(reg, address);
4028   EmitUint8(0x85);
4029   EmitOperand(reg.LowBits(), address);
4030 }
4031 
4032 
4033 void X86_64Assembler::testb(const Address& dst, const Immediate& imm) {
4034   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4035   EmitOptionalRex32(dst);
4036   EmitUint8(0xF6);
4037   EmitOperand(Register::RAX, dst);
4038   CHECK(imm.is_int8());
4039   EmitUint8(imm.value() & 0xFF);
4040 }
4041 
4042 
4043 void X86_64Assembler::testl(const Address& dst, const Immediate& imm) {
4044   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4045   EmitOptionalRex32(dst);
4046   EmitUint8(0xF7);
4047   EmitOperand(0, dst);
4048   EmitImmediate(imm);
4049 }
4050 
4051 
4052 void X86_64Assembler::andl(CpuRegister dst, CpuRegister src) {
4053   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4054   EmitOptionalRex32(dst, src);
4055   EmitUint8(0x23);
4056   EmitOperand(dst.LowBits(), Operand(src));
4057 }
4058 
4059 
4060 void X86_64Assembler::andl(CpuRegister reg, const Address& address) {
4061   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4062   EmitOptionalRex32(reg, address);
4063   EmitUint8(0x23);
4064   EmitOperand(reg.LowBits(), address);
4065 }
4066 
4067 
4068 void X86_64Assembler::andl(CpuRegister dst, const Immediate& imm) {
4069   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4070   EmitOptionalRex32(dst);
4071   EmitComplex(4, Operand(dst), imm);
4072 }
4073 
4074 
4075 void X86_64Assembler::andq(CpuRegister reg, const Immediate& imm) {
4076   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4077   CHECK(imm.is_int32());  // andq only supports 32b immediate.
4078   EmitRex64(reg);
4079   EmitComplex(4, Operand(reg), imm);
4080 }
4081 
4082 
4083 void X86_64Assembler::andq(CpuRegister dst, CpuRegister src) {
4084   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4085   EmitRex64(dst, src);
4086   EmitUint8(0x23);
4087   EmitOperand(dst.LowBits(), Operand(src));
4088 }
4089 
4090 
4091 void X86_64Assembler::andq(CpuRegister dst, const Address& src) {
4092   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4093   EmitRex64(dst, src);
4094   EmitUint8(0x23);
4095   EmitOperand(dst.LowBits(), src);
4096 }
4097 
4098 
4099 void X86_64Assembler::orl(CpuRegister dst, CpuRegister src) {
4100   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4101   EmitOptionalRex32(dst, src);
4102   EmitUint8(0x0B);
4103   EmitOperand(dst.LowBits(), Operand(src));
4104 }
4105 
4106 
4107 void X86_64Assembler::orl(CpuRegister reg, const Address& address) {
4108   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4109   EmitOptionalRex32(reg, address);
4110   EmitUint8(0x0B);
4111   EmitOperand(reg.LowBits(), address);
4112 }
4113 
4114 
4115 void X86_64Assembler::orl(CpuRegister dst, const Immediate& imm) {
4116   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4117   EmitOptionalRex32(dst);
4118   EmitComplex(1, Operand(dst), imm);
4119 }
4120 
4121 
4122 void X86_64Assembler::orq(CpuRegister dst, const Immediate& imm) {
4123   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4124   CHECK(imm.is_int32());  // orq only supports 32b immediate.
4125   EmitRex64(dst);
4126   EmitComplex(1, Operand(dst), imm);
4127 }
4128 
4129 
4130 void X86_64Assembler::orq(CpuRegister dst, CpuRegister src) {
4131   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4132   EmitRex64(dst, src);
4133   EmitUint8(0x0B);
4134   EmitOperand(dst.LowBits(), Operand(src));
4135 }
4136 
4137 
4138 void X86_64Assembler::orq(CpuRegister dst, const Address& src) {
4139   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4140   EmitRex64(dst, src);
4141   EmitUint8(0x0B);
4142   EmitOperand(dst.LowBits(), src);
4143 }
4144 
4145 
4146 void X86_64Assembler::xorl(CpuRegister dst, CpuRegister src) {
4147   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4148   EmitOptionalRex32(dst, src);
4149   EmitUint8(0x33);
4150   EmitOperand(dst.LowBits(), Operand(src));
4151 }
4152 
4153 
4154 void X86_64Assembler::xorl(CpuRegister reg, const Address& address) {
4155   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4156   EmitOptionalRex32(reg, address);
4157   EmitUint8(0x33);
4158   EmitOperand(reg.LowBits(), address);
4159 }
4160 
4161 
4162 void X86_64Assembler::xorl(CpuRegister dst, const Immediate& imm) {
4163   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4164   EmitOptionalRex32(dst);
4165   EmitComplex(6, Operand(dst), imm);
4166 }
4167 
4168 
4169 void X86_64Assembler::xorq(CpuRegister dst, CpuRegister src) {
4170   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4171   EmitRex64(dst, src);
4172   EmitUint8(0x33);
4173   EmitOperand(dst.LowBits(), Operand(src));
4174 }
4175 
4176 
4177 void X86_64Assembler::xorq(CpuRegister dst, const Immediate& imm) {
4178   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4179   CHECK(imm.is_int32());  // xorq only supports 32b immediate.
4180   EmitRex64(dst);
4181   EmitComplex(6, Operand(dst), imm);
4182 }
4183 
4184 void X86_64Assembler::xorq(CpuRegister dst, const Address& src) {
4185   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4186   EmitRex64(dst, src);
4187   EmitUint8(0x33);
4188   EmitOperand(dst.LowBits(), src);
4189 }
4190 
4191 
4192 #if 0
4193 void X86_64Assembler::rex(bool force, bool w, Register* r, Register* x, Register* b) {
4194   // REX.WRXB
4195   // W - 64-bit operand
4196   // R - MODRM.reg
4197   // X - SIB.index
4198   // B - MODRM.rm/SIB.base
4199   uint8_t rex = force ? 0x40 : 0;
4200   if (w) {
4201     rex |= 0x48;  // REX.W000
4202   }
4203   if (r != nullptr && *r >= Register::R8 && *r < Register::kNumberOfCpuRegisters) {
4204     rex |= 0x44;  // REX.0R00
4205     *r = static_cast<Register>(*r - 8);
4206   }
4207   if (x != nullptr && *x >= Register::R8 && *x < Register::kNumberOfCpuRegisters) {
4208     rex |= 0x42;  // REX.00X0
4209     *x = static_cast<Register>(*x - 8);
4210   }
4211   if (b != nullptr && *b >= Register::R8 && *b < Register::kNumberOfCpuRegisters) {
4212     rex |= 0x41;  // REX.000B
4213     *b = static_cast<Register>(*b - 8);
4214   }
4215   if (rex != 0) {
4216     EmitUint8(rex);
4217   }
4218 }
4219 
4220 void X86_64Assembler::rex_reg_mem(bool force, bool w, Register* dst, const Address& mem) {
4221   // REX.WRXB
4222   // W - 64-bit operand
4223   // R - MODRM.reg
4224   // X - SIB.index
4225   // B - MODRM.rm/SIB.base
4226   uint8_t rex = mem->rex();
4227   if (force) {
4228     rex |= 0x40;  // REX.0000
4229   }
4230   if (w) {
4231     rex |= 0x48;  // REX.W000
4232   }
4233   if (dst != nullptr && *dst >= Register::R8 && *dst < Register::kNumberOfCpuRegisters) {
4234     rex |= 0x44;  // REX.0R00
4235     *dst = static_cast<Register>(*dst - 8);
4236   }
4237   if (rex != 0) {
4238     EmitUint8(rex);
4239   }
4240 }
4241 
4242 void rex_mem_reg(bool force, bool w, Address* mem, Register* src);
4243 #endif
4244 
4245 void X86_64Assembler::addl(CpuRegister reg, const Immediate& imm) {
4246   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4247   EmitOptionalRex32(reg);
4248   EmitComplex(0, Operand(reg), imm);
4249 }
4250 
4251 
4252 void X86_64Assembler::addq(CpuRegister reg, const Immediate& imm) {
4253   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4254   CHECK(imm.is_int32());  // addq only supports 32b immediate.
4255   EmitRex64(reg);
4256   EmitComplex(0, Operand(reg), imm);
4257 }
4258 
4259 
4260 void X86_64Assembler::addq(CpuRegister dst, const Address& address) {
4261   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4262   EmitRex64(dst, address);
4263   EmitUint8(0x03);
4264   EmitOperand(dst.LowBits(), address);
4265 }
4266 
4267 
4268 void X86_64Assembler::addq(CpuRegister dst, CpuRegister src) {
4269   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4270   // 0x01 is addq r/m64 <- r/m64 + r64, with op1 in r/m and op2 in reg: so reverse EmitRex64
4271   EmitRex64(src, dst);
4272   EmitUint8(0x01);
4273   EmitRegisterOperand(src.LowBits(), dst.LowBits());
4274 }
4275 
4276 
4277 void X86_64Assembler::addl(const Address& address, CpuRegister reg) {
4278   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4279   EmitOptionalRex32(reg, address);
4280   EmitUint8(0x01);
4281   EmitOperand(reg.LowBits(), address);
4282 }
4283 
4284 
4285 void X86_64Assembler::addl(const Address& address, const Immediate& imm) {
4286   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4287   EmitOptionalRex32(address);
4288   EmitComplex(0, address, imm);
4289 }
4290 
4291 
4292 void X86_64Assembler::addw(const Address& address, const Immediate& imm) {
4293   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4294   CHECK(imm.is_uint16() || imm.is_int16()) << imm.value();
4295   EmitUint8(0x66);
4296   EmitOptionalRex32(address);
4297   EmitComplex(0, address, imm, /* is_16_op= */ true);
4298 }
4299 
4300 
4301 void X86_64Assembler::subl(CpuRegister dst, CpuRegister src) {
4302   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4303   EmitOptionalRex32(dst, src);
4304   EmitUint8(0x2B);
4305   EmitOperand(dst.LowBits(), Operand(src));
4306 }
4307 
4308 
4309 void X86_64Assembler::subl(CpuRegister reg, const Immediate& imm) {
4310   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4311   EmitOptionalRex32(reg);
4312   EmitComplex(5, Operand(reg), imm);
4313 }
4314 
4315 
4316 void X86_64Assembler::subq(CpuRegister reg, const Immediate& imm) {
4317   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4318   CHECK(imm.is_int32());  // subq only supports 32b immediate.
4319   EmitRex64(reg);
4320   EmitComplex(5, Operand(reg), imm);
4321 }
4322 
4323 
4324 void X86_64Assembler::subq(CpuRegister dst, CpuRegister src) {
4325   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4326   EmitRex64(dst, src);
4327   EmitUint8(0x2B);
4328   EmitRegisterOperand(dst.LowBits(), src.LowBits());
4329 }
4330 
4331 
4332 void X86_64Assembler::subq(CpuRegister reg, const Address& address) {
4333   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4334   EmitRex64(reg, address);
4335   EmitUint8(0x2B);
4336   EmitOperand(reg.LowBits() & 7, address);
4337 }
4338 
4339 
4340 void X86_64Assembler::subl(CpuRegister reg, const Address& address) {
4341   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4342   EmitOptionalRex32(reg, address);
4343   EmitUint8(0x2B);
4344   EmitOperand(reg.LowBits(), address);
4345 }
4346 
4347 
4348 void X86_64Assembler::cdq() {
4349   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4350   EmitUint8(0x99);
4351 }
4352 
4353 
4354 void X86_64Assembler::cqo() {
4355   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4356   EmitRex64();
4357   EmitUint8(0x99);
4358 }
4359 
4360 
4361 void X86_64Assembler::idivl(CpuRegister reg) {
4362   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4363   EmitOptionalRex32(reg);
4364   EmitUint8(0xF7);
4365   EmitUint8(0xF8 | reg.LowBits());
4366 }
4367 
4368 
4369 void X86_64Assembler::idivq(CpuRegister reg) {
4370   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4371   EmitRex64(reg);
4372   EmitUint8(0xF7);
4373   EmitUint8(0xF8 | reg.LowBits());
4374 }
4375 
4376 
4377 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src) {
4378   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4379   EmitOptionalRex32(dst, src);
4380   EmitUint8(0x0F);
4381   EmitUint8(0xAF);
4382   EmitOperand(dst.LowBits(), Operand(src));
4383 }
4384 
4385 void X86_64Assembler::imull(CpuRegister dst, CpuRegister src, const Immediate& imm) {
4386   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4387   CHECK(imm.is_int32());  // imull only supports 32b immediate.
4388 
4389   EmitOptionalRex32(dst, src);
4390 
4391   // See whether imm can be represented as a sign-extended 8bit value.
4392   int32_t v32 = static_cast<int32_t>(imm.value());
4393   if (IsInt<8>(v32)) {
4394     // Sign-extension works.
4395     EmitUint8(0x6B);
4396     EmitOperand(dst.LowBits(), Operand(src));
4397     EmitUint8(static_cast<uint8_t>(v32 & 0xFF));
4398   } else {
4399     // Not representable, use full immediate.
4400     EmitUint8(0x69);
4401     EmitOperand(dst.LowBits(), Operand(src));
4402     EmitImmediate(imm);
4403   }
4404 }
4405 
4406 
4407 void X86_64Assembler::imull(CpuRegister reg, const Immediate& imm) {
4408   imull(reg, reg, imm);
4409 }
4410 
4411 
4412 void X86_64Assembler::imull(CpuRegister reg, const Address& address) {
4413   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4414   EmitOptionalRex32(reg, address);
4415   EmitUint8(0x0F);
4416   EmitUint8(0xAF);
4417   EmitOperand(reg.LowBits(), address);
4418 }
4419 
4420 
4421 void X86_64Assembler::imulq(CpuRegister dst, CpuRegister src) {
4422   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4423   EmitRex64(dst, src);
4424   EmitUint8(0x0F);
4425   EmitUint8(0xAF);
4426   EmitRegisterOperand(dst.LowBits(), src.LowBits());
4427 }
4428 
4429 
4430 void X86_64Assembler::imulq(CpuRegister reg, const Immediate& imm) {
4431   imulq(reg, reg, imm);
4432 }
4433 
4434 void X86_64Assembler::imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm) {
4435   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4436   CHECK(imm.is_int32());  // imulq only supports 32b immediate.
4437 
4438   EmitRex64(dst, reg);
4439 
4440   // See whether imm can be represented as a sign-extended 8bit value.
4441   int64_t v64 = imm.value();
4442   if (IsInt<8>(v64)) {
4443     // Sign-extension works.
4444     EmitUint8(0x6B);
4445     EmitOperand(dst.LowBits(), Operand(reg));
4446     EmitUint8(static_cast<uint8_t>(v64 & 0xFF));
4447   } else {
4448     // Not representable, use full immediate.
4449     EmitUint8(0x69);
4450     EmitOperand(dst.LowBits(), Operand(reg));
4451     EmitImmediate(imm);
4452   }
4453 }
4454 
4455 void X86_64Assembler::imulq(CpuRegister reg, const Address& address) {
4456   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4457   EmitRex64(reg, address);
4458   EmitUint8(0x0F);
4459   EmitUint8(0xAF);
4460   EmitOperand(reg.LowBits(), address);
4461 }
4462 
4463 
4464 void X86_64Assembler::imull(CpuRegister reg) {
4465   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4466   EmitOptionalRex32(reg);
4467   EmitUint8(0xF7);
4468   EmitOperand(5, Operand(reg));
4469 }
4470 
4471 
4472 void X86_64Assembler::imulq(CpuRegister reg) {
4473   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4474   EmitRex64(reg);
4475   EmitUint8(0xF7);
4476   EmitOperand(5, Operand(reg));
4477 }
4478 
4479 
4480 void X86_64Assembler::imull(const Address& address) {
4481   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4482   EmitOptionalRex32(address);
4483   EmitUint8(0xF7);
4484   EmitOperand(5, address);
4485 }
4486 
4487 
4488 void X86_64Assembler::mull(CpuRegister reg) {
4489   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4490   EmitOptionalRex32(reg);
4491   EmitUint8(0xF7);
4492   EmitOperand(4, Operand(reg));
4493 }
4494 
4495 
4496 void X86_64Assembler::mull(const Address& address) {
4497   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4498   EmitOptionalRex32(address);
4499   EmitUint8(0xF7);
4500   EmitOperand(4, address);
4501 }
4502 
4503 
4504 void X86_64Assembler::shll(CpuRegister reg, const Immediate& imm) {
4505   EmitGenericShift(false, 4, reg, imm);
4506 }
4507 
4508 
4509 void X86_64Assembler::shlq(CpuRegister reg, const Immediate& imm) {
4510   EmitGenericShift(true, 4, reg, imm);
4511 }
4512 
4513 
4514 void X86_64Assembler::shll(CpuRegister operand, CpuRegister shifter) {
4515   EmitGenericShift(false, 4, operand, shifter);
4516 }
4517 
4518 
4519 void X86_64Assembler::shlq(CpuRegister operand, CpuRegister shifter) {
4520   EmitGenericShift(true, 4, operand, shifter);
4521 }
4522 
4523 
4524 void X86_64Assembler::shrl(CpuRegister reg, const Immediate& imm) {
4525   EmitGenericShift(false, 5, reg, imm);
4526 }
4527 
4528 
4529 void X86_64Assembler::shrq(CpuRegister reg, const Immediate& imm) {
4530   EmitGenericShift(true, 5, reg, imm);
4531 }
4532 
4533 
4534 void X86_64Assembler::shrl(CpuRegister operand, CpuRegister shifter) {
4535   EmitGenericShift(false, 5, operand, shifter);
4536 }
4537 
4538 
4539 void X86_64Assembler::shrq(CpuRegister operand, CpuRegister shifter) {
4540   EmitGenericShift(true, 5, operand, shifter);
4541 }
4542 
4543 
4544 void X86_64Assembler::sarl(CpuRegister reg, const Immediate& imm) {
4545   EmitGenericShift(false, 7, reg, imm);
4546 }
4547 
4548 
4549 void X86_64Assembler::sarl(CpuRegister operand, CpuRegister shifter) {
4550   EmitGenericShift(false, 7, operand, shifter);
4551 }
4552 
4553 
4554 void X86_64Assembler::sarq(CpuRegister reg, const Immediate& imm) {
4555   EmitGenericShift(true, 7, reg, imm);
4556 }
4557 
4558 
4559 void X86_64Assembler::sarq(CpuRegister operand, CpuRegister shifter) {
4560   EmitGenericShift(true, 7, operand, shifter);
4561 }
4562 
4563 
4564 void X86_64Assembler::roll(CpuRegister reg, const Immediate& imm) {
4565   EmitGenericShift(false, 0, reg, imm);
4566 }
4567 
4568 
4569 void X86_64Assembler::roll(CpuRegister operand, CpuRegister shifter) {
4570   EmitGenericShift(false, 0, operand, shifter);
4571 }
4572 
4573 
4574 void X86_64Assembler::rorl(CpuRegister reg, const Immediate& imm) {
4575   EmitGenericShift(false, 1, reg, imm);
4576 }
4577 
4578 
4579 void X86_64Assembler::rorl(CpuRegister operand, CpuRegister shifter) {
4580   EmitGenericShift(false, 1, operand, shifter);
4581 }
4582 
4583 
4584 void X86_64Assembler::rolq(CpuRegister reg, const Immediate& imm) {
4585   EmitGenericShift(true, 0, reg, imm);
4586 }
4587 
4588 
4589 void X86_64Assembler::rolq(CpuRegister operand, CpuRegister shifter) {
4590   EmitGenericShift(true, 0, operand, shifter);
4591 }
4592 
4593 
4594 void X86_64Assembler::rorq(CpuRegister reg, const Immediate& imm) {
4595   EmitGenericShift(true, 1, reg, imm);
4596 }
4597 
4598 
4599 void X86_64Assembler::rorq(CpuRegister operand, CpuRegister shifter) {
4600   EmitGenericShift(true, 1, operand, shifter);
4601 }
4602 
4603 
4604 void X86_64Assembler::negl(CpuRegister reg) {
4605   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4606   EmitOptionalRex32(reg);
4607   EmitUint8(0xF7);
4608   EmitOperand(3, Operand(reg));
4609 }
4610 
4611 
4612 void X86_64Assembler::negq(CpuRegister reg) {
4613   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4614   EmitRex64(reg);
4615   EmitUint8(0xF7);
4616   EmitOperand(3, Operand(reg));
4617 }
4618 
4619 
4620 void X86_64Assembler::notl(CpuRegister reg) {
4621   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4622   EmitOptionalRex32(reg);
4623   EmitUint8(0xF7);
4624   EmitUint8(0xD0 | reg.LowBits());
4625 }
4626 
4627 
4628 void X86_64Assembler::notq(CpuRegister reg) {
4629   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4630   EmitRex64(reg);
4631   EmitUint8(0xF7);
4632   EmitOperand(2, Operand(reg));
4633 }
4634 
4635 
4636 void X86_64Assembler::enter(const Immediate& imm) {
4637   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4638   EmitUint8(0xC8);
4639   CHECK(imm.is_uint16()) << imm.value();
4640   EmitUint8(imm.value() & 0xFF);
4641   EmitUint8((imm.value() >> 8) & 0xFF);
4642   EmitUint8(0x00);
4643 }
4644 
4645 
4646 void X86_64Assembler::leave() {
4647   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4648   EmitUint8(0xC9);
4649 }
4650 
4651 
4652 void X86_64Assembler::ret() {
4653   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4654   EmitUint8(0xC3);
4655 }
4656 
4657 
4658 void X86_64Assembler::ret(const Immediate& imm) {
4659   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4660   EmitUint8(0xC2);
4661   CHECK(imm.is_uint16());
4662   EmitUint8(imm.value() & 0xFF);
4663   EmitUint8((imm.value() >> 8) & 0xFF);
4664 }
4665 
4666 
4667 
4668 void X86_64Assembler::nop() {
4669   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4670   EmitUint8(0x90);
4671 }
4672 
4673 
4674 void X86_64Assembler::int3() {
4675   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4676   EmitUint8(0xCC);
4677 }
4678 
4679 
4680 void X86_64Assembler::hlt() {
4681   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4682   EmitUint8(0xF4);
4683 }
4684 
4685 
4686 void X86_64Assembler::j(Condition condition, Label* label) {
4687   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4688   if (label->IsBound()) {
4689     static const int kShortSize = 2;
4690     static const int kLongSize = 6;
4691     int offset = label->Position() - buffer_.Size();
4692     CHECK_LE(offset, 0);
4693     if (IsInt<8>(offset - kShortSize)) {
4694       EmitUint8(0x70 + condition);
4695       EmitUint8((offset - kShortSize) & 0xFF);
4696     } else {
4697       EmitUint8(0x0F);
4698       EmitUint8(0x80 + condition);
4699       EmitInt32(offset - kLongSize);
4700     }
4701   } else {
4702     EmitUint8(0x0F);
4703     EmitUint8(0x80 + condition);
4704     EmitLabelLink(label);
4705   }
4706 }
4707 
4708 
4709 void X86_64Assembler::j(Condition condition, NearLabel* label) {
4710   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4711   if (label->IsBound()) {
4712     static const int kShortSize = 2;
4713     int offset = label->Position() - buffer_.Size();
4714     CHECK_LE(offset, 0);
4715     CHECK(IsInt<8>(offset - kShortSize));
4716     EmitUint8(0x70 + condition);
4717     EmitUint8((offset - kShortSize) & 0xFF);
4718   } else {
4719     EmitUint8(0x70 + condition);
4720     EmitLabelLink(label);
4721   }
4722 }
4723 
4724 
4725 void X86_64Assembler::jrcxz(NearLabel* label) {
4726   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4727   if (label->IsBound()) {
4728     static const int kShortSize = 2;
4729     int offset = label->Position() - buffer_.Size();
4730     CHECK_LE(offset, 0);
4731     CHECK(IsInt<8>(offset - kShortSize));
4732     EmitUint8(0xE3);
4733     EmitUint8((offset - kShortSize) & 0xFF);
4734   } else {
4735     EmitUint8(0xE3);
4736     EmitLabelLink(label);
4737   }
4738 }
4739 
4740 
4741 void X86_64Assembler::jmp(CpuRegister reg) {
4742   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4743   EmitOptionalRex32(reg);
4744   EmitUint8(0xFF);
4745   EmitRegisterOperand(4, reg.LowBits());
4746 }
4747 
4748 void X86_64Assembler::jmp(const Address& address) {
4749   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4750   EmitOptionalRex32(address);
4751   EmitUint8(0xFF);
4752   EmitOperand(4, address);
4753 }
4754 
4755 void X86_64Assembler::jmp(Label* label) {
4756   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4757   if (label->IsBound()) {
4758     static const int kShortSize = 2;
4759     static const int kLongSize = 5;
4760     int offset = label->Position() - buffer_.Size();
4761     CHECK_LE(offset, 0);
4762     if (IsInt<8>(offset - kShortSize)) {
4763       EmitUint8(0xEB);
4764       EmitUint8((offset - kShortSize) & 0xFF);
4765     } else {
4766       EmitUint8(0xE9);
4767       EmitInt32(offset - kLongSize);
4768     }
4769   } else {
4770     EmitUint8(0xE9);
4771     EmitLabelLink(label);
4772   }
4773 }
4774 
4775 
4776 void X86_64Assembler::jmp(NearLabel* label) {
4777   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4778   if (label->IsBound()) {
4779     static const int kShortSize = 2;
4780     int offset = label->Position() - buffer_.Size();
4781     CHECK_LE(offset, 0);
4782     CHECK(IsInt<8>(offset - kShortSize));
4783     EmitUint8(0xEB);
4784     EmitUint8((offset - kShortSize) & 0xFF);
4785   } else {
4786     EmitUint8(0xEB);
4787     EmitLabelLink(label);
4788   }
4789 }
4790 
4791 
4792 void X86_64Assembler::rep_movsw() {
4793   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4794   EmitUint8(0x66);
4795   EmitUint8(0xF3);
4796   EmitUint8(0xA5);
4797 }
4798 
4799 
4800 X86_64Assembler* X86_64Assembler::lock() {
4801   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4802   EmitUint8(0xF0);
4803   return this;
4804 }
4805 
4806 
4807 void X86_64Assembler::cmpxchgl(const Address& address, CpuRegister reg) {
4808   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4809   EmitOptionalRex32(reg, address);
4810   EmitUint8(0x0F);
4811   EmitUint8(0xB1);
4812   EmitOperand(reg.LowBits(), address);
4813 }
4814 
4815 
4816 void X86_64Assembler::cmpxchgq(const Address& address, CpuRegister reg) {
4817   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4818   EmitRex64(reg, address);
4819   EmitUint8(0x0F);
4820   EmitUint8(0xB1);
4821   EmitOperand(reg.LowBits(), address);
4822 }
4823 
4824 
4825 void X86_64Assembler::mfence() {
4826   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4827   EmitUint8(0x0F);
4828   EmitUint8(0xAE);
4829   EmitUint8(0xF0);
4830 }
4831 
4832 
4833 X86_64Assembler* X86_64Assembler::gs() {
4834   // TODO: gs is a prefix and not an instruction
4835   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4836   EmitUint8(0x65);
4837   return this;
4838 }
4839 
4840 
4841 void X86_64Assembler::AddImmediate(CpuRegister reg, const Immediate& imm) {
4842   int value = imm.value();
4843   if (value != 0) {
4844     if (value > 0) {
4845       addl(reg, imm);
4846     } else {
4847       subl(reg, Immediate(value));
4848     }
4849   }
4850 }
4851 
4852 
4853 void X86_64Assembler::setcc(Condition condition, CpuRegister dst) {
4854   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4855   // RSP, RBP, RDI, RSI need rex prefix (else the pattern encodes ah/bh/ch/dh).
4856   if (dst.NeedsRex() || dst.AsRegister() > 3) {
4857     EmitOptionalRex(true, false, false, false, dst.NeedsRex());
4858   }
4859   EmitUint8(0x0F);
4860   EmitUint8(0x90 + condition);
4861   EmitUint8(0xC0 + dst.LowBits());
4862 }
4863 
4864 void X86_64Assembler::blsi(CpuRegister dst, CpuRegister src) {
4865   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4866   uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
4867   uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
4868                                           /*X=*/ false,
4869                                           src.NeedsRex(),
4870                                           SET_VEX_M_0F_38);
4871   uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/true,
4872                                           X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
4873                                           SET_VEX_L_128,
4874                                           SET_VEX_PP_NONE);
4875   EmitUint8(byte_zero);
4876   EmitUint8(byte_one);
4877   EmitUint8(byte_two);
4878   EmitUint8(0xF3);
4879   EmitRegisterOperand(3, src.LowBits());
4880 }
4881 
4882 void X86_64Assembler::blsmsk(CpuRegister dst, CpuRegister src) {
4883   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4884   uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/ false);
4885   uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
4886                                           /*X=*/ false,
4887                                           src.NeedsRex(),
4888                                           SET_VEX_M_0F_38);
4889   uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
4890                                           X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
4891                                           SET_VEX_L_128,
4892                                           SET_VEX_PP_NONE);
4893   EmitUint8(byte_zero);
4894   EmitUint8(byte_one);
4895   EmitUint8(byte_two);
4896   EmitUint8(0xF3);
4897   EmitRegisterOperand(2, src.LowBits());
4898 }
4899 
4900 void X86_64Assembler::blsr(CpuRegister dst, CpuRegister src) {
4901   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4902   uint8_t byte_zero = EmitVexPrefixByteZero(/*is_twobyte_form=*/false);
4903   uint8_t byte_one = EmitVexPrefixByteOne(/*R=*/ false,
4904                                           /*X=*/ false,
4905                                           src.NeedsRex(),
4906                                           SET_VEX_M_0F_38);
4907   uint8_t byte_two = EmitVexPrefixByteTwo(/*W=*/ true,
4908                                           X86_64ManagedRegister::FromCpuRegister(dst.AsRegister()),
4909                                           SET_VEX_L_128,
4910                                           SET_VEX_PP_NONE);
4911   EmitUint8(byte_zero);
4912   EmitUint8(byte_one);
4913   EmitUint8(byte_two);
4914   EmitUint8(0xF3);
4915   EmitRegisterOperand(1, src.LowBits());
4916 }
4917 
4918 void X86_64Assembler::bswapl(CpuRegister dst) {
4919   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4920   EmitOptionalRex(false, false, false, false, dst.NeedsRex());
4921   EmitUint8(0x0F);
4922   EmitUint8(0xC8 + dst.LowBits());
4923 }
4924 
4925 void X86_64Assembler::bswapq(CpuRegister dst) {
4926   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4927   EmitOptionalRex(false, true, false, false, dst.NeedsRex());
4928   EmitUint8(0x0F);
4929   EmitUint8(0xC8 + dst.LowBits());
4930 }
4931 
4932 void X86_64Assembler::bsfl(CpuRegister dst, CpuRegister src) {
4933   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4934   EmitOptionalRex32(dst, src);
4935   EmitUint8(0x0F);
4936   EmitUint8(0xBC);
4937   EmitRegisterOperand(dst.LowBits(), src.LowBits());
4938 }
4939 
4940 void X86_64Assembler::bsfl(CpuRegister dst, const Address& src) {
4941   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4942   EmitOptionalRex32(dst, src);
4943   EmitUint8(0x0F);
4944   EmitUint8(0xBC);
4945   EmitOperand(dst.LowBits(), src);
4946 }
4947 
4948 void X86_64Assembler::bsfq(CpuRegister dst, CpuRegister src) {
4949   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4950   EmitRex64(dst, src);
4951   EmitUint8(0x0F);
4952   EmitUint8(0xBC);
4953   EmitRegisterOperand(dst.LowBits(), src.LowBits());
4954 }
4955 
4956 void X86_64Assembler::bsfq(CpuRegister dst, const Address& src) {
4957   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4958   EmitRex64(dst, src);
4959   EmitUint8(0x0F);
4960   EmitUint8(0xBC);
4961   EmitOperand(dst.LowBits(), src);
4962 }
4963 
4964 void X86_64Assembler::bsrl(CpuRegister dst, CpuRegister src) {
4965   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4966   EmitOptionalRex32(dst, src);
4967   EmitUint8(0x0F);
4968   EmitUint8(0xBD);
4969   EmitRegisterOperand(dst.LowBits(), src.LowBits());
4970 }
4971 
4972 void X86_64Assembler::bsrl(CpuRegister dst, const Address& src) {
4973   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4974   EmitOptionalRex32(dst, src);
4975   EmitUint8(0x0F);
4976   EmitUint8(0xBD);
4977   EmitOperand(dst.LowBits(), src);
4978 }
4979 
4980 void X86_64Assembler::bsrq(CpuRegister dst, CpuRegister src) {
4981   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4982   EmitRex64(dst, src);
4983   EmitUint8(0x0F);
4984   EmitUint8(0xBD);
4985   EmitRegisterOperand(dst.LowBits(), src.LowBits());
4986 }
4987 
4988 void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) {
4989   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4990   EmitRex64(dst, src);
4991   EmitUint8(0x0F);
4992   EmitUint8(0xBD);
4993   EmitOperand(dst.LowBits(), src);
4994 }
4995 
4996 void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
4997   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
4998   EmitUint8(0xF3);
4999   EmitOptionalRex32(dst, src);
5000   EmitUint8(0x0F);
5001   EmitUint8(0xB8);
5002   EmitRegisterOperand(dst.LowBits(), src.LowBits());
5003 }
5004 
5005 void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
5006   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5007   EmitUint8(0xF3);
5008   EmitOptionalRex32(dst, src);
5009   EmitUint8(0x0F);
5010   EmitUint8(0xB8);
5011   EmitOperand(dst.LowBits(), src);
5012 }
5013 
5014 void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
5015   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5016   EmitUint8(0xF3);
5017   EmitRex64(dst, src);
5018   EmitUint8(0x0F);
5019   EmitUint8(0xB8);
5020   EmitRegisterOperand(dst.LowBits(), src.LowBits());
5021 }
5022 
5023 void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
5024   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5025   EmitUint8(0xF3);
5026   EmitRex64(dst, src);
5027   EmitUint8(0x0F);
5028   EmitUint8(0xB8);
5029   EmitOperand(dst.LowBits(), src);
5030 }
5031 
5032 void X86_64Assembler::repne_scasb() {
5033   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5034   EmitUint8(0xF2);
5035   EmitUint8(0xAE);
5036 }
5037 
5038 void X86_64Assembler::repne_scasw() {
5039   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5040   EmitUint8(0x66);
5041   EmitUint8(0xF2);
5042   EmitUint8(0xAF);
5043 }
5044 
5045 void X86_64Assembler::repe_cmpsw() {
5046   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5047   EmitUint8(0x66);
5048   EmitUint8(0xF3);
5049   EmitUint8(0xA7);
5050 }
5051 
5052 
5053 void X86_64Assembler::repe_cmpsl() {
5054   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5055   EmitUint8(0xF3);
5056   EmitUint8(0xA7);
5057 }
5058 
5059 
5060 void X86_64Assembler::repe_cmpsq() {
5061   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5062   EmitUint8(0xF3);
5063   EmitRex64();
5064   EmitUint8(0xA7);
5065 }
5066 
5067 
5068 void X86_64Assembler::LoadDoubleConstant(XmmRegister dst, double value) {
5069   // TODO: Need to have a code constants table.
5070   int64_t constant = bit_cast<int64_t, double>(value);
5071   pushq(Immediate(High32Bits(constant)));
5072   pushq(Immediate(Low32Bits(constant)));
5073   movsd(dst, Address(CpuRegister(RSP), 0));
5074   addq(CpuRegister(RSP), Immediate(2 * sizeof(intptr_t)));
5075 }
5076 
5077 
5078 void X86_64Assembler::Align(int alignment, int offset) {
5079   CHECK(IsPowerOfTwo(alignment));
5080   // Emit nop instruction until the real position is aligned.
5081   while (((offset + buffer_.GetPosition()) & (alignment-1)) != 0) {
5082     nop();
5083   }
5084 }
5085 
5086 
5087 void X86_64Assembler::Bind(Label* label) {
5088   int bound = buffer_.Size();
5089   CHECK(!label->IsBound());  // Labels can only be bound once.
5090   while (label->IsLinked()) {
5091     int position = label->LinkPosition();
5092     int next = buffer_.Load<int32_t>(position);
5093     buffer_.Store<int32_t>(position, bound - (position + 4));
5094     label->position_ = next;
5095   }
5096   label->BindTo(bound);
5097 }
5098 
5099 
5100 void X86_64Assembler::Bind(NearLabel* label) {
5101   int bound = buffer_.Size();
5102   CHECK(!label->IsBound());  // Labels can only be bound once.
5103   while (label->IsLinked()) {
5104     int position = label->LinkPosition();
5105     uint8_t delta = buffer_.Load<uint8_t>(position);
5106     int offset = bound - (position + 1);
5107     CHECK(IsInt<8>(offset));
5108     buffer_.Store<int8_t>(position, offset);
5109     label->position_ = delta != 0u ? label->position_ - delta : 0;
5110   }
5111   label->BindTo(bound);
5112 }
5113 
5114 
5115 void X86_64Assembler::EmitOperand(uint8_t reg_or_opcode, const Operand& operand) {
5116   CHECK_GE(reg_or_opcode, 0);
5117   CHECK_LT(reg_or_opcode, 8);
5118   const int length = operand.length_;
5119   CHECK_GT(length, 0);
5120   // Emit the ModRM byte updated with the given reg value.
5121   CHECK_EQ(operand.encoding_[0] & 0x38, 0);
5122   EmitUint8(operand.encoding_[0] + (reg_or_opcode << 3));
5123   // Emit the rest of the encoded operand.
5124   for (int i = 1; i < length; i++) {
5125     EmitUint8(operand.encoding_[i]);
5126   }
5127   AssemblerFixup* fixup = operand.GetFixup();
5128   if (fixup != nullptr) {
5129     EmitFixup(fixup);
5130   }
5131 }
5132 
5133 
5134 void X86_64Assembler::EmitImmediate(const Immediate& imm, bool is_16_op) {
5135   if (is_16_op) {
5136     EmitUint8(imm.value() & 0xFF);
5137     EmitUint8(imm.value() >> 8);
5138   } else if (imm.is_int32()) {
5139     EmitInt32(static_cast<int32_t>(imm.value()));
5140   } else {
5141     EmitInt64(imm.value());
5142   }
5143 }
5144 
5145 
5146 void X86_64Assembler::EmitComplex(uint8_t reg_or_opcode,
5147                                   const Operand& operand,
5148                                   const Immediate& immediate,
5149                                   bool is_16_op) {
5150   CHECK_GE(reg_or_opcode, 0);
5151   CHECK_LT(reg_or_opcode, 8);
5152   if (immediate.is_int8()) {
5153     // Use sign-extended 8-bit immediate.
5154     EmitUint8(0x83);
5155     EmitOperand(reg_or_opcode, operand);
5156     EmitUint8(immediate.value() & 0xFF);
5157   } else if (operand.IsRegister(CpuRegister(RAX))) {
5158     // Use short form if the destination is eax.
5159     EmitUint8(0x05 + (reg_or_opcode << 3));
5160     EmitImmediate(immediate, is_16_op);
5161   } else {
5162     EmitUint8(0x81);
5163     EmitOperand(reg_or_opcode, operand);
5164     EmitImmediate(immediate, is_16_op);
5165   }
5166 }
5167 
5168 
5169 void X86_64Assembler::EmitLabel(Label* label, int instruction_size) {
5170   if (label->IsBound()) {
5171     int offset = label->Position() - buffer_.Size();
5172     CHECK_LE(offset, 0);
5173     EmitInt32(offset - instruction_size);
5174   } else {
5175     EmitLabelLink(label);
5176   }
5177 }
5178 
5179 
5180 void X86_64Assembler::EmitLabelLink(Label* label) {
5181   CHECK(!label->IsBound());
5182   int position = buffer_.Size();
5183   EmitInt32(label->position_);
5184   label->LinkTo(position);
5185 }
5186 
5187 
5188 void X86_64Assembler::EmitLabelLink(NearLabel* label) {
5189   CHECK(!label->IsBound());
5190   int position = buffer_.Size();
5191   if (label->IsLinked()) {
5192     // Save the delta in the byte that we have to play with.
5193     uint32_t delta = position - label->LinkPosition();
5194     CHECK(IsUint<8>(delta));
5195     EmitUint8(delta & 0xFF);
5196   } else {
5197     EmitUint8(0);
5198   }
5199   label->LinkTo(position);
5200 }
5201 
5202 
5203 void X86_64Assembler::EmitGenericShift(bool wide,
5204                                        int reg_or_opcode,
5205                                        CpuRegister reg,
5206                                        const Immediate& imm) {
5207   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5208   CHECK(imm.is_int8());
5209   if (wide) {
5210     EmitRex64(reg);
5211   } else {
5212     EmitOptionalRex32(reg);
5213   }
5214   if (imm.value() == 1) {
5215     EmitUint8(0xD1);
5216     EmitOperand(reg_or_opcode, Operand(reg));
5217   } else {
5218     EmitUint8(0xC1);
5219     EmitOperand(reg_or_opcode, Operand(reg));
5220     EmitUint8(imm.value() & 0xFF);
5221   }
5222 }
5223 
5224 
5225 void X86_64Assembler::EmitGenericShift(bool wide,
5226                                        int reg_or_opcode,
5227                                        CpuRegister operand,
5228                                        CpuRegister shifter) {
5229   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5230   CHECK_EQ(shifter.AsRegister(), RCX);
5231   if (wide) {
5232     EmitRex64(operand);
5233   } else {
5234     EmitOptionalRex32(operand);
5235   }
5236   EmitUint8(0xD3);
5237   EmitOperand(reg_or_opcode, Operand(operand));
5238 }
5239 
5240 void X86_64Assembler::EmitOptionalRex(bool force, bool w, bool r, bool x, bool b) {
5241   // REX.WRXB
5242   // W - 64-bit operand
5243   // R - MODRM.reg
5244   // X - SIB.index
5245   // B - MODRM.rm/SIB.base
5246   uint8_t rex = force ? 0x40 : 0;
5247   if (w) {
5248     rex |= 0x48;  // REX.W000
5249   }
5250   if (r) {
5251     rex |= 0x44;  // REX.0R00
5252   }
5253   if (x) {
5254     rex |= 0x42;  // REX.00X0
5255   }
5256   if (b) {
5257     rex |= 0x41;  // REX.000B
5258   }
5259   if (rex != 0) {
5260     EmitUint8(rex);
5261   }
5262 }
5263 
5264 void X86_64Assembler::EmitOptionalRex32(CpuRegister reg) {
5265   EmitOptionalRex(false, false, false, false, reg.NeedsRex());
5266 }
5267 
5268 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, CpuRegister src) {
5269   EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
5270 }
5271 
5272 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, XmmRegister src) {
5273   EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
5274 }
5275 
5276 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, XmmRegister src) {
5277   EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
5278 }
5279 
5280 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, CpuRegister src) {
5281   EmitOptionalRex(false, false, dst.NeedsRex(), false, src.NeedsRex());
5282 }
5283 
5284 void X86_64Assembler::EmitOptionalRex32(const Operand& operand) {
5285   uint8_t rex = operand.rex();
5286   if (rex != 0) {
5287     EmitUint8(rex);
5288   }
5289 }
5290 
5291 void X86_64Assembler::EmitOptionalRex32(CpuRegister dst, const Operand& operand) {
5292   uint8_t rex = operand.rex();
5293   if (dst.NeedsRex()) {
5294     rex |= 0x44;  // REX.0R00
5295   }
5296   if (rex != 0) {
5297     EmitUint8(rex);
5298   }
5299 }
5300 
5301 void X86_64Assembler::EmitOptionalRex32(XmmRegister dst, const Operand& operand) {
5302   uint8_t rex = operand.rex();
5303   if (dst.NeedsRex()) {
5304     rex |= 0x44;  // REX.0R00
5305   }
5306   if (rex != 0) {
5307     EmitUint8(rex);
5308   }
5309 }
5310 
5311 void X86_64Assembler::EmitRex64() {
5312   EmitOptionalRex(false, true, false, false, false);
5313 }
5314 
5315 void X86_64Assembler::EmitRex64(CpuRegister reg) {
5316   EmitOptionalRex(false, true, false, false, reg.NeedsRex());
5317 }
5318 
5319 void X86_64Assembler::EmitRex64(const Operand& operand) {
5320   uint8_t rex = operand.rex();
5321   rex |= 0x48;  // REX.W000
5322   EmitUint8(rex);
5323 }
5324 
5325 void X86_64Assembler::EmitRex64(CpuRegister dst, CpuRegister src) {
5326   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
5327 }
5328 
5329 void X86_64Assembler::EmitRex64(XmmRegister dst, CpuRegister src) {
5330   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
5331 }
5332 
5333 void X86_64Assembler::EmitRex64(CpuRegister dst, XmmRegister src) {
5334   EmitOptionalRex(false, true, dst.NeedsRex(), false, src.NeedsRex());
5335 }
5336 
5337 void X86_64Assembler::EmitRex64(CpuRegister dst, const Operand& operand) {
5338   uint8_t rex = 0x48 | operand.rex();  // REX.W000
5339   if (dst.NeedsRex()) {
5340     rex |= 0x44;  // REX.0R00
5341   }
5342   EmitUint8(rex);
5343 }
5344 
5345 void X86_64Assembler::EmitRex64(XmmRegister dst, const Operand& operand) {
5346   uint8_t rex = 0x48 | operand.rex();  // REX.W000
5347   if (dst.NeedsRex()) {
5348     rex |= 0x44;  // REX.0R00
5349   }
5350   EmitUint8(rex);
5351 }
5352 
5353 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src) {
5354   // For src, SPL, BPL, SIL, DIL need the rex prefix.
5355   bool force = src.AsRegister() > 3;
5356   EmitOptionalRex(force, false, dst.NeedsRex(), false, src.NeedsRex());
5357 }
5358 
5359 void X86_64Assembler::EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand) {
5360   uint8_t rex = operand.rex();
5361   // For dst, SPL, BPL, SIL, DIL need the rex prefix.
5362   bool force = dst.AsRegister() > 3;
5363   if (force) {
5364     rex |= 0x40;  // REX.0000
5365   }
5366   if (dst.NeedsRex()) {
5367     rex |= 0x44;  // REX.0R00
5368   }
5369   if (rex != 0) {
5370     EmitUint8(rex);
5371   }
5372 }
5373 
5374 void X86_64Assembler::AddConstantArea() {
5375   ArrayRef<const int32_t> area = constant_area_.GetBuffer();
5376   for (size_t i = 0, e = area.size(); i < e; i++) {
5377     AssemblerBuffer::EnsureCapacity ensured(&buffer_);
5378     EmitInt32(area[i]);
5379   }
5380 }
5381 
5382 size_t ConstantArea::AppendInt32(int32_t v) {
5383   size_t result = buffer_.size() * elem_size_;
5384   buffer_.push_back(v);
5385   return result;
5386 }
5387 
5388 size_t ConstantArea::AddInt32(int32_t v) {
5389   // Look for an existing match.
5390   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
5391     if (v == buffer_[i]) {
5392       return i * elem_size_;
5393     }
5394   }
5395 
5396   // Didn't match anything.
5397   return AppendInt32(v);
5398 }
5399 
5400 size_t ConstantArea::AddInt64(int64_t v) {
5401   int32_t v_low = v;
5402   int32_t v_high = v >> 32;
5403   if (buffer_.size() > 1) {
5404     // Ensure we don't pass the end of the buffer.
5405     for (size_t i = 0, e = buffer_.size() - 1; i < e; i++) {
5406       if (v_low == buffer_[i] && v_high == buffer_[i + 1]) {
5407         return i * elem_size_;
5408       }
5409     }
5410   }
5411 
5412   // Didn't match anything.
5413   size_t result = buffer_.size() * elem_size_;
5414   buffer_.push_back(v_low);
5415   buffer_.push_back(v_high);
5416   return result;
5417 }
5418 
5419 size_t ConstantArea::AddDouble(double v) {
5420   // Treat the value as a 64-bit integer value.
5421   return AddInt64(bit_cast<int64_t, double>(v));
5422 }
5423 
5424 size_t ConstantArea::AddFloat(float v) {
5425   // Treat the value as a 32-bit integer value.
5426   return AddInt32(bit_cast<int32_t, float>(v));
5427 }
5428 
5429 uint8_t X86_64Assembler::EmitVexPrefixByteZero(bool is_twobyte_form) {
5430   // Vex Byte 0,
5431   // Bits [7:0] must contain the value 11000101b (0xC5) for 2-byte Vex
5432   // Bits [7:0] must contain the value 11000100b (0xC4) for 3-byte Vex
5433   uint8_t vex_prefix = 0xC0;
5434   if (is_twobyte_form) {
5435     vex_prefix |= TWO_BYTE_VEX;  // 2-Byte Vex
5436   } else {
5437     vex_prefix |= THREE_BYTE_VEX;  // 3-Byte Vex
5438   }
5439   return vex_prefix;
5440 }
5441 
5442 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M) {
5443   // Vex Byte 1,
5444   uint8_t vex_prefix = VEX_INIT;
5445   /** Bit[7] This bit needs to be set to '1'
5446   otherwise the instruction is LES or LDS */
5447   if (!R) {
5448     // R .
5449     vex_prefix |= SET_VEX_R;
5450   }
5451   /** Bit[6] This bit needs to be set to '1'
5452   otherwise the instruction is LES or LDS */
5453   if (!X) {
5454     // X .
5455     vex_prefix |= SET_VEX_X;
5456   }
5457   /** Bit[5] This bit needs to be set to '1' */
5458   if (!B) {
5459     // B .
5460     vex_prefix |= SET_VEX_B;
5461   }
5462   /** Bits[4:0], Based on the instruction documentaion */
5463   vex_prefix |= SET_VEX_M;
5464   return vex_prefix;
5465 }
5466 
5467 uint8_t X86_64Assembler::EmitVexPrefixByteOne(bool R,
5468                                               X86_64ManagedRegister operand,
5469                                               int SET_VEX_L,
5470                                               int SET_VEX_PP) {
5471   // Vex Byte 1,
5472   uint8_t vex_prefix = VEX_INIT;
5473   /** Bit[7] This bit needs to be set to '1'
5474   otherwise the instruction is LES or LDS */
5475   if (!R) {
5476     // R .
5477     vex_prefix |= SET_VEX_R;
5478   }
5479   /**Bits[6:3] - 'vvvv' the source or dest register specifier */
5480   if (operand.IsNoRegister()) {
5481     vex_prefix |= 0x78;
5482   } else if (operand.IsXmmRegister()) {
5483     XmmRegister vvvv = operand.AsXmmRegister();
5484     int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
5485     uint8_t reg = static_cast<uint8_t>(inverted_reg);
5486     vex_prefix |= ((reg & 0x0F) << 3);
5487   } else if (operand.IsCpuRegister()) {
5488     CpuRegister vvvv = operand.AsCpuRegister();
5489     int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
5490     uint8_t reg = static_cast<uint8_t>(inverted_reg);
5491     vex_prefix |= ((reg & 0x0F) << 3);
5492   }
5493   /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5494   VEX.L = 0 indicates 128 bit vector operation */
5495   vex_prefix |= SET_VEX_L;
5496   // Bits[1:0] -  "pp"
5497   vex_prefix |= SET_VEX_PP;
5498   return vex_prefix;
5499 }
5500 
5501 uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
5502                                               X86_64ManagedRegister operand,
5503                                               int SET_VEX_L,
5504                                               int SET_VEX_PP) {
5505   // Vex Byte 2,
5506   uint8_t vex_prefix = VEX_INIT;
5507 
5508   /** Bit[7] This bits needs to be set to '1' with default value.
5509   When using C4H form of VEX prefix, REX.W value is ignored */
5510   if (W) {
5511     vex_prefix |= SET_VEX_W;
5512   }
5513   // Bits[6:3] - 'vvvv' the source or dest register specifier
5514   if (operand.IsXmmRegister()) {
5515     XmmRegister vvvv = operand.AsXmmRegister();
5516     int inverted_reg = 15 - static_cast<int>(vvvv.AsFloatRegister());
5517     uint8_t reg = static_cast<uint8_t>(inverted_reg);
5518     vex_prefix |= ((reg & 0x0F) << 3);
5519   } else if (operand.IsCpuRegister()) {
5520     CpuRegister vvvv = operand.AsCpuRegister();
5521     int inverted_reg = 15 - static_cast<int>(vvvv.AsRegister());
5522     uint8_t reg = static_cast<uint8_t>(inverted_reg);
5523     vex_prefix |= ((reg & 0x0F) << 3);
5524   }
5525   /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5526   VEX.L = 0 indicates 128 bit vector operation */
5527   vex_prefix |= SET_VEX_L;
5528   // Bits[1:0] -  "pp"
5529   vex_prefix |= SET_VEX_PP;
5530   return vex_prefix;
5531 }
5532 
5533 uint8_t X86_64Assembler::EmitVexPrefixByteTwo(bool W,
5534                                               int SET_VEX_L,
5535                                               int SET_VEX_PP) {
5536   // Vex Byte 2,
5537   uint8_t vex_prefix = VEX_INIT;
5538 
5539   /** Bit[7] This bits needs to be set to '1' with default value.
5540   When using C4H form of VEX prefix, REX.W value is ignored */
5541   if (W) {
5542     vex_prefix |= SET_VEX_W;
5543   }
5544   /** Bits[6:3] - 'vvvv' the source or dest register specifier */
5545   vex_prefix |= (0x0F << 3);
5546   /** Bit[2] - "L" If VEX.L = 1 indicates 256-bit vector operation,
5547   VEX.L = 0 indicates 128 bit vector operation */
5548   vex_prefix |= SET_VEX_L;
5549 
5550   // Bits[1:0] -  "pp"
5551   if (SET_VEX_PP != SET_VEX_PP_NONE) {
5552     vex_prefix |= SET_VEX_PP;
5553   }
5554   return vex_prefix;
5555 }
5556 
5557 }  // namespace x86_64
5558 }  // namespace art
5559