/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_
#define ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_

#include <vector>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/arena_containers.h"
#include "base/array_ref.h"
#include "base/bit_utils.h"
#include "base/globals.h"
#include "base/macros.h"
#include "constants_x86_64.h"
#include "heap_poisoning.h"
#include "managed_register_x86_64.h"
#include "offsets.h"
#include "utils/assembler.h"
#include "utils/jni_macro_assembler.h"

namespace art {
namespace x86_64 {

// Encodes an immediate value for operands.
//
// Note: Immediates can be 64b on x86-64 for certain instructions, but are often restricted
// to 32b.
//
// Note: As we support cross-compilation, the value type must be int64_t. Please be aware of
// conversion rules in expressions regarding negation, especially size_t on 32b.
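//
// For instance (an illustrative sketch; 'len' stands for some size_t value, not a name used
// here): on a 32b host, Immediate(-len) negates in 32 bits before widening to int64_t,
// whereas Immediate(-static_cast<int64_t>(len)) preserves the intended negative 64b value.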
class Immediate : public ValueObject {
 public:
  explicit Immediate(int64_t value_in) : value_(value_in) {}

  int64_t value() const { return value_; }

  bool is_int8() const { return IsInt<8>(value_); }
  bool is_uint8() const { return IsUint<8>(value_); }
  bool is_int16() const { return IsInt<16>(value_); }
  bool is_uint16() const { return IsUint<16>(value_); }
  bool is_int32() const { return IsInt<32>(value_); }

 private:
  const int64_t value_;
};


class Operand : public ValueObject {
 public:
  uint8_t mod() const {
    return (encoding_at(0) >> 6) & 3;
  }

  Register rm() const {
    return static_cast<Register>(encoding_at(0) & 7);
  }

  ScaleFactor scale() const {
    return static_cast<ScaleFactor>((encoding_at(1) >> 6) & 3);
  }

  Register index() const {
    return static_cast<Register>((encoding_at(1) >> 3) & 7);
  }

  Register base() const {
    return static_cast<Register>(encoding_at(1) & 7);
  }

  CpuRegister cpu_rm() const {
    int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
    return static_cast<CpuRegister>(rm() + ext);
  }

  CpuRegister cpu_index() const {
    int ext = (rex_ & 2) != 0 ? x86_64::R8 : x86_64::RAX;
    return static_cast<CpuRegister>(index() + ext);
  }

  CpuRegister cpu_base() const {
    int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
    return static_cast<CpuRegister>(base() + ext);
  }

  uint8_t rex() const {
    return rex_;
  }

  int8_t disp8() const {
    CHECK_GE(length_, 2);
    return static_cast<int8_t>(encoding_[length_ - 1]);
  }

  int32_t disp32() const {
    CHECK_GE(length_, 5);
    int32_t value;
    memcpy(&value, &encoding_[length_ - 4], sizeof(value));
    return value;
  }

  bool IsRegister(CpuRegister reg) const {
    return ((encoding_[0] & 0xF8) == 0xC0)          // Addressing mode is register only.
        && ((encoding_[0] & 0x07) == reg.LowBits()) // Register codes match.
        && (reg.NeedsRex() == ((rex_ & 1) != 0));   // REX.000B bits match.
  }

  AssemblerFixup* GetFixup() const {
    return fixup_;
  }

 protected:
  // Operand can be subclassed (e.g., Address).
  Operand() : rex_(0), length_(0), fixup_(nullptr) { }

  void SetModRM(uint8_t mod_in, CpuRegister rm_in) {
    CHECK_EQ(mod_in & ~3, 0);
    if (rm_in.NeedsRex()) {
      rex_ |= 0x41;  // REX.000B
    }
    encoding_[0] = (mod_in << 6) | rm_in.LowBits();
    length_ = 1;
  }

  void SetSIB(ScaleFactor scale_in, CpuRegister index_in, CpuRegister base_in) {
    CHECK_EQ(length_, 1);
    CHECK_EQ(scale_in & ~3, 0);
    if (base_in.NeedsRex()) {
      rex_ |= 0x41;  // REX.000B
    }
    if (index_in.NeedsRex()) {
      rex_ |= 0x42;  // REX.00X0
    }
    encoding_[1] = (scale_in << 6) | (static_cast<uint8_t>(index_in.LowBits()) << 3) |
        static_cast<uint8_t>(base_in.LowBits());
    length_ = 2;
  }

  void SetDisp8(int8_t disp) {
    CHECK(length_ == 1 || length_ == 2);
    encoding_[length_++] = static_cast<uint8_t>(disp);
  }

  void SetDisp32(int32_t disp) {
    CHECK(length_ == 1 || length_ == 2);
    int disp_size = sizeof(disp);
    memmove(&encoding_[length_], &disp, disp_size);
    length_ += disp_size;
  }

  void SetFixup(AssemblerFixup* fixup) {
    fixup_ = fixup;
  }

 private:
  uint8_t rex_;
  uint8_t length_;
  uint8_t encoding_[6];
  AssemblerFixup* fixup_;

  explicit Operand(CpuRegister reg) : rex_(0), length_(0), fixup_(nullptr) { SetModRM(3, reg); }

  // Get the operand encoding byte at the given index.
  uint8_t encoding_at(int index_in) const {
    CHECK_GE(index_in, 0);
    CHECK_LT(index_in, length_);
    return encoding_[index_in];
  }

  friend class X86_64Assembler;
};


class Address : public Operand {
 public:
  Address(CpuRegister base_in, int32_t disp) {
    Init(base_in, disp);
  }

  Address(CpuRegister base_in, Offset disp) {
    Init(base_in, disp.Int32Value());
  }

  Address(CpuRegister base_in, FrameOffset disp) {
    CHECK_EQ(base_in.AsRegister(), RSP);
    Init(CpuRegister(RSP), disp.Int32Value());
  }

  Address(CpuRegister base_in, MemberOffset disp) {
    Init(base_in, disp.Int32Value());
  }

  void Init(CpuRegister base_in, int32_t disp) {
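    // Encoding background for the cases below: a base with low bits 0b100 (RSP/R12) always
    // needs a SIB byte, and a base with low bits 0b101 (RBP/R13) cannot use mod == 0 (that
    // encoding means disp32-only), so such bases always carry an explicit disp8/disp32.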
    if (disp == 0 && base_in.LowBits() != RBP) {
      SetModRM(0, base_in);
      if (base_in.LowBits() == RSP) {
        SetSIB(TIMES_1, CpuRegister(RSP), base_in);
      }
    } else if (disp >= -128 && disp <= 127) {
      SetModRM(1, base_in);
      if (base_in.LowBits() == RSP) {
        SetSIB(TIMES_1, CpuRegister(RSP), base_in);
      }
      SetDisp8(disp);
    } else {
      SetModRM(2, base_in);
      if (base_in.LowBits() == RSP) {
        SetSIB(TIMES_1, CpuRegister(RSP), base_in);
      }
      SetDisp32(disp);
    }
  }


  Address(CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
    CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
    SetModRM(0, CpuRegister(RSP));
    SetSIB(scale_in, index_in, CpuRegister(RBP));
    SetDisp32(disp);
  }

  Address(CpuRegister base_in, CpuRegister index_in, ScaleFactor scale_in, int32_t disp) {
    CHECK_NE(index_in.AsRegister(), RSP);  // Illegal addressing mode.
    if (disp == 0 && base_in.LowBits() != RBP) {
      SetModRM(0, CpuRegister(RSP));
      SetSIB(scale_in, index_in, base_in);
    } else if (disp >= -128 && disp <= 127) {
      SetModRM(1, CpuRegister(RSP));
      SetSIB(scale_in, index_in, base_in);
      SetDisp8(disp);
    } else {
      SetModRM(2, CpuRegister(RSP));
      SetSIB(scale_in, index_in, base_in);
      SetDisp32(disp);
    }
  }
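
  // For example (an illustrative register choice, not taken from this file):
  //   Address(CpuRegister(RDI), CpuRegister(RCX), TIMES_4, 12)
  // describes the memory operand [rdi + rcx*4 + 12].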

  // If no_rip is true then the Absolute address isn't RIP relative.
  static Address Absolute(uintptr_t addr, bool no_rip = false) {
    Address result;
    if (no_rip) {
      result.SetModRM(0, CpuRegister(RSP));
      result.SetSIB(TIMES_1, CpuRegister(RSP), CpuRegister(RBP));
      result.SetDisp32(addr);
    } else {
      // RIP addressing is done using RBP as the base register.
      // The value in RBP isn't used. Instead the offset is added to RIP.
      result.SetModRM(0, CpuRegister(RBP));
      result.SetDisp32(addr);
    }
    return result;
  }

  // An RIP relative address that will be fixed up later.
  static Address RIP(AssemblerFixup* fixup) {
    Address result;
    // RIP addressing is done using RBP as the base register.
    // The value in RBP isn't used. Instead the offset is added to RIP.
    result.SetModRM(0, CpuRegister(RBP));
    result.SetDisp32(0);
    result.SetFixup(fixup);
    return result;
  }

  // If no_rip is true then the Absolute address isn't RIP relative.
  static Address Absolute(ThreadOffset64 addr, bool no_rip = false) {
    return Absolute(addr.Int32Value(), no_rip);
  }
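
  // Illustrative use (a sketch; 'assembler' and 'thread_offset' are placeholders for an
  // X86_64Assembler and a ThreadOffset64 value):
  //   assembler.gs()->movq(CpuRegister(RAX), Address::Absolute(thread_offset, /*no_rip=*/ true));
  // reads a Thread field through the %gs segment using the SIB form instead of RIP-relative
  // addressing.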

 private:
  Address() {}
};

std::ostream& operator<<(std::ostream& os, const Address& addr);

/**
 * Class to handle constant area values.
 */
class ConstantArea {
 public:
  explicit ConstantArea(ArenaAllocator* allocator)
      : buffer_(allocator->Adapter(kArenaAllocAssembler)) {}

  // Add a double to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddDouble(double v);

  // Add a float to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddFloat(float v);

  // Add an int32_t to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddInt32(int32_t v);

  // Add an int32_t to the end of the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AppendInt32(int32_t v);

  // Add an int64_t to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddInt64(int64_t v);

  size_t GetSize() const {
    return buffer_.size() * elem_size_;
  }

  ArrayRef<const int32_t> GetBuffer() const {
    return ArrayRef<const int32_t>(buffer_);
  }

 private:
  static constexpr size_t elem_size_ = sizeof(int32_t);
  ArenaVector<int32_t> buffer_;
};


// This is equivalent to the Label class, used in a slightly different context. We
// inherit the functionality of the Label class, but prevent unintended
// derived-to-base conversions by making the base class private.
class NearLabel : private Label {
 public:
  NearLabel() : Label() {}

  // Expose the Label routines that we need.
  using Label::Position;
  using Label::LinkPosition;
  using Label::IsBound;
  using Label::IsUnused;
  using Label::IsLinked;

 private:
  using Label::BindTo;
  using Label::LinkTo;

  friend class x86_64::X86_64Assembler;

  DISALLOW_COPY_AND_ASSIGN(NearLabel);
};


class X86_64Assembler final : public Assembler {
 public:
  explicit X86_64Assembler(ArenaAllocator* allocator,
                           const X86_64InstructionSetFeatures* instruction_set_features = nullptr)
      : Assembler(allocator),
        constant_area_(allocator),
        has_AVX_(instruction_set_features != nullptr ? instruction_set_features->HasAVX() : false),
        has_AVX2_(instruction_set_features != nullptr ? instruction_set_features->HasAVX2() : false) {}
  virtual ~X86_64Assembler() {}

  /*
   * Emit Machine Instructions.
   */
  void call(CpuRegister reg);
  void call(const Address& address);
  void call(Label* label);

  void pushq(CpuRegister reg);
  void pushq(const Address& address);
  void pushq(const Immediate& imm);

  void popq(CpuRegister reg);
  void popq(const Address& address);

  void movq(CpuRegister dst, const Immediate& src);
  void movl(CpuRegister dst, const Immediate& src);
  void movq(CpuRegister dst, CpuRegister src);
  void movl(CpuRegister dst, CpuRegister src);

  void movntl(const Address& dst, CpuRegister src);
  void movntq(const Address& dst, CpuRegister src);

  void movq(CpuRegister dst, const Address& src);
  void movl(CpuRegister dst, const Address& src);
  void movq(const Address& dst, CpuRegister src);
  void movq(const Address& dst, const Immediate& imm);
  void movl(const Address& dst, CpuRegister src);
  void movl(const Address& dst, const Immediate& imm);

  void cmov(Condition c, CpuRegister dst, CpuRegister src);  // This is the 64b version.
  void cmov(Condition c, CpuRegister dst, CpuRegister src, bool is64bit);
  void cmov(Condition c, CpuRegister dst, const Address& src, bool is64bit);

  void movzxb(CpuRegister dst, CpuRegister src);
  void movzxb(CpuRegister dst, const Address& src);
  void movsxb(CpuRegister dst, CpuRegister src);
  void movsxb(CpuRegister dst, const Address& src);
  void movb(CpuRegister dst, const Address& src);
  void movb(const Address& dst, CpuRegister src);
  void movb(const Address& dst, const Immediate& imm);

  void movzxw(CpuRegister dst, CpuRegister src);
  void movzxw(CpuRegister dst, const Address& src);
  void movsxw(CpuRegister dst, CpuRegister src);
  void movsxw(CpuRegister dst, const Address& src);
  void movw(CpuRegister dst, const Address& src);
  void movw(const Address& dst, CpuRegister src);
  void movw(const Address& dst, const Immediate& imm);

  void leaq(CpuRegister dst, const Address& src);
  void leal(CpuRegister dst, const Address& src);

  void movaps(XmmRegister dst, XmmRegister src);     // move
  void movaps(XmmRegister dst, const Address& src);  // load aligned
  void movups(XmmRegister dst, const Address& src);  // load unaligned
  void movaps(const Address& dst, XmmRegister src);  // store aligned
  void movups(const Address& dst, XmmRegister src);  // store unaligned

  void vmovaps(XmmRegister dst, XmmRegister src);     // move
  void vmovaps(XmmRegister dst, const Address& src);  // load aligned
  void vmovaps(const Address& dst, XmmRegister src);  // store aligned
  void vmovups(XmmRegister dst, const Address& src);  // load unaligned
  void vmovups(const Address& dst, XmmRegister src);  // store unaligned

  void movss(XmmRegister dst, const Address& src);
  void movss(const Address& dst, XmmRegister src);
  void movss(XmmRegister dst, XmmRegister src);

  void movsxd(CpuRegister dst, CpuRegister src);
  void movsxd(CpuRegister dst, const Address& src);

  void movd(XmmRegister dst, CpuRegister src);  // Note: this is the r64 version, formally movq.
  void movd(CpuRegister dst, XmmRegister src);  // Note: this is the r64 version, formally movq.
  void movd(XmmRegister dst, CpuRegister src, bool is64bit);
  void movd(CpuRegister dst, XmmRegister src, bool is64bit);

  void addss(XmmRegister dst, XmmRegister src);
  void addss(XmmRegister dst, const Address& src);
  void subss(XmmRegister dst, XmmRegister src);
  void subss(XmmRegister dst, const Address& src);
  void mulss(XmmRegister dst, XmmRegister src);
  void mulss(XmmRegister dst, const Address& src);
  void divss(XmmRegister dst, XmmRegister src);
  void divss(XmmRegister dst, const Address& src);

  void addps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void subps(XmmRegister dst, XmmRegister src);
  void mulps(XmmRegister dst, XmmRegister src);
  void divps(XmmRegister dst, XmmRegister src);

  void vmulps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vmulpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vdivps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vdivpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);

  void vaddps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
  void vsubps(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
  void vsubpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
  void vaddpd(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);

  void movapd(XmmRegister dst, XmmRegister src);     // move
  void movapd(XmmRegister dst, const Address& src);  // load aligned
  void movupd(XmmRegister dst, const Address& src);  // load unaligned
  void movapd(const Address& dst, XmmRegister src);  // store aligned
  void movupd(const Address& dst, XmmRegister src);  // store unaligned

  void vmovapd(XmmRegister dst, XmmRegister src);     // move
  void vmovapd(XmmRegister dst, const Address& src);  // load aligned
  void vmovapd(const Address& dst, XmmRegister src);  // store aligned
  void vmovupd(XmmRegister dst, const Address& src);  // load unaligned
  void vmovupd(const Address& dst, XmmRegister src);  // store unaligned

  void movsd(XmmRegister dst, const Address& src);
  void movsd(const Address& dst, XmmRegister src);
  void movsd(XmmRegister dst, XmmRegister src);

  void addsd(XmmRegister dst, XmmRegister src);
  void addsd(XmmRegister dst, const Address& src);
  void subsd(XmmRegister dst, XmmRegister src);
  void subsd(XmmRegister dst, const Address& src);
  void mulsd(XmmRegister dst, XmmRegister src);
  void mulsd(XmmRegister dst, const Address& src);
  void divsd(XmmRegister dst, XmmRegister src);
  void divsd(XmmRegister dst, const Address& src);

  void addpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void subpd(XmmRegister dst, XmmRegister src);
  void mulpd(XmmRegister dst, XmmRegister src);
  void divpd(XmmRegister dst, XmmRegister src);

  void movdqa(XmmRegister dst, XmmRegister src);     // move
  void movdqa(XmmRegister dst, const Address& src);  // load aligned
  void movdqu(XmmRegister dst, const Address& src);  // load unaligned
  void movdqa(const Address& dst, XmmRegister src);  // store aligned
  void movdqu(const Address& dst, XmmRegister src);  // store unaligned

  void vmovdqa(XmmRegister dst, XmmRegister src);     // move
  void vmovdqa(XmmRegister dst, const Address& src);  // load aligned
  void vmovdqa(const Address& dst, XmmRegister src);  // store aligned
  void vmovdqu(XmmRegister dst, const Address& src);  // load unaligned
  void vmovdqu(const Address& dst, XmmRegister src);  // store unaligned

  void paddb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void psubb(XmmRegister dst, XmmRegister src);

  void vpaddb(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
  void vpaddw(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);

  void paddw(XmmRegister dst, XmmRegister src);
  void psubw(XmmRegister dst, XmmRegister src);
  void pmullw(XmmRegister dst, XmmRegister src);
  void vpmullw(XmmRegister dst, XmmRegister src1, XmmRegister src2);

  void vpsubb(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vpsubw(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vpsubd(XmmRegister dst, XmmRegister src1, XmmRegister src2);

  void paddd(XmmRegister dst, XmmRegister src);
  void psubd(XmmRegister dst, XmmRegister src);
  void pmulld(XmmRegister dst, XmmRegister src);
  void vpmulld(XmmRegister dst, XmmRegister src1, XmmRegister src2);

  void vpaddd(XmmRegister dst, XmmRegister src1, XmmRegister src2);

  void paddq(XmmRegister dst, XmmRegister src);
  void psubq(XmmRegister dst, XmmRegister src);

  void vpaddq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);
  void vpsubq(XmmRegister dst, XmmRegister add_left, XmmRegister add_right);

  void paddusb(XmmRegister dst, XmmRegister src);
  void paddsb(XmmRegister dst, XmmRegister src);
  void paddusw(XmmRegister dst, XmmRegister src);
  void paddsw(XmmRegister dst, XmmRegister src);
  void psubusb(XmmRegister dst, XmmRegister src);
  void psubsb(XmmRegister dst, XmmRegister src);
  void psubusw(XmmRegister dst, XmmRegister src);
  void psubsw(XmmRegister dst, XmmRegister src);

  void cvtsi2ss(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
  void cvtsi2ss(XmmRegister dst, CpuRegister src, bool is64bit);
  void cvtsi2ss(XmmRegister dst, const Address& src, bool is64bit);
  void cvtsi2sd(XmmRegister dst, CpuRegister src);  // Note: this is the r/m32 version.
  void cvtsi2sd(XmmRegister dst, CpuRegister src, bool is64bit);
  void cvtsi2sd(XmmRegister dst, const Address& src, bool is64bit);

  void cvtss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
  void cvtss2sd(XmmRegister dst, XmmRegister src);
  void cvtss2sd(XmmRegister dst, const Address& src);

  void cvtsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
  void cvtsd2ss(XmmRegister dst, XmmRegister src);
  void cvtsd2ss(XmmRegister dst, const Address& src);

  void cvttss2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
  void cvttss2si(CpuRegister dst, XmmRegister src, bool is64bit);
  void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
  void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);

  void cvtdq2ps(XmmRegister dst, XmmRegister src);
  void cvtdq2pd(XmmRegister dst, XmmRegister src);

  void comiss(XmmRegister a, XmmRegister b);
  void comiss(XmmRegister a, const Address& b);
  void comisd(XmmRegister a, XmmRegister b);
  void comisd(XmmRegister a, const Address& b);
  void ucomiss(XmmRegister a, XmmRegister b);
  void ucomiss(XmmRegister a, const Address& b);
  void ucomisd(XmmRegister a, XmmRegister b);
  void ucomisd(XmmRegister a, const Address& b);

  void roundsd(XmmRegister dst, XmmRegister src, const Immediate& imm);
  void roundss(XmmRegister dst, XmmRegister src, const Immediate& imm);

  void sqrtsd(XmmRegister dst, XmmRegister src);
  void sqrtss(XmmRegister dst, XmmRegister src);

  void xorpd(XmmRegister dst, const Address& src);
  void xorpd(XmmRegister dst, XmmRegister src);
  void xorps(XmmRegister dst, const Address& src);
  void xorps(XmmRegister dst, XmmRegister src);
  void pxor(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void vpxor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vxorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vxorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);

  void andpd(XmmRegister dst, const Address& src);
  void andpd(XmmRegister dst, XmmRegister src);
  void andps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void pand(XmmRegister dst, XmmRegister src);
  void vpand(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vandps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vandpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);

  void andn(CpuRegister dst, CpuRegister src1, CpuRegister src2);
  void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void andnps(XmmRegister dst, XmmRegister src);
  void pandn(XmmRegister dst, XmmRegister src);
  void vpandn(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vandnps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vandnpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);

  void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void orps(XmmRegister dst, XmmRegister src);
  void por(XmmRegister dst, XmmRegister src);
  void vpor(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vorps(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void vorpd(XmmRegister dst, XmmRegister src1, XmmRegister src2);

  void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void pavgw(XmmRegister dst, XmmRegister src);
  void psadbw(XmmRegister dst, XmmRegister src);
  void pmaddwd(XmmRegister dst, XmmRegister src);
  void vpmaddwd(XmmRegister dst, XmmRegister src1, XmmRegister src2);
  void phaddw(XmmRegister dst, XmmRegister src);
  void phaddd(XmmRegister dst, XmmRegister src);
  void haddps(XmmRegister dst, XmmRegister src);
  void haddpd(XmmRegister dst, XmmRegister src);
  void phsubw(XmmRegister dst, XmmRegister src);
  void phsubd(XmmRegister dst, XmmRegister src);
  void hsubps(XmmRegister dst, XmmRegister src);
  void hsubpd(XmmRegister dst, XmmRegister src);

  void pminsb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void pmaxsb(XmmRegister dst, XmmRegister src);
  void pminsw(XmmRegister dst, XmmRegister src);
  void pmaxsw(XmmRegister dst, XmmRegister src);
  void pminsd(XmmRegister dst, XmmRegister src);
  void pmaxsd(XmmRegister dst, XmmRegister src);

  void pminub(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void pmaxub(XmmRegister dst, XmmRegister src);
  void pminuw(XmmRegister dst, XmmRegister src);
  void pmaxuw(XmmRegister dst, XmmRegister src);
  void pminud(XmmRegister dst, XmmRegister src);
  void pmaxud(XmmRegister dst, XmmRegister src);

  void minps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
  void maxps(XmmRegister dst, XmmRegister src);
  void minpd(XmmRegister dst, XmmRegister src);
  void maxpd(XmmRegister dst, XmmRegister src);

  void pcmpeqb(XmmRegister dst, XmmRegister src);
  void pcmpeqw(XmmRegister dst, XmmRegister src);
  void pcmpeqd(XmmRegister dst, XmmRegister src);
  void pcmpeqq(XmmRegister dst, XmmRegister src);

  void pcmpgtb(XmmRegister dst, XmmRegister src);
  void pcmpgtw(XmmRegister dst, XmmRegister src);
  void pcmpgtd(XmmRegister dst, XmmRegister src);
  void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2

  void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
  void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
  void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);

  void punpcklbw(XmmRegister dst, XmmRegister src);
  void punpcklwd(XmmRegister dst, XmmRegister src);
  void punpckldq(XmmRegister dst, XmmRegister src);
  void punpcklqdq(XmmRegister dst, XmmRegister src);

  void punpckhbw(XmmRegister dst, XmmRegister src);
  void punpckhwd(XmmRegister dst, XmmRegister src);
  void punpckhdq(XmmRegister dst, XmmRegister src);
  void punpckhqdq(XmmRegister dst, XmmRegister src);

  void psllw(XmmRegister reg, const Immediate& shift_count);
  void pslld(XmmRegister reg, const Immediate& shift_count);
  void psllq(XmmRegister reg, const Immediate& shift_count);

  void psraw(XmmRegister reg, const Immediate& shift_count);
  void psrad(XmmRegister reg, const Immediate& shift_count);
  // no psraq

  void psrlw(XmmRegister reg, const Immediate& shift_count);
  void psrld(XmmRegister reg, const Immediate& shift_count);
  void psrlq(XmmRegister reg, const Immediate& shift_count);
  void psrldq(XmmRegister reg, const Immediate& shift_count);

  void flds(const Address& src);
  void fstps(const Address& dst);
  void fsts(const Address& dst);

  void fldl(const Address& src);
  void fstpl(const Address& dst);
  void fstl(const Address& dst);

  void fstsw();

  void fucompp();

  void fnstcw(const Address& dst);
  void fldcw(const Address& src);

  void fistpl(const Address& dst);
  void fistps(const Address& dst);
  void fildl(const Address& src);
  void filds(const Address& src);

  void fincstp();
  void ffree(const Immediate& index);

  void fsin();
  void fcos();
  void fptan();
  void fprem();

  void xchgl(CpuRegister dst, CpuRegister src);
  void xchgq(CpuRegister dst, CpuRegister src);
  void xchgl(CpuRegister reg, const Address& address);

  void cmpb(const Address& address, const Immediate& imm);
  void cmpw(const Address& address, const Immediate& imm);

  void cmpl(CpuRegister reg, const Immediate& imm);
  void cmpl(CpuRegister reg0, CpuRegister reg1);
  void cmpl(CpuRegister reg, const Address& address);
  void cmpl(const Address& address, CpuRegister reg);
  void cmpl(const Address& address, const Immediate& imm);

  void cmpq(CpuRegister reg0, CpuRegister reg1);
  void cmpq(CpuRegister reg0, const Immediate& imm);
  void cmpq(CpuRegister reg0, const Address& address);
  void cmpq(const Address& address, const Immediate& imm);

  void testl(CpuRegister reg1, CpuRegister reg2);
  void testl(CpuRegister reg, const Address& address);
  void testl(CpuRegister reg, const Immediate& imm);

  void testq(CpuRegister reg1, CpuRegister reg2);
  void testq(CpuRegister reg, const Address& address);

  void testb(const Address& address, const Immediate& imm);
  void testl(const Address& address, const Immediate& imm);

  void andl(CpuRegister dst, const Immediate& imm);
  void andl(CpuRegister dst, CpuRegister src);
  void andl(CpuRegister reg, const Address& address);
  void andq(CpuRegister dst, const Immediate& imm);
  void andq(CpuRegister dst, CpuRegister src);
  void andq(CpuRegister reg, const Address& address);

  void orl(CpuRegister dst, const Immediate& imm);
  void orl(CpuRegister dst, CpuRegister src);
  void orl(CpuRegister reg, const Address& address);
  void orq(CpuRegister dst, CpuRegister src);
  void orq(CpuRegister dst, const Immediate& imm);
  void orq(CpuRegister reg, const Address& address);

  void xorl(CpuRegister dst, CpuRegister src);
  void xorl(CpuRegister dst, const Immediate& imm);
  void xorl(CpuRegister reg, const Address& address);
  void xorq(CpuRegister dst, const Immediate& imm);
  void xorq(CpuRegister dst, CpuRegister src);
  void xorq(CpuRegister reg, const Address& address);

  void addl(CpuRegister dst, CpuRegister src);
  void addl(CpuRegister reg, const Immediate& imm);
  void addl(CpuRegister reg, const Address& address);
  void addl(const Address& address, CpuRegister reg);
  void addl(const Address& address, const Immediate& imm);
  void addw(const Address& address, const Immediate& imm);

  void addq(CpuRegister reg, const Immediate& imm);
  void addq(CpuRegister dst, CpuRegister src);
  void addq(CpuRegister dst, const Address& address);

  void subl(CpuRegister dst, CpuRegister src);
  void subl(CpuRegister reg, const Immediate& imm);
  void subl(CpuRegister reg, const Address& address);

  void subq(CpuRegister reg, const Immediate& imm);
  void subq(CpuRegister dst, CpuRegister src);
  void subq(CpuRegister dst, const Address& address);

  void cdq();
  void cqo();

  void idivl(CpuRegister reg);
  void idivq(CpuRegister reg);

  void imull(CpuRegister dst, CpuRegister src);
  void imull(CpuRegister reg, const Immediate& imm);
  void imull(CpuRegister dst, CpuRegister src, const Immediate& imm);
  void imull(CpuRegister reg, const Address& address);

  void imulq(CpuRegister src);
  void imulq(CpuRegister dst, CpuRegister src);
  void imulq(CpuRegister reg, const Immediate& imm);
  void imulq(CpuRegister reg, const Address& address);
  void imulq(CpuRegister dst, CpuRegister reg, const Immediate& imm);

  void imull(CpuRegister reg);
  void imull(const Address& address);

  void mull(CpuRegister reg);
  void mull(const Address& address);

  void shll(CpuRegister reg, const Immediate& imm);
  void shll(CpuRegister operand, CpuRegister shifter);
  void shrl(CpuRegister reg, const Immediate& imm);
  void shrl(CpuRegister operand, CpuRegister shifter);
  void sarl(CpuRegister reg, const Immediate& imm);
  void sarl(CpuRegister operand, CpuRegister shifter);

  void shlq(CpuRegister reg, const Immediate& imm);
  void shlq(CpuRegister operand, CpuRegister shifter);
  void shrq(CpuRegister reg, const Immediate& imm);
  void shrq(CpuRegister operand, CpuRegister shifter);
  void sarq(CpuRegister reg, const Immediate& imm);
  void sarq(CpuRegister operand, CpuRegister shifter);

  void negl(CpuRegister reg);
  void negq(CpuRegister reg);

  void notl(CpuRegister reg);
  void notq(CpuRegister reg);

  void enter(const Immediate& imm);
  void leave();

  void ret();
  void ret(const Immediate& imm);

  void nop();
  void int3();
  void hlt();

  void j(Condition condition, Label* label);
  void j(Condition condition, NearLabel* label);
  void jrcxz(NearLabel* label);

  void jmp(CpuRegister reg);
  void jmp(const Address& address);
  void jmp(Label* label);
  void jmp(NearLabel* label);

  X86_64Assembler* lock();
  void cmpxchgl(const Address& address, CpuRegister reg);
  void cmpxchgq(const Address& address, CpuRegister reg);

  void mfence();

  X86_64Assembler* gs();

  void setcc(Condition condition, CpuRegister dst);

  void bswapl(CpuRegister dst);
  void bswapq(CpuRegister dst);

  void bsfl(CpuRegister dst, CpuRegister src);
  void bsfl(CpuRegister dst, const Address& src);
  void bsfq(CpuRegister dst, CpuRegister src);
  void bsfq(CpuRegister dst, const Address& src);

  void blsi(CpuRegister dst, CpuRegister src);    // no addr variant (for now)
  void blsmsk(CpuRegister dst, CpuRegister src);  // no addr variant (for now)
  void blsr(CpuRegister dst, CpuRegister src);    // no addr variant (for now)

  void bsrl(CpuRegister dst, CpuRegister src);
  void bsrl(CpuRegister dst, const Address& src);
  void bsrq(CpuRegister dst, CpuRegister src);
  void bsrq(CpuRegister dst, const Address& src);

  void popcntl(CpuRegister dst, CpuRegister src);
  void popcntl(CpuRegister dst, const Address& src);
  void popcntq(CpuRegister dst, CpuRegister src);
  void popcntq(CpuRegister dst, const Address& src);

  void rorl(CpuRegister reg, const Immediate& imm);
  void rorl(CpuRegister operand, CpuRegister shifter);
  void roll(CpuRegister reg, const Immediate& imm);
  void roll(CpuRegister operand, CpuRegister shifter);

  void rorq(CpuRegister reg, const Immediate& imm);
  void rorq(CpuRegister operand, CpuRegister shifter);
  void rolq(CpuRegister reg, const Immediate& imm);
  void rolq(CpuRegister operand, CpuRegister shifter);

  void repne_scasb();
  void repne_scasw();
  void repe_cmpsw();
  void repe_cmpsl();
  void repe_cmpsq();
  void rep_movsw();

  //
  // Macros for High-level operations.
  //

  void AddImmediate(CpuRegister reg, const Immediate& imm);

  void LoadDoubleConstant(XmmRegister dst, double value);

  void LockCmpxchgl(const Address& address, CpuRegister reg) {
    lock()->cmpxchgl(address, reg);
  }

  void LockCmpxchgq(const Address& address, CpuRegister reg) {
    lock()->cmpxchgq(address, reg);
  }
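
  // Illustrative compare-and-swap sequence (a sketch; 'expected', 'field_address', 'new_value'
  // and 'success' are placeholders, not names from this file). cmpxchg compares against RAX/EAX:
  //   assembler.movl(CpuRegister(RAX), expected);
  //   assembler.LockCmpxchgl(field_address, new_value);
  //   assembler.setcc(kEqual, success);  // ZF is set when the exchange succeeded.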

  //
  // Misc. functionality
  //
  int PreferredLoopAlignment() { return 16; }
  void Align(int alignment, int offset);
  void Bind(Label* label) override;
  void Jump(Label* label) override {
    jmp(label);
  }
  void Bind(NearLabel* label);

  // Add a double to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddDouble(double v) { return constant_area_.AddDouble(v); }

  // Add a float to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddFloat(float v) { return constant_area_.AddFloat(v); }

  // Add an int32_t to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddInt32(int32_t v) {
    return constant_area_.AddInt32(v);
  }

  // Add an int32_t to the end of the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AppendInt32(int32_t v) {
    return constant_area_.AppendInt32(v);
  }

  // Add an int64_t to the constant area, returning the offset into
  // the constant area where the literal resides.
  size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }

  // Add the contents of the constant area to the assembler buffer.
  void AddConstantArea();

  // Is the constant area empty? Return true if there are no literals in the constant area.
  bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }

  // Return the current size of the constant area.
  size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
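
  // Typical flow (a sketch; the RIP fixup that ties the load to the literal lives in the code
  // generator, not in this class, and 'fixup'/'xmm_reg' are placeholders):
  //   size_t offset = assembler.AddDouble(2.0);        // record the literal, remember its offset
  //   assembler.movsd(xmm_reg, Address::RIP(fixup));   // load it once the fixup is resolved
  //   assembler.AddConstantArea();                     // append all literals after the code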

  //
  // Heap poisoning.
  //

  // Poison a heap reference contained in `reg`.
  void PoisonHeapReference(CpuRegister reg) { negl(reg); }
  // Unpoison a heap reference contained in `reg`.
  void UnpoisonHeapReference(CpuRegister reg) { negl(reg); }
  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
  void MaybePoisonHeapReference(CpuRegister reg) {
    if (kPoisonHeapReferences) {
      PoisonHeapReference(reg);
    }
  }
  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
  void MaybeUnpoisonHeapReference(CpuRegister reg) {
    if (kPoisonHeapReferences) {
      UnpoisonHeapReference(reg);
    }
  }
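
  // Poisoning is an involution here: a poisoned reference is the 32-bit two's-complement
  // negation of the original value (e.g. 0x12345678 <-> 0xEDCBA988), so poisoning and
  // unpoisoning both emit negl.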

  bool CpuHasAVXorAVX2FeatureFlag();

 private:
  void EmitUint8(uint8_t value);
  void EmitInt32(int32_t value);
  void EmitInt64(int64_t value);
  void EmitRegisterOperand(uint8_t rm, uint8_t reg);
  void EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg);
  void EmitFixup(AssemblerFixup* fixup);
  void EmitOperandSizeOverride();

  void EmitOperand(uint8_t rm, const Operand& operand);
  void EmitImmediate(const Immediate& imm, bool is_16_op = false);
  void EmitComplex(
      uint8_t rm, const Operand& operand, const Immediate& immediate, bool is_16_op = false);
  void EmitLabel(Label* label, int instruction_size);
  void EmitLabelLink(Label* label);
  void EmitLabelLink(NearLabel* label);

  void EmitGenericShift(bool wide, int rm, CpuRegister reg, const Immediate& imm);
  void EmitGenericShift(bool wide, int rm, CpuRegister operand, CpuRegister shifter);

  // If any input is not false, output the necessary rex prefix.
  void EmitOptionalRex(bool force, bool w, bool r, bool x, bool b);

  // Emit a rex prefix byte if necessary for reg, i.e. if reg is a register in the range R8 to R15.
  void EmitOptionalRex32(CpuRegister reg);
  void EmitOptionalRex32(CpuRegister dst, CpuRegister src);
  void EmitOptionalRex32(XmmRegister dst, XmmRegister src);
  void EmitOptionalRex32(CpuRegister dst, XmmRegister src);
  void EmitOptionalRex32(XmmRegister dst, CpuRegister src);
  void EmitOptionalRex32(const Operand& operand);
  void EmitOptionalRex32(CpuRegister dst, const Operand& operand);
  void EmitOptionalRex32(XmmRegister dst, const Operand& operand);
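
  // For example, an instruction whose ModRM reg field names R8-R15 needs REX.R, one whose
  // r/m or SIB base names R8-R15 needs REX.B, and a SIB index in R8-R15 needs REX.X; for
  // these 32-bit forms the prefix byte is emitted only when such a bit is actually required.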

  // Emit a REX.W prefix plus necessary register bit encodings.
  void EmitRex64();
  void EmitRex64(CpuRegister reg);
  void EmitRex64(const Operand& operand);
  void EmitRex64(CpuRegister dst, CpuRegister src);
  void EmitRex64(CpuRegister dst, const Operand& operand);
  void EmitRex64(XmmRegister dst, const Operand& operand);
  void EmitRex64(XmmRegister dst, CpuRegister src);
  void EmitRex64(CpuRegister dst, XmmRegister src);

  // Emit a REX prefix to normalize byte registers plus necessary register bit encodings.
  void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, CpuRegister src);
  void EmitOptionalByteRegNormalizingRex32(CpuRegister dst, const Operand& operand);

  uint8_t EmitVexPrefixByteZero(bool is_twobyte_form);
  uint8_t EmitVexPrefixByteOne(bool R, bool X, bool B, int SET_VEX_M);
  uint8_t EmitVexPrefixByteOne(bool R,
                               X86_64ManagedRegister operand,
                               int SET_VEX_L,
                               int SET_VEX_PP);
  uint8_t EmitVexPrefixByteTwo(bool W,
                               X86_64ManagedRegister operand,
                               int SET_VEX_L,
                               int SET_VEX_PP);
  uint8_t EmitVexPrefixByteTwo(bool W,
                               int SET_VEX_L,
                               int SET_VEX_PP);
  ConstantArea constant_area_;
  bool has_AVX_;   // x86 256bit SIMD AVX.
  bool has_AVX2_;  // x86 256bit SIMD AVX 2.0.

  DISALLOW_COPY_AND_ASSIGN(X86_64Assembler);
};

inline void X86_64Assembler::EmitUint8(uint8_t value) {
  buffer_.Emit<uint8_t>(value);
}

inline void X86_64Assembler::EmitInt32(int32_t value) {
  buffer_.Emit<int32_t>(value);
}

inline void X86_64Assembler::EmitInt64(int64_t value) {
  // Write this 64-bit value as two 32-bit words for alignment reasons
  // (this is essential when running on ARM, which does not allow
  // 64-bit unaligned accesses). We assume little-endianness here.
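  // For instance, 0x1122334455667788 is emitted as the 32-bit word 0x55667788 followed by
  // 0x11223344.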
  EmitInt32(Low32Bits(value));
  EmitInt32(High32Bits(value));
}

inline void X86_64Assembler::EmitRegisterOperand(uint8_t rm, uint8_t reg) {
  CHECK_GE(rm, 0);
  CHECK_LT(rm, 8);
  buffer_.Emit<uint8_t>((0xC0 | (reg & 7)) + (rm << 3));
}

inline void X86_64Assembler::EmitXmmRegisterOperand(uint8_t rm, XmmRegister reg) {
  EmitRegisterOperand(rm, static_cast<uint8_t>(reg.AsFloatRegister()));
}

inline void X86_64Assembler::EmitFixup(AssemblerFixup* fixup) {
  buffer_.EmitFixup(fixup);
}

inline void X86_64Assembler::EmitOperandSizeOverride() {
  EmitUint8(0x66);
}

}  // namespace x86_64
}  // namespace art

#endif  // ART_COMPILER_UTILS_X86_64_ASSEMBLER_X86_64_H_