/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_x86_64.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"

MACRO0(ASSERT_USE_READ_BARRIER)
#if !defined(USE_READ_BARRIER)
    int3
    int3
#endif
END_MACRO
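
// Note: int3 raises SIGTRAP, so falling into the assert above (or into the
// __APPLE__ stubs below) traps immediately instead of silently running code
// built for the wrong configuration.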

// For x86_64, the CFA is rsp+8, the address above the pushed return address on the stack.


    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
#endif  // __APPLE__
END_MACRO

MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Store ArtMethod to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R14 and R15 are already saved.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
#if defined(__APPLE__)
    int3
    int3
#else
    // Save core registers from highest to lowest to agree with core spills bitmap.
    // R14 and R15, or at least placeholders for them, are already on the stack.
    PUSH r13
    PUSH r12
    PUSH r11
    PUSH r10
    PUSH r9
    PUSH r8
    PUSH rdi
    PUSH rsi
    PUSH rbp
    PUSH rbx
    PUSH rdx
    PUSH rcx
    PUSH rax
    // Create space for FPRs and stack alignment padding.
    subq MACRO_LITERAL(8 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save FPRs.
    movq %xmm0, 8(%rsp)
    movq %xmm1, 16(%rsp)
    movq %xmm2, 24(%rsp)
    movq %xmm3, 32(%rsp)
    movq %xmm4, 40(%rsp)
    movq %xmm5, 48(%rsp)
    movq %xmm6, 56(%rsp)
    movq %xmm7, 64(%rsp)
    movq %xmm8, 72(%rsp)
    movq %xmm9, 80(%rsp)
    movq %xmm10, 88(%rsp)
    movq %xmm11, 96(%rsp)
    movq %xmm12, 104(%rsp)
    movq %xmm13, 112(%rsp)
    movq %xmm14, 120(%rsp)
    movq %xmm15, 128(%rsp)
    // Push ArtMethod* for save everything frame method.
    pushq \runtime_method_offset(%r10)
    CFI_ADJUST_CFA_OFFSET(8)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
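    // For reference, the terms above, derived from the pushes in this macro:
    //   15 * 8 = 120 bytes of GPR slots (r15..rax, incl. the two pre-saved slots),
    //   16 * 8 = 128 bytes of FPR slots (xmm0..xmm15),
    //       16 = ArtMethod* slot plus alignment padding,
    //        8 = the caller's return address, giving 272 bytes in total.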
#endif  // __APPLE__
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R15 is already saved.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    PUSH r14
    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    PUSH r15
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
    // Restore FPRs. Method and padding are still on the stack.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm8
    movq 88(%rsp), %xmm9
    movq 96(%rsp), %xmm10
    movq 104(%rsp), %xmm11
    movq 112(%rsp), %xmm12
    movq 120(%rsp), %xmm13
    movq 128(%rsp), %xmm14
    movq 136(%rsp), %xmm15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP rdi
    POP r8
    POP r9
    POP r10
    POP r11
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs.
    addq MACRO_LITERAL(16 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))

    POP rax
    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
    addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))

    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
    // Fault address and return address were saved by the fault handler.
    // Save all registers as basis for long jump context; R15 will replace fault address later.
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
    // Retrieve fault address and save R15.
    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
    // Outgoing argument set up; RDI already contains the fault address.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
    UNREACHABLE
END_FUNCTION art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
     *
     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
     * of the target Method* in rax and method->code_ in rdx.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
     * location on the stack.
     *
     * Adapted from x86 code.
     */
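// For orientation only, the C++ helpers named below roughly follow this shape
// (a sketch, not the authoritative declaration; Xxx stands for the invoke kind):
//   extern "C" TwoWordReturn artInvokeXxxTrampolineWithAccessCheck(
//       uint32_t method_idx, mirror::Object* this_object, Thread* self, ArtMethod** sp);
// with the Method* coming back in rax and the code pointer in rdx as described above.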
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, this_object, Thread*, sp)

    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
    movq %rsp, %rcx                                        // pass SP

    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                           // save the code pointer
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    testq %rdi, %rdi
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


    /*
     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
     * the end of the shorty.
     */
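    /*
     * Illustrative walk-through (added for clarity, not part of the ABI): with the
     * remaining shorty "DJF", the first expansion loads the double into its xmm
     * register and advances arg_array by 8; the next expansion skips the long
     * (advancing arg_array by another 8) before loading the float into the next
     * xmm register.
     */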
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO

    /*
     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
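
    /*
     * Illustrative example (added for clarity): with the remaining shorty "IJF",
     * the first expansion loads the int into gpr_reg32, the second loads the long
     * into the next gpr_reg64, and the third skips the float (it belongs to the
     * XMM pass) and then reaches the end of the shorty, branching to finished.
     */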

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array that must at least contain the this pointer.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
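    /*
     * From C++ this stub is reached through a declaration along these lines
     * (a sketch, assuming the usual ART prototype):
     *   extern "C" void art_quick_invoke_stub(ArtMethod* method, uint32_t* args,
     *                                         uint32_t args_size, Thread* self,
     *                                         JValue* result, const char* shorty);
     */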
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save native callee save rbx
    PUSH r12                      // Save native callee save r12
    PUSH r13                      // Save native callee save r13
    PUSH r14                      // Save native callee save r14
    PUSH r15                      // Save native callee save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)       // Store null for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // rax := method to be called
    movq %rsi, %r11               // r11 := arg_array
    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
                                  // arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // rdi := method to be called
    movl (%r11), %esi             // rsi := this pointer
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    POP r15                       // Pop r15
    POP r14                       // Pop r14
    POP r13                       // Pop r13
    POP r12                       // Pop r12
    POP rbx                       // Pop rbx
    POP r9                        // Pop r9 - shorty*
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming it's a long, int or Object*.
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)            // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)            // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array or null if no arguments.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save rbx
    PUSH r12                      // Save r12
    PUSH r13                      // Save r13
    PUSH r14                      // Save r14
    PUSH r15                      // Save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15.
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)        // Store null for method*

    movl %r10d, %ecx               // Place size of args in rcx.
    movq %rdi, %rax                // rax := method to be called
    movq %rsi, %r11                // r11 := arg_array
    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
                                   // stack arguments.
    // Copy arg array into stack.
    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10              // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi                // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp                // Restore stack pointer.
    POP r15                        // Pop r15
    POP r14                        // Pop r14
    POP r13                        // Pop r13
    POP r12                        // Pop r12
    POP rbx                        // Pop rbx
    POP r9                         // Pop r9 - shorty*.
    POP r8                         // Pop r8 - result*.
    POP rbp                        // Pop rbp
    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)               // Store the result assuming it's a long, int or Object*.
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)             // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)             // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub

    /*
     * Long jump stub.
     * On entry:
     *   rdi = gprs
     *   rsi = fprs
     */
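    /*
     * Layout of the gprs array implied by the pops below, for reference:
     *   [0]=r15 [1]=r14 [2]=r13 [3]=r12 [4]=r11 [5]=r10 [6]=r9 [7]=r8 [8]=rdi
     *   [9]=rsi [10]=rbp [11]=rsp placeholder (skipped) [12]=rbx [13]=rdx
     *   [14]=rcx [15]=rax [16]=new rsp; rip is finally popped off the new stack.
     */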
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // Pop rip from the newly restored stack.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump

MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

    /*
     * Macro for resolution and initialization of indexed DEX file
     * constants such as classes and strings.
     */
MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for GC
    // Outgoing argument set up
    movl %eax, %edi                               // pass the index of the constant as arg0
    movq %gs:THREAD_SELF_OFFSET, %rsi             // pass Thread::Current()
    call CALLVAR(cxx_name)                        // cxx_name(arg0, Thread*)
    testl %eax, %eax                              // If result is null, deliver the OOME.
    jz 1f
    CFI_REMEMBER_STATE
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX        // restore frame up to return address
    ret
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING)
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
END_MACRO

MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS

// Comment out allocators that have x86_64 specific asm.
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_OBJECT(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)


// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Fast path rosalloc allocation.
    // RDI: mirror::Class*, RAX: return value
    // RSI, RDX, RCX, R8, R9: free.
                                                           // Check if the thread local
                                                           // allocation stack has room.
    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae    .Lslow_path\c_name
                                                           // Load the object size
    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
                                                           // Check if the size is for a thread
                                                           // local allocation. Also does the
                                                           // initialized and finalizable checks.
    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja     .Lslow_path\c_name
                                                           // Compute the rosalloc bracket index
                                                           // from the size.
    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
                                                           // Load the rosalloc run (r9).
                                                           // Subtract __SIZEOF_POINTER__ to
                                                           // subtract one from the index as there
                                                           // is no 0 byte run and the size is
                                                           // already aligned.
    movq   (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
                                                           // Load the free list head (rax). This
                                                           // will be the return val.
    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq  %rax, %rax
    jz     .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
                                                           // Push the new object onto the thread
                                                           // local allocation stack and
                                                           // increment the thread local
                                                           // allocation stack top.
    movl   %eax, (%rcx)
    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
                                                           // Load the next pointer of the head
                                                           // and update the list head with the
                                                           // next pointer.
    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
                                                           // Store the class pointer in the
                                                           // header. This also overwrites the
                                                           // next pointer. The offsets are
                                                           // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edi
    movl   %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
                                                           // Decrement the size of the free list
    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
                                                           // No fence necessary for x86.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc

// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO

// The fast path code for art_quick_alloc_object_initialized_region_tlab.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
    addq %rax, %rcx                                            // Add size to pos, note that these
                                                               // are both 32 bit ints, overflow
                                                               // will cause the add to be past the
                                                               // end of the thread local region.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)                      // Increase thread_local_objects.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO
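
// Pseudo-C sketch of the fast path above (field names are illustrative, not ART's):
//   size = klass->object_size_alloc_fast_path;
//   new_pos = self->tlab_pos + size;
//   if (new_pos > self->tlab_end) goto slow_path;
//   obj = self->tlab_pos; self->tlab_pos = new_pos; self->tlab_objects++;
//   obj->klass = klass;   // x86 keeps stores ordered, so no fence is needed.
//   return obj;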

// The fast path code for art_quick_alloc_array_region_tlab.
// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
// Free temps: RCX, RDX, R8
// Output: RAX: return value.
MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
    // Mask out the unaligned part to make sure we are 8 byte aligned.
    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
    addq %rax, %r9
    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)         // Increase thread_local_objects.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO

// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
END_MACRO

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
// called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
.Lart_quick_alloc_object_resolved_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
END_FUNCTION art_quick_alloc_object_resolved_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
// May be called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
.Lart_quick_alloc_object_initialized_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
END_FUNCTION art_quick_alloc_object_initialized_tlab

MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx        // Load component type.
    UNPOISON_HEAP_REF ecx
    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
    shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx  // Get component size shift.
    movq %rsi, %r9
    salq %cl, %r9                                              // Calculate array count shifted.
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
    // Add 4 extra bytes if we are doing a long array.
    addq MACRO_LITERAL(1), %rcx
    andq MACRO_LITERAL(4), %rcx
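    // ((shift + 1) & 4) is 4 exactly when the component size shift is 3 (8-byte
    // elements) and 0 for the smaller shifts, so only wide arrays get the +4.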
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
    addq %rcx, %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_8)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_16)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(1), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_32)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(2), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_64)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(3), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
    DEFINE_FUNCTION VAR(c_entrypoint)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    CALL_MACRO(size_setup)
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
.Lslow_path\c_entrypoint:
    SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
    call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
    END_FUNCTION VAR(c_entrypoint)
END_MACRO


GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    // No read barrier since the caller is responsible for that.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab

ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_resolve_type, artResolveTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_type_and_verify_access, artResolveTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_handle, artResolveMethodHandleFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_method_type, artResolveMethodTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO

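// Thin-lock fast path of art_quick_lock_object as pseudo-C, for orientation only
// (names are illustrative):
//   lw = obj->lock_word;
//   if (lw & kStateMaskShifted) goto slow_path;            // fat lock, etc.
//   if ((lw & ~kGCStateMask) == 0)                         // unlocked:
//     CAS(obj->lock_word, lw, thread_id | gc_bits(lw));    //   try to acquire.
//   else if (owner(lw) == self && count(lw) < max)         // recursive:
//     CAS(obj->lock_word, lw, lw + kThinLockCountOne);     //   bump the count.
//   else goto slow_path;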
DEFINE_FUNCTION art_quick_lock_object
    testl %edi, %edi                      // Null check object/rdi.
    jz   .Lslow_lock
.Lretry_lock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx  // Test the 2 high bits.
    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
    movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    test %ecx, %ecx
    jnz  .Lalready_thin                   // Lock word contains a thin lock.
    // unlocked case - edx: original lock word, edi: obj.
    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
    movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
    cmpw %cx, %dx                         // do we hold the lock already?
    jne  .Lslow_lock
    movl %edx, %ecx                       // copy the lock word to check count overflow.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if the upper bit (28) is set
    jne  .Lslow_lock                      // count overflowed so go slow
    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
    // update lockword, cmpxchg necessary for read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object

DEFINE_FUNCTION art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object_no_inline

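// The matching unlock fast path as pseudo-C, for orientation only (names are
// illustrative):
//   lw = obj->lock_word;
//   if (lw is fat || owner(lw) != self) goto slow_path;
//   if (count(lw) == 0) store/CAS just the gc bits;        // fully unlocked.
//   else                store/CAS lw - kThinLockCountOne;  // drop one level.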
1122DEFINE_FUNCTION art_quick_unlock_object
1123    testl %edi, %edi                      // null check object/edi
1124    jz   .Lslow_unlock
1125.Lretry_unlock:
1126    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
1127    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
1128    test LITERAL(LOCK_WORD_STATE_MASK_SHIFTED), %ecx
1129    jnz  .Lslow_unlock                    // lock word contains a monitor
1130    cmpw %cx, %dx                         // does the thread id match?
1131    jne  .Lslow_unlock
1132    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
1133    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
1134    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
1135    jae  .Lrecursive_thin_unlock
1136    // update lockword, cmpxchg necessary for read barrier bits.
1137    movl %ecx, %eax                       // eax: old lock word.
1138    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
1139#ifndef USE_READ_BARRIER
1140    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
1141#else
1142    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
1143    jnz  .Lretry_unlock                   // cmpxchg failed retry
1144#endif
1145    ret
1146.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
1147    // update lockword, cmpxchg necessary for read barrier bits.
1148    movl %ecx, %eax                       // eax: old lock word.
1149    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
1150#ifndef USE_READ_BARRIER
1151    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
1152#else
1153    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
1154    jnz  .Lretry_unlock                   // cmpxchg failed retry
1155#endif
1156    ret
1157.Lslow_unlock:
1158    SETUP_SAVE_REFS_ONLY_FRAME
1159    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
1160    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
1161    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
1162    RETURN_IF_EAX_ZERO
1163END_FUNCTION art_quick_unlock_object

DEFINE_FUNCTION art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object_no_inline

DEFINE_FUNCTION art_quick_check_instance_of
    // Type check using the bit string passes null as the target class. In that case just throw.
    testl %esi, %esi
    jz .Lthrow_class_cast_exception_for_bitstring_check

    // We could check the super classes here but that is usually already checked in the caller.
    PUSH rdi                          // Save args for the exception path.
    PUSH rsi
    subq LITERAL(8), %rsp             // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
    testq %rax, %rax
    jz .Lthrow_class_cast_exception   // jump forward if not assignable
    CFI_REMEMBER_STATE
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(24), %rsp            // pop arguments
    CFI_ADJUST_CFA_OFFSET(-24)
    ret
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 64)  // Reset unwind info so following code unwinds.

.Lthrow_class_cast_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(8), %rsp             // pop padding
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rsi                           // Pop arguments
    POP rdi

.Lthrow_class_cast_exception_for_bitstring_check:
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_check_instance_of


// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
MACRO2(POP_REG_NE, reg, exclude_reg)
    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
      addq MACRO_LITERAL(8), %rsp
      CFI_ADJUST_CFA_OFFSET(-8)
    .else
      POP RAW_VAR(reg)
    .endif
END_MACRO

    /*
     * Macro to insert read barrier, used in art_quick_aput_obj.
     * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
     * 64b PUSH/POP and 32b argument.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     *
     * As with the art_quick_aput_obj function, the 64b versions are in comments.
     */
MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
#ifdef USE_READ_BARRIER
    PUSH rax                            // save registers that might be used
    PUSH rdi
    PUSH rsi
    PUSH rdx
    PUSH rcx
    SETUP_FP_CALLEE_SAVE_FRAME
    // Outgoing argument set up
    // movl REG_VAR(ref_reg32), %edi    // pass ref, no-op for now since parameter ref is unused
    // // movq REG_VAR(ref_reg64), %rdi
    movl REG_VAR(obj_reg), %esi         // pass obj_reg
    // movq REG_VAR(obj_reg), %rsi
    movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
    call SYMBOL(artReadBarrierSlow)     // artReadBarrierSlow(ref, obj_reg, offset)
    // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
    .ifnc RAW_VAR(dest_reg32), eax
    // .ifnc RAW_VAR(dest_reg64), rax
      movl %eax, REG_VAR(dest_reg32)    // save loaded ref in dest_reg
      // movq %rax, REG_VAR(dest_reg64)
    .endif
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
    POP_REG_NE rdx, RAW_VAR(dest_reg64)
    POP_REG_NE rsi, RAW_VAR(dest_reg64)
    POP_REG_NE rdi, RAW_VAR(dest_reg64)
    POP_REG_NE rax, RAW_VAR(dest_reg64)
#else
    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
    UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
#endif  // USE_READ_BARRIER
END_MACRO
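
    /*
     * Sketch (illustrative C, not assembled) of what one READ_BARRIER
     * expansion computes. artReadBarrierSlow is the runtime entrypoint
     * actually called above; UnpoisonHeapRef is a stand-in for the
     * UNPOISON_HEAP_REF macro.
     *
     *   uint32_t ReadBarrierField(mirror::Object* obj, uint32_t offset) {
     *   #ifdef USE_READ_BARRIER
     *     // Caller-save registers are spilled around this call.
     *     return artReadBarrierSlow(0, obj, offset);  // first arg (ref) unused
     *   #else
     *     uint32_t ref = *(uint32_t*)((uint8_t*)obj + offset);
     *     return UnpoisonHeapRef(ref);  // plain load + unpoisoning
     *   #endif
     *   }
     */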

DEFINE_FUNCTION art_quick_aput_obj
    testl %edx, %edx                // store of null
//  test %rdx, %rdx
    jz .Ldo_aput_null
    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
    cmpl %eax, %ecx  // value's type == array's component type - trivial assignability
#else
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
//  cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
#endif
    jne .Lcheck_assignability
.Ldo_aput:
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
//  shrq LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
    ret
.Ldo_aput_null:
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret
.Lcheck_assignability:
    // Save arguments.
    PUSH rdi
    PUSH rsi
    PUSH rdx
    SETUP_FP_CALLEE_SAVE_FRAME

#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
    movl %eax, %esi               // Pass arg2 = value's class.
    // movq %rax, %rsi
#else
                                     // "Uncompress" = do nothing, as already zero-extended on load.
    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
#endif
    movq %rcx, %rdi               // Pass arg1 = array's component type.

    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)

    // Exception?
    testq %rax, %rax
    jz   .Lthrow_array_store_exception

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP  rdx
    POP  rsi
    POP  rdi

    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
//  shrq LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
//  movb %dl, (%rdx, %rdi)
    ret
    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP  rdx
    POP  rsi
    POP  rdi

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rdx, %rsi                         // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
                                            // Pass arg 1 = array.
    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_aput_obj
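
    /*
     * Sketch (illustrative C pseudocode) of the complete aput-object path
     * above, including the card mark. Field names are stand-ins for the
     * MIRROR_* offsets and kCardShift for CARD_TABLE_CARD_SHIFT.
     *
     *   void AputObj(Array* arr, int32_t index, Object* value, Thread* self) {
     *     if (value != nullptr) {
     *       Class* component = arr->klass_->component_type_;  // via READ_BARRIERs
     *       if (value->klass_ != component &&
     *           !artIsAssignableFromCode(component, value->klass_)) {
     *         artThrowArrayStoreException(arr, value, self);  // does not return
     *       }
     *     }
     *     arr->data_[index] = PoisonHeapRef(value);           // 32-bit ref store
     *     if (value != nullptr) {
     *       // Dirty the card: the low byte of the card table base doubles
     *       // as the "dirty" value (movb %dl above).
     *       uint8_t* cards = self->card_table_;
     *       cards[(uintptr_t)arr >> kCardShift] = (uint8_t)(uintptr_t)cards;
     *     }
     *   }
     */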

// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy

DEFINE_FUNCTION art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
    call SYMBOL(artTestSuspendFromCode)         // (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_test_suspend

UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_EAX_ZERO

TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_EAX_ZERO

ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

DEFINE_FUNCTION art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
    movq %rsp, %rcx                         // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                        // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler

    /*
     * Called to resolve an imt conflict.
     * rdi is the conflict ArtMethod.
     * rax is a hidden argument that holds the target interface method's dex method index.
     *
     * Note that this stub writes to r10, r11, rax and rdi.
     */
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
#if defined(__APPLE__)
    int3
    int3
#else
    movq __SIZEOF_POINTER__(%rsp), %r10 // Load referrer.
    mov %eax, %r11d             // Remember method index in R11.
    PUSH rdx                    // Preserve RDX, as LOCK CMPXCHG16B below clobbers it.
    // If the method is obsolete, just go through the dex cache miss slow path.
    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
    testl LITERAL(ACC_OBSOLETE_METHOD), ART_METHOD_ACCESS_FLAGS_OFFSET(%r10)
    jnz .Limt_conflict_trampoline_dex_cache_miss
    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r10), %r10d  // Load declaring class (no read barrier).
    movl MIRROR_CLASS_DEX_CACHE_OFFSET(%r10), %r10d    // Load the DexCache (without read barrier).
    UNPOISON_HEAP_REF r10d
    movq MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET(%r10), %r10  // Load the resolved methods.
    andl LITERAL(METHOD_DEX_CACHE_SIZE_MINUS_ONE), %eax  // Calculate DexCache method slot index.
    shll LITERAL(1), %eax       // Multiply by 2 as entries have size 2 * __SIZEOF_POINTER__.
    leaq 0(%r10, %rax, __SIZEOF_POINTER__), %r10 // Load DexCache method slot address.
    mov %rcx, %rdx              // Make RDX:RAX == RCX:RBX so that LOCK CMPXCHG16B makes no changes.
    mov %rbx, %rax              // (The actual value does not matter.)
    lock cmpxchg16b (%r10)      // Relaxed atomic load RDX:RAX from the dex cache slot.
    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
    cmp %rdx, %r11              // Compare method index to see if we had a DexCache method hit.
    jne .Limt_conflict_trampoline_dex_cache_miss
.Limt_table_iterate:
    cmpq %rax, 0(%rdi)
    jne .Limt_table_next_entry
    // We successfully hit an entry in the table. Load the target method
    // and jump to it.
    movq __SIZEOF_POINTER__(%rdi), %rdi
    CFI_REMEMBER_STATE
    POP rdx
    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 16)
.Limt_table_next_entry:
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cmpq LITERAL(0), 0(%rdi)
    jz .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
    jmp .Limt_table_iterate
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    CFI_REMEMBER_STATE
    POP rdx
    movq %rax, %rdi  // Load interface method
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 16)
.Limt_conflict_trampoline_dex_cache_miss:
    // We're not creating a proper runtime method frame here,
    // artLookupResolvedMethod() is not allowed to walk the stack.

    // Save GPR args and ImtConflictTable; RDX is already saved.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rcx  // Quick arg 3.
    PUSH rdi  // ImtConflictTable
    // Save FPR args and callee-saves, align stack to 16B.
    subq MACRO_LITERAL(12 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8 + 8)
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm12, 64(%rsp)  // XMM12-15 are callee-save in ART compiled code ABI
    movq %xmm13, 72(%rsp)  // but caller-save in native ABI.
    movq %xmm14, 80(%rsp)
    movq %xmm15, 88(%rsp)

    movq %r11, %rdi             // Pass method index.
    movq 12 * 8 + 8 + 6 * 8 + 8(%rsp), %rsi   // Pass referrer.
    call SYMBOL(artLookupResolvedMethod)  // (uint32_t method_index, ArtMethod* referrer)

    // Restore FPRs.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm12
    movq 72(%rsp), %xmm13
    movq 80(%rsp), %xmm14
    movq 88(%rsp), %xmm15
    addq MACRO_LITERAL(12 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8 + 8))
    // Restore ImtConflictTable and GPR args.
    POP rdi
    POP rcx
    POP rsi
    POP r8
    POP r9

    cmp LITERAL(0), %rax        // If the method wasn't resolved,
    je .Lconflict_trampoline    //   skip the lookup and go to artInvokeInterfaceTrampoline().
    jmp .Limt_table_iterate
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
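
    /*
     * Sketch (illustrative C) of the ImtConflictTable scan above. The entry
     * layout, pairs of (interface method, implementation) terminated by a
     * null interface method, follows the 2 * __SIZEOF_POINTER__ stride in
     * the assembly; the struct and field names here are assumptions.
     *
     *   const void* ImtLookup(ImtEntry* entries, ArtMethod* interface_method) {
     *     for (size_t i = 0; entries[i].interface_method != nullptr; ++i) {
     *       if (entries[i].interface_method == interface_method) {
     *         return entries[i].implementation->quick_code_;  // jmp target
     *       }
     *     }
     *     return nullptr;  // miss: populate via artInvokeInterfaceTrampoline
     *   }
     */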

DEFINE_FUNCTION art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    testq %r10, %r10              // If code pointer is null goto deliver pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
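
    /*
     * Sketch (C-like, illustrative only) of the control flow above:
     *
     *   const void* code =
     *       artQuickResolutionTrampoline(called, receiver, self, sp);
     *   ArtMethod* resolved = *(ArtMethod**)sp;  // possibly updated by the call
     *   if (code == nullptr) {
     *     DeliverPendingException();
     *   }
     *   TailCall(code, resolved);  // jmp *%r10 with RDI = resolved method
     */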

/* Generic JNI frame layout:
 *
 * #-------------------#
 * |                   |
 * | caller method...  |
 * #-------------------#    <--- SP on entry
 *
 *          |
 *          V
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | R15               |    callee save
 * | R14               |    callee save
 * | R13               |    callee save
 * | R12               |    callee save
 * | R9                |    arg5
 * | R8                |    arg4
 * | RSI/R6            |    arg1
 * | RBP/R5            |    callee save
 * | RBX/R3            |    callee save
 * | RDX/R2            |    arg2
 * | RCX/R1            |    arg3
 * | XMM7              |    float arg 8
 * | XMM6              |    float arg 7
 * | XMM5              |    float arg 6
 * | XMM4              |    float arg 5
 * | XMM3              |    float arg 4
 * | XMM2              |    float arg 3
 * | XMM1              |    float arg 2
 * | XMM0              |    float arg 1
 * | RDI/Method*       |  <- sp
 * #-------------------#
 * | Scratch Alloca    |    5K scratch space
 * #---------#---------#
 * |         | sp*     |
 * | Tramp.  #---------#
 * | args    | thread  |
 * | Tramp.  #---------#
 * |         | method  |
 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
 *
 *           |
 *           v              artQuickGenericJniTrampoline
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return PC         |
 * | Callee-Saves      |
 * | padding           | // 8B
 * | Method*           |    <--- (1)
 * #-------------------#
 * | local ref cookie  | // 4B
 * | padding           | // 0B or 4B to align handle scope on 8B address
 * | handle scope      | // Size depends on number of references; multiple of 4B.
 * #-------------------#
 * | JNI Stack Args    | // Empty if all args fit into registers.
 * #-------------------#    <--- SP on native call (1)
 * | Free scratch      |
 * #-------------------#
 * | SP for JNI call   | // Pointer to (1).
 * #-------------------#
 * | Hidden arg        | // For @CriticalNative
 * #-------------------#
 * |                   |
 * | Stack for Regs    |    The trampoline assembly will pop these values
 * |                   |    into registers for native call
 * #-------------------#
 */
    /*
     * Called to do a generic JNI down-call
     */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %rsp, %rbp                 // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)

    //
    // reserve a lot of space
    //
    //      4    local state ref
    //      4    padding
    //   4196    ~4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields ?
    // +  112    14x 8-byte stack-to-register space
    // ------
    //   4332
    // 16-byte aligned: 4336
    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
    //       Also means: the padding is somewhere in the middle
    //
    // We simply reserve 5K (5120 bytes), which comfortably covers the total above.
    subq LITERAL(5120), %rsp
    // prepare for artQuickGenericJniTrampoline call
    // (Thread*, managed_sp, reserved_area)
    //    rdi       rsi           rdx   <= C calling convention
    //  gs:...      rbp           rsp   <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread::Current().
    movq %rbp, %rsi                    // Pass managed frame SP.
    movq %rsp, %rdx                    // Pass reserved area.
    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, managed_sp, reserved_area)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    //     %rax: pointer to native code, 0 on error.
    //     The bottom of the reserved area contains values for arg registers,
    //     the hidden arg register and the SP for out args for the call.

    // Check for error (class init check or locking for synchronized native method can throw).
    test %rax, %rax
    jz .Lexception_in_native

    // Pop the six GPR args from the bottom of the register-passing alloca
    // region; the layout was set up by artQuickGenericJniTrampoline.
    popq %rdi
    popq %rsi
    popq %rdx
    popq %rcx
    popq %r8
    popq %r9
    // TODO: skip floating point if unused, some flag.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7

    // Load hidden arg (r11) for @CriticalNative.
    movq 64(%rsp), %r11
    // Load SP for out args, releasing unneeded reserved area.
    movq 72(%rsp), %rsp

    // native call
    call *%rax

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*,  result, result_f)
    //   rdi      rsi   rdx       <= C calling convention
    //  gs:...    rax   xmm0      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rax, %rsi
    movq %xmm0, %rdx
    call SYMBOL(artQuickGenericJniEndTrampoline)

    // Pending exceptions possible.
    // TODO: use cmpq, needs direct encoding because of gas bug
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
    test %rcx, %rcx
    jnz .Lexception_in_native

    // Tear down the alloca.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)

    // Tear down the callee-save frame.
    // Load FPRs.
    // The xmm0 slot at 16(%rsp) is deliberately skipped: xmm0 already holds
    // the FP return value copied from artQuickGenericJniEndTrampoline.
    movq 24(%rsp), %xmm1            // Reloading argument FPRs xmm1-7 is dead code;
    movq 32(%rsp), %xmm2            // only xmm12-15 below are callee-save in the ART ABI.
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // Skip the method slot, padding and xmm0-7 spills (80 bytes) plus xmm12-15 (4 * 8 bytes).
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Pop callee saves and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
    POP rbx  // Callee save.
    POP rbp  // Callee save.
    POP rsi  // Arg.
    POP r8   // Arg.
    POP r9   // Arg.
    POP r12  // Callee save.
    POP r13  // Callee save.
    POP r14  // Callee save.
    POP r15  // Callee save.
    // Store the result into xmm0 as well, for when it's an FP return.
    movq %rax, %xmm0
    ret
.Lexception_in_native:
    pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
    addq LITERAL(-1), (%rsp)  // Remove the GenericJNI tag.
    movq (%rsp), %rsp
    CFI_DEF_CFA_REGISTER(rsp)
    // Do a call to push a new save-all frame required by the runtime.
    call .Lexception_call
.Lexception_call:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_generic_jni_trampoline
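
    /*
     * Sketch (illustrative C-like pseudocode) of the trampoline's phases.
     * The two artQuickGenericJni*Trampoline entrypoints are the real ones
     * called above; the reserved-area accesses are pseudocode over the
     * layout documented before this function.
     *
     *   managed_sp = sp_after_callee_save_frame;        // kept in rbp
     *   reserved = alloca(5120);                        // scratch + out args
     *   code = artQuickGenericJniTrampoline(self, managed_sp, reserved);
     *   if (code == nullptr) goto exception_in_native;  // init/lock threw
     *   pop GPR args rdi..r9 and load FPR args xmm0..xmm7 from reserved;
     *   r11 = hidden arg (for @CriticalNative); rsp = out-args SP;
     *   result = code(...);                             // call *%rax
     *   result = artQuickGenericJniEndTrampoline(self, result, result_f);
     *   if (self->exception != nullptr) goto exception_in_native;
     *   tear down alloca + callee-save frame; return result in rax and xmm0;
     */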

    /*
     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
     * of a quick call:
     * RDI = method being called / to bridge to.
     * RSI, RDX, RCX, R8, R9 are arguments to that method.
     */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
    movq %rsp, %rdx                    // RDX := sp
    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
    movq %rax, %xmm0                   // Place return value also into floating point return value.
    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge

    /*
     * Called to catch an attempt to invoke an obsolete method.
     * RDI = method being called.
     */
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod

    /*
     * Routine that intercepts method calls and returns.
     */
DEFINE_FUNCTION art_quick_instrumentation_entry
#if defined(__APPLE__)
    int3
    int3
#else
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    movq %rdi, %r12               // Preserve method pointer in a callee-save.

    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
    movq %rsp, %rcx                     // Pass SP.

    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)

                                  // %rax = result of call.
    testq %rax, %rax
    jz 1f

    movq %r12, %rdi               // Reload method pointer.
    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.

    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    jmp *%rax                     // Tail call to intended method.
1:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
#endif  // __APPLE__
END_FUNCTION art_quick_instrumentation_entry

DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
    CFI_ADJUST_CFA_OFFSET(8)

    SETUP_SAVE_EVERYTHING_FRAME

    leaq 16(%rsp), %rcx       // Pass floating-point result pointer, in kSaveEverything frame.
    leaq 144(%rsp), %rdx      // Pass integer result pointer, in kSaveEverything frame.
    movq %rsp, %rsi           // Pass SP.
    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.

    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res*, fpr_res*)

    testq %rax, %rax          // Check whether we have a return PC to go to;
                              // if not, an exception is pending.
    jz .Ldo_deliver_instrumentation_exception
    testq %rdx, %rdx
    jnz .Ldeoptimize
    // Normal return.
    movq %rax, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    ret
.Ldeoptimize:
    movq %rdx, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    // Jump to art_quick_deoptimize.
    jmp SYMBOL(art_quick_deoptimize)
.Ldo_deliver_instrumentation_exception:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_quick_instrumentation_exit

    /*
     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
DEFINE_FUNCTION art_quick_deoptimize
    SETUP_SAVE_EVERYTHING_FRAME        // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
    call SYMBOL(artDeoptimize)         // (Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize

    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the interpreter bridge.
     */
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
                                                // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rsi           // Pass Thread.
    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize_from_compiled_code

    /*
     * String's compareTo.
     *
     * On entry:
     *    rdi:   this string object (known non-null)
     *    rsi:   comp string object (known non-null)
     */
DEFINE_FUNCTION art_quick_string_compareto
    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
    /* Build pointers to the start of string data */
    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
#if (STRING_COMPRESSION_FEATURE)
    /* Differentiate the compression cases */
    shrl    LITERAL(1), %r8d
    jnc     .Lstring_compareto_this_is_compressed
    shrl    LITERAL(1), %r9d
    jnc     .Lstring_compareto_that_is_compressed
    jmp     .Lstring_compareto_both_not_compressed
.Lstring_compareto_this_is_compressed:
    shrl    LITERAL(1), %r9d
    jnc     .Lstring_compareto_both_compressed
    /* Comparison this (8-bit) and that (16-bit) */
    mov     %r8d, %eax
    subl    %r9d, %eax
    mov     %r8d, %ecx
    cmovg   %r9d, %ecx
    /* Going into loop to compare each character */
    jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
.Lstring_compareto_loop_comparison_this_compressed:
    movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
    movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
    addl    LITERAL(1), %edi                    // ++this_cur_char (8-bit)
    addl    LITERAL(2), %esi                    // ++that_cur_char (16-bit)
    subl    %r9d, %r8d
    loope   .Lstring_compareto_loop_comparison_this_compressed
    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length1:
    ret
.Lstring_compareto_that_is_compressed:
    movl    %r8d, %eax
    subl    %r9d, %eax
    mov     %r8d, %ecx
    cmovg   %r9d, %ecx
    /* Comparison this (16-bit) and that (8-bit) */
    jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
.Lstring_compareto_loop_comparison_that_compressed:
    movzwl  (%edi), %r8d                        // move *(this_cur_char) word to long
    movzbl  (%esi), %r9d                        // move *(that_cur_char) byte to long
    addl    LITERAL(2), %edi                    // ++this_cur_char (16-bit)
    addl    LITERAL(1), %esi                    // ++that_cur_char (8-bit)
    subl    %r9d, %r8d
    loope   .Lstring_compareto_loop_comparison_that_compressed
    cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length2:
    ret
.Lstring_compareto_both_compressed:
    /* Calculate min length and count diff */
    movl    %r8d, %ecx
    movl    %r8d, %eax
    subl    %r9d, %eax
    cmovg   %r9d, %ecx
    jecxz   .Lstring_compareto_keep_length3
    repe    cmpsb
    je      .Lstring_compareto_keep_length3
    movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
    movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
    jmp     .Lstring_compareto_count_difference
#endif // STRING_COMPRESSION_FEATURE
.Lstring_compareto_both_not_compressed:
    /* Calculate min length and count diff */
    movl    %r8d, %ecx
    movl    %r8d, %eax
    subl    %r9d, %eax
    cmovg   %r9d, %ecx
    /*
     * At this point we have:
     *   eax: value to return if first part of strings are equal
     *   ecx: minimum among the lengths of the two strings
     *   esi: pointer to comp string data
     *   edi: pointer to this string data
     */
    jecxz .Lstring_compareto_keep_length3
    repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
    je    .Lstring_compareto_keep_length3
    movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
    movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
.Lstring_compareto_count_difference:
    subl  %ecx, %eax              // return the difference
.Lstring_compareto_keep_length3:
    ret
END_FUNCTION art_quick_string_compareto
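
    /*
     * Sketch (illustrative C) of the comparison above with string
     * compression enabled. As implied by the shrl/jnc pairs, the count
     * field stores (length << 1) with the low bit set for uncompressed
     * (UTF-16) strings; field names are stand-ins for the MIRROR_STRING_*
     * offsets.
     *
     *   int32_t CompareTo(String* s, String* t) {
     *     bool s_wide = (s->count_ & 1) != 0, t_wide = (t->count_ & 1) != 0;
     *     int32_t s_len = s->count_ >> 1, t_len = t->count_ >> 1;
     *     int32_t min = (t_len < s_len) ? t_len : s_len;
     *     for (int32_t i = 0; i < min; ++i) {
     *       uint16_t a = s_wide ? s->value16_[i] : s->value8_[i];
     *       uint16_t b = t_wide ? t->value16_[i] : t->value8_[i];
     *       if (a != b) return (int32_t)a - (int32_t)b;
     *     }
     *     return s_len - t_len;  // equal prefix: compare lengths
     *   }
     */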

UNIMPLEMENTED art_quick_memcmp16

DEFINE_FUNCTION art_quick_instance_of
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                      // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_instance_of

DEFINE_FUNCTION art_quick_string_builder_append
    SETUP_SAVE_REFS_ONLY_FRAME                // save ref containing registers for GC
    // Outgoing argument set up
    leaq FRAME_SIZE_SAVE_REFS_ONLY + __SIZEOF_POINTER__(%rsp), %rsi  // pass args
    movq %gs:THREAD_SELF_OFFSET, %rdx         // pass Thread::Current()
    call SYMBOL(artStringBuilderAppend)       // (uint32_t, const uint32_t*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME              // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER   // return or deliver exception
END_FUNCTION art_quick_string_builder_append

// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard runtime calling
// convention:
// - register `reg` (which may be different from RDI) is used to pass
//   the (sole) argument of this function;
// - register `reg` (which may be different from RAX) is used to return
//   the result of this function (instead of RAX);
// - if `reg` is different from `rdi`, RDI is treated like a normal
//   (non-argument) caller-save register;
// - if `reg` is different from `rax`, RAX is treated like a normal
//   (non-result) caller-save register;
// - everything else is the same as in the standard runtime calling
//   convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
    DEFINE_FUNCTION VAR(name)
    // Null check so that we can load the lock word.
    testq REG_VAR(reg), REG_VAR(reg)
    jz .Lret_rb_\name
.Lnot_null_\name:
    // Check the mark bit, if it is 1 return.
    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
    jz .Lslow_rb_\name
    ret
.Lslow_rb_\name:
    PUSH rax
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
    // Jump if the addl caused an unsigned overflow of eax. The only lock word
    // state that overflows is the forwarding address one.
    // Taken ~25% of the time.
    jnae .Lret_forwarding_address\name

    // Save all potentially live caller-save core registers.
    movq 0(%rsp), %rax
    PUSH rcx
    PUSH rdx
    PUSH rsi
    PUSH rdi
    PUSH r8
    PUSH r9
    PUSH r10
    PUSH r11
    // Create space for caller-save floating-point registers.
    subq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8)
    // Save all potentially live caller-save floating-point registers.
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm8, 64(%rsp)
    movq %xmm9, 72(%rsp)
    movq %xmm10, 80(%rsp)
    movq %xmm11, 88(%rsp)
    SETUP_FP_CALLEE_SAVE_FRAME

    .ifnc RAW_VAR(reg), rdi
      movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
    .endif
    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
    .ifnc RAW_VAR(reg), rax
      movq %rax, REG_VAR(reg)       // Return result into `reg`.
    .endif

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore floating-point registers.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm8
    movq 72(%rsp), %xmm9
    movq 80(%rsp), %xmm10
    movq 88(%rsp), %xmm11
    // Remove floating-point registers.
    addq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
    // Restore core regs, except `reg`, as it is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REG_NE r11, RAW_VAR(reg)
    POP_REG_NE r10, RAW_VAR(reg)
    POP_REG_NE r9, RAW_VAR(reg)
    POP_REG_NE r8, RAW_VAR(reg)
    POP_REG_NE rdi, RAW_VAR(reg)
    POP_REG_NE rsi, RAW_VAR(reg)
    POP_REG_NE rdx, RAW_VAR(reg)
    POP_REG_NE rcx, RAW_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
    ret
.Lret_forwarding_address\name:
    // The overflow cleared the top bits.
    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
    movq %rax, REG_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
    ret
    END_FUNCTION VAR(name)
END_MACRO
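
// Sketch (illustrative C) of the fast path each READ_BARRIER_MARK_REG
// instantiation implements. artReadBarrierMark is the real entrypoint; the
// k* constants are stand-ins for the LOCK_WORD_* values used above.
//
//   mirror::Object* Mark(mirror::Object* ref) {
//     if (ref == nullptr) return ref;
//     uint32_t lw = ref->monitor_;
//     if (lw & kMarkBitMaskShifted) return ref;  // already marked: done
//     uint32_t fwd = lw + kForwardingAddressOverflow;
//     if (fwd < lw) {  // unsigned overflow <=> forwarding-address state
//       // The add cleared the state bits; the shift recovers the address.
//       return (mirror::Object*)((uintptr_t)fwd << kForwardingAddressShift);
//     }
//     return artReadBarrierMark(ref);  // slow path, caller-saves spilled
//   }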

READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15

DEFINE_FUNCTION art_quick_read_barrier_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp           // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow

DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                  // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow

    /*
     * On stack replacement stub.
     * On entry:
     *   [sp] = return address
     *   rdi = stack to copy
     *   rsi = size of stack
     *   rdx = pc to call
     *   rcx = JValue* result
     *   r8 = shorty
     *   r9 = thread
     *
     * Note that the native C ABI has already aligned the stack to 16 bytes.
     */
DEFINE_FUNCTION art_quick_osr_stub
    // Save the non-volatiles.
    PUSH rbp                      // Save rbp.
    PUSH rcx                      // Save rcx/result*.
    PUSH r8                       // Save r8/shorty*.

    // Save callee saves.
    PUSH rbx
    PUSH r12
    PUSH r13
    PUSH r14
    PUSH r15

    pushq LITERAL(0)              // Push null for ArtMethod*.
    CFI_ADJUST_CFA_OFFSET(8)
    movl %esi, %ecx               // rcx := size of stack
    movq %rdi, %rsi               // rsi := stack to copy
    movq %rsp, %rbp               // Save stack pointer to RBP for CFI use in .Losr_entry.
    call .Losr_entry
    CFI_REMEMBER_STATE

    // Restore stack and callee-saves.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP r15
    POP r14
    POP r13
    POP r12
    POP rbx
    POP r8
    POP rcx
    POP rbp
    movq %rax, (%rcx)              // Store the result.
    ret
.Losr_entry:
    CFI_RESTORE_STATE_AND_DEF_CFA(rsp, 80)
    // Since the call has pushed the return address we need to switch the CFA register to RBP.
    CFI_DEF_CFA_REGISTER(rbp)

    subl LITERAL(8), %ecx         // The given stack size includes the pushed frame pointer, so subtract it.
    subq %rcx, %rsp
    movq %rsp, %rdi               // rdi := beginning of stack
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    jmp *%rdx
END_FUNCTION art_quick_osr_stub
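
    /*
     * Sketch (C-like pseudocode, illustrative names only, not runtime API)
     * of the OSR transition above:
     *
     *   uint64_t OsrStub(const uint8_t* frame, size_t size, const void* pc,
     *                    JValue* result, const char* shorty, Thread* self) {
     *     push callee saves and a null ArtMethod* as the fake frame bottom;
     *     size -= sizeof(void*);             // size includes the pushed FP
     *     memcpy(alloca(size), frame, size); // rep movsb
     *     uint64_t raw = jump_to(pc);        // call .Losr_entry; jmp *%rdx
     *     pop callee saves;
     *     result->j = raw;                   // movq %rax, (%rcx)
     *     return raw;
     *   }
     */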

DEFINE_FUNCTION art_quick_invoke_polymorphic
                                                   // On entry: RDI := unused, RSI := receiver
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
    movq %rsi, %rdi                                // RDI := receiver
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread (self)
    movq %rsp, %rdx                                // RDX := pass SP
    call SYMBOL(artInvokePolymorphic)              // invoke with (receiver, self, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic

DEFINE_FUNCTION art_quick_invoke_custom
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
                                                   // RDI := call_site_index
    movq %gs:THREAD_SELF_OFFSET, %rsi              // RSI := Thread::Current()
    movq %rsp, %rdx                                // RDX := SP
    call SYMBOL(artInvokeCustom)                   // artInvokeCustom(call_site_index, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                               // Result is in RAX. Copy to FP result register.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_custom

// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
//  Argument 0: RDI: The context pointer for ExecuteSwitchImpl.
//  Argument 1: RSI: Pointer to the templated ExecuteSwitchImpl to call.
//  Argument 2: RDX: The value of DEX PC (memory address of the method's bytecode).
DEFINE_FUNCTION ExecuteSwitchImplAsm
    PUSH rbx                 // Spill RBX
    movq %rdx, %rbx          // RBX = DEX PC (callee save register)
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* RAX */, 3 /* RBX */, 0)

    call *%rsi               // Call the wrapped function

    POP rbx                  // Restore RBX
    ret
END_FUNCTION ExecuteSwitchImplAsm

// On entry: edi is the class, r11 is the inline cache. r10 and rax are available.
DEFINE_FUNCTION art_quick_update_inline_cache
#if (INLINE_CACHE_SIZE != 5)
#error "INLINE_CACHE_SIZE not as expected."
#endif
    // Don't update the cache if we are marking.
    cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
    jnz .Ldone
.Lentry1:
    movl INLINE_CACHE_CLASSES_OFFSET(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry2
    lock cmpxchg %edi, INLINE_CACHE_CLASSES_OFFSET(%r11)
    jz .Ldone
    jmp .Lentry1
.Lentry2:
    movl (INLINE_CACHE_CLASSES_OFFSET+4)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry3
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+4)(%r11)
    jz .Ldone
    jmp .Lentry2
.Lentry3:
    movl (INLINE_CACHE_CLASSES_OFFSET+8)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry4
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+8)(%r11)
    jz .Ldone
    jmp .Lentry3
.Lentry4:
    movl (INLINE_CACHE_CLASSES_OFFSET+12)(%r11), %eax
    cmpl %edi, %eax
    je .Ldone
    cmpl LITERAL(0), %eax
    jne .Lentry5
    lock cmpxchg %edi, (INLINE_CACHE_CLASSES_OFFSET+12)(%r11)
    jz .Ldone
    jmp .Lentry4
.Lentry5:
    // Unconditionally store, the cache is megamorphic.
    movl %edi, (INLINE_CACHE_CLASSES_OFFSET+16)(%r11)
.Ldone:
    ret
END_FUNCTION art_quick_update_inline_cache
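
    /*
     * Sketch (illustrative C) of the cache-update loop above; kInlineCacheSize
     * is INLINE_CACHE_SIZE (5), the struct name is an assumption.
     *
     *   void UpdateInlineCache(uint32_t cls, InlineCache* ic, Thread* self) {
     *     if (self->is_gc_marking_) return;   // don't update while marking
     *     for (int i = 0; i < kInlineCacheSize - 1; ++i) {
     *       while (true) {
     *         uint32_t seen = ic->classes_[i];
     *         if (seen == cls) return;        // already recorded
     *         if (seen != 0) break;           // slot taken: try the next one
     *         if (__sync_bool_compare_and_swap(&ic->classes_[i], 0, cls)) {
     *           return;                       // lock cmpxchg succeeded
     *         }
     *       }
     *     }
     *     ic->classes_[kInlineCacheSize - 1] = cls;  // megamorphic catch-all
     *   }
     */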

// On entry, method is at the bottom of the stack.
DEFINE_FUNCTION art_quick_compile_optimized
    SETUP_SAVE_EVERYTHING_FRAME
    movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
    movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
    call SYMBOL(artCompileOptimized)            // (ArtMethod*, Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
    ret
END_FUNCTION art_quick_compile_optimized
