Welcome to mirror list, hosted at ThFree Co, Russian Federation.

GcProbe.asm « arm64 « Runtime « Native « src - github.com/mono/corert.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 051b4158dc7ee309e4318c9474879bea89ecf8a1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
;; Licensed to the .NET Foundation under one or more agreements.
;; The .NET Foundation licenses this file to you under the MIT license.
;; See the LICENSE file in the project root for more information.

#include "AsmMacros.h"

    TEXTAREA

    ;; Friendly aliases for the decorated C++ symbol names used by this file. Each alias is referenced
    ;; below with a leading '$' (see the EXTERN lines).
    SETALIAS    GetLoopIndirCells, ?GetLoopIndirCells@ModuleHeader@@QEAAPEAEXZ
    SETALIAS    g_pTheRuntimeInstance, ?g_pTheRuntimeInstance@@3PEAVRuntimeInstance@@EA
    SETALIAS    RuntimeInstance__ShouldHijackLoopForGcStress, ?ShouldHijackLoopForGcStress@RuntimeInstance@@QEAA_N_K@Z

    ;; Symbols defined outside this file.
    EXTERN      g_fGcStressStarted

    EXTERN      $g_pTheRuntimeInstance
    EXTERN      $RuntimeInstance__ShouldHijackLoopForGcStress
    EXTERN      $GetLoopIndirCells
    EXTERN      RecoverLoopHijackTarget

;; Transition frame flags used when every scratch register is saved in the frame (see RhpGcPollRare).
PROBE_SAVE_FLAGS_EVERYTHING     equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH

    ;; Build a map of symbols representing offsets into the transition frame (see PInvokeTransitionFrame in
    ;; rhbinder.h) and keep these two in sync.
    ;;
    ;; ALLOC_PROBE_FRAME and FREE_PROBE_FRAME below use literal offsets (#0x20 .. #0x130) rather than the
    ;; symbols defined here, so any change to this map must be mirrored in those macros as well.
    ;; NOTE(review): those literals imply OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs == 0x20 -- confirm
    ;; against the structure definition.
    map 0
            field OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs
            field 10 * 8 ; x19..x28
m_CallersSP field 8      ; SP at routine entry
            field 19 * 8 ; x0..x18
            field 8      ; lr
m_SavedNZCV field 8      ; Saved condition flags
            field 4 * 8  ; d0..d3
PROBE_FRAME_SIZE    field 0

    ;; Support for setting up a transition frame when performing a GC probe. In many respects this is very
    ;; similar to the logic in PUSH_COOP_PINVOKE_FRAME in AsmMacros.h. In most cases setting up the
    ;; transition frame comprises the entirety of the caller's prolog (and initial non-prolog code) and
    ;; similarly for the epilog. Those cases can be dealt with using PROLOG_PROBE_FRAME and EPILOG_PROBE_FRAME
    ;; defined below. For the special cases where additional work has to be done in the prolog we also provide
    ;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control
    ;; to be asserted.
    ;;
    ;; Note that we currently employ a significant simplification of frame setup: we always allocate a
    ;; maximally-sized PInvokeTransitionFrame and save all of the registers. Depending on the caller this can
    ;; lead to up to 20 additional register saves (x0-x18, lr) or 160 bytes of stack space. I have done no
    ;; analysis to see whether any of the worst cases occur on performance sensitive paths and whether the
    ;; additional saves will show any measurable degradation.

    ;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro
    ;; can only be called from within the prolog).
    MACRO
        ALLOC_PROBE_FRAME $extraStackSpace

        ;; Allocates PROBE_FRAME_SIZE plus the requested extra stack space and spills the incoming register
        ;; state into the new frame. The extra space ends up above the frame, starting at
        ;; [sp, #PROBE_FRAME_SIZE]. Every instruction is emitted through a PROLOG_* macro, so this must
        ;; be the prolog (or part of the prolog) of the enclosing function.
        ;;
        ;; The literal offsets used here must agree with the map that defines m_CallersSP, m_SavedNZCV and
        ;; PROBE_FRAME_SIZE. The Thread*, flags, caller-SP and NZCV slots are only reserved here; they are
        ;; filled in later by other code (e.g. INIT_PROBE_FRAME).

        ;; First create PInvokeTransitionFrame      
        PROLOG_SAVE_REG_PAIR   fp, lr, #-(PROBE_FRAME_SIZE + $extraStackSpace)!      ;; Push down stack pointer and store FP and LR

        ;; Slot at [sp, #0x10] is reserved for Thread *
        ;; Slot at [sp, #0x18] is reserved for bitmask of saved registers

        ;; Save callee saved registers
        PROLOG_SAVE_REG_PAIR   x19, x20, #0x20
        PROLOG_SAVE_REG_PAIR   x21, x22, #0x30
        PROLOG_SAVE_REG_PAIR   x23, x24, #0x40
        PROLOG_SAVE_REG_PAIR   x25, x26, #0x50
        PROLOG_SAVE_REG_PAIR   x27, x28, #0x60

        ;; Slot at [sp, #0x70] is reserved for caller sp

        ;; Save the scratch registers 
        PROLOG_NOP str         x0,       [sp, #0x78]
        PROLOG_NOP stp         x1, x2,   [sp, #0x80]
        PROLOG_NOP stp         x3, x4,   [sp, #0x90]
        PROLOG_NOP stp         x5, x6,   [sp, #0xA0]
        PROLOG_NOP stp         x7, x8,   [sp, #0xB0]
        PROLOG_NOP stp         x9, x10,  [sp, #0xC0]
        PROLOG_NOP stp         x11, x12, [sp, #0xD0]
        PROLOG_NOP stp         x13, x14, [sp, #0xE0]
        PROLOG_NOP stp         x15, x16, [sp, #0xF0]
        PROLOG_NOP stp         x17, x18, [sp, #0x100]
        ;; lr is stored a second time here (it was already saved next to fp at [sp, #8] above).
        PROLOG_NOP str         lr,       [sp, #0x110]

        ;; Slot at [sp, #0x118] is reserved for NZCV

        ; Save the floating return registers
        PROLOG_NOP stp         d0, d1,   [sp, #0x120]
        PROLOG_NOP stp         d2, d3,   [sp, #0x130]

    MEND

    ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all
    ;; registers are restored (apart from sp and pc), even volatiles.
    ;;
    ;;  $extraStackSpace : extra stack space to release; must match the value that was passed to the
    ;;                     ALLOC_PROBE_FRAME that built the frame.
    ;;
    ;; Because this macro is epilog-only, every instruction is emitted through an EPILOG_* macro so that the
    ;; restores are attributed to the epilog rather than to the prolog.
    MACRO
        FREE_PROBE_FRAME $extraStackSpace

        ;; Restore the scratch registers 
        EPILOG_NOP ldr          x0,       [sp, #0x78]
        EPILOG_NOP ldp          x1, x2,   [sp, #0x80]
        EPILOG_NOP ldp          x3, x4,   [sp, #0x90]
        EPILOG_NOP ldp          x5, x6,   [sp, #0xA0]
        EPILOG_NOP ldp          x7, x8,   [sp, #0xB0]
        EPILOG_NOP ldp          x9, x10,  [sp, #0xC0]
        EPILOG_NOP ldp          x11, x12, [sp, #0xD0]
        EPILOG_NOP ldp          x13, x14, [sp, #0xE0]
        EPILOG_NOP ldp          x15, x16, [sp, #0xF0]
        EPILOG_NOP ldp          x17, x18, [sp, #0x100]
        ;; This value of lr is superseded by the fp/lr pair restore at the end of the macro.
        EPILOG_NOP ldr          lr,       [sp, #0x110]

        ; Restore the floating return registers
        EPILOG_NOP ldp          d0, d1,   [sp, #0x120]
        EPILOG_NOP ldp          d2, d3,   [sp, #0x130]

        ;; Restore callee saved registers
        EPILOG_RESTORE_REG_PAIR x19, x20, #0x20
        EPILOG_RESTORE_REG_PAIR x21, x22, #0x30
        EPILOG_RESTORE_REG_PAIR x23, x24, #0x40
        EPILOG_RESTORE_REG_PAIR x25, x26, #0x50
        EPILOG_RESTORE_REG_PAIR x27, x28, #0x60

        ;; Reload fp/lr and pop the whole frame (including the extra stack space) in one step.
        EPILOG_RESTORE_REG_PAIR fp, lr, #(PROBE_FRAME_SIZE + $extraStackSpace)!
    MEND

    ;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can
    ;; occur only outside the prolog (includes linking the frame to the current Thread). This macro assumes SP
    ;; is invariant outside of the prolog.
    ;;
    ;;  $threadReg     : register containing the Thread* (this will be preserved)
    ;;  $trashReg      : register that can be trashed by this macro
    ;;  $savedRegsMask : value to initialize m_Flags field with (register or #constant)
    ;;  $gcFlags       : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant
    ;;  $frameSize     : total size of the method's stack frame (including probe frame size)
    MACRO
        INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags, $frameSize

        ;; Fills in the three slots that ALLOC_PROBE_FRAME only reserves: the Thread*, the flags word and
        ;; the caller's SP. On exit the trash register holds the computed caller SP.

        ;; Assembly-time copy of the mask argument text, so its first character can be inspected.
        LCLS BitmaskStr
BitmaskStr SETS "$savedRegsMask"        

        str         $threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread]            ; Thread *
        IF          BitmaskStr:LEFT:1 == "#"
            ;; The savedRegsMask is a constant, remove the leading "#" since the MOVL64 doesn't expect it
BitmaskStr  SETS BitmaskStr:RIGHT:(:LEN:BitmaskStr - 1)
            MOVL64      $trashReg, $BitmaskStr, $gcFlags
        ELSE
            ;; gcFlags is only consumed on the constant path above, so reject it here.
            ASSERT "$gcFlags" == ""
            ;; The savedRegsMask is a register
            mov         $trashReg, $savedRegsMask
        ENDIF
        str         $trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
        ;; Caller SP = current sp + total frame size; this relies on sp not having moved since the prolog.
        add         $trashReg, sp, #$frameSize
        str         $trashReg, [sp, #m_CallersSP]
    MEND    

    ;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro
    ;; first in the method (no further prolog instructions can be added after this).
    ;;
    ;;  $threadReg     : register containing the Thread* (this will be preserved). If defaulted (specify |) then
    ;;                   the current thread will be calculated inline into x2 ($trashReg must not equal x2 in
    ;;                   this case)
    ;;  $trashReg      : register that can be trashed by this macro
    ;;  $savedRegsMask : value to initialize m_Flags field with (register or #constant)
    ;;  $gcFlags       : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant
    ;;
    ;; On exit the trash register holds sp (the address of the new frame) and that address has also been
    ;; stored into the thread's m_pHackPInvokeTunnel field.
    MACRO
        PROLOG_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags

        ; Local string tracking the name of the register in which the Thread* is kept. Defaults to the value
        ; of $threadReg.
        LCLS __PPF_ThreadReg
__PPF_ThreadReg SETS "$threadReg"

        ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving
        ; incoming register values into it.
        ALLOC_PROBE_FRAME 0

        ; If the caller didn't provide a value for $threadReg then generate code to fetch the Thread* into x2.
        ; Record that x2 holds the Thread* in our local variable.
        IF "$threadReg" == ""
            ASSERT "$trashReg" != "x2"
__PPF_ThreadReg SETS "x2"
            INLINE_GETTHREAD $__PPF_ThreadReg, $trashReg
        ENDIF

        ; Perform the rest of the PInvokeTransitionFrame initialization.
        INIT_PROBE_FRAME $__PPF_ThreadReg, $trashReg, $savedRegsMask, $gcFlags, PROBE_FRAME_SIZE
        ; Publish the address of the frame (== sp) in the Thread.
        mov         $trashReg, sp
        str         $trashReg, [$__PPF_ThreadReg, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
    MEND

    ; Simple macro to use when PROLOG_PROBE_FRAME was used to set up and initialize the prolog and
    ; PInvokeTransitionFrame. This will define the epilog including a return via the restored LR.
    ; All registers saved in the frame are reloaded (see FREE_PROBE_FRAME), so any changes made to the
    ; saved values in the frame become visible to the code being returned to.
    MACRO
        EPILOG_PROBE_FRAME

        ; No extra stack space: PROLOG_PROBE_FRAME allocates the frame with ALLOC_PROBE_FRAME 0.
        FREE_PROBE_FRAME 0
        EPILOG_RETURN
    MEND

;; ALLOC_PROBE_FRAME will save the first 4 vfp registers, in order to avoid trashing VFP registers across the loop 
;; hijack, we must save the rest -- d4-d31 (28).
EXTRA_SAVE_SIZE equ (28*8)

    ;; Prolog for the loop hijack helper: first allocate EXTRA_SAVE_SIZE bytes and spill d4-d31 into them,
    ;; then build a regular probe frame below that area. After this macro the VFP save area therefore
    ;; starts at [sp, #PROBE_FRAME_SIZE]. Must be paired with FREE_LOOP_HIJACK_FRAME.
    MACRO
        ALLOC_LOOP_HIJACK_FRAME

        PROLOG_STACK_ALLOC EXTRA_SAVE_SIZE

;;      save VFP registers that were not saved by the ALLOC_PROBE_FRAME
        PROLOG_NOP stp         d4, d5,   [sp]
        PROLOG_NOP stp         d6, d7,   [sp, #0x10]
        PROLOG_NOP stp         d8, d9,   [sp, #0x20]
        PROLOG_NOP stp         d10, d11, [sp, #0x30]
        PROLOG_NOP stp         d12, d13, [sp, #0x40]
        PROLOG_NOP stp         d14, d15, [sp, #0x50]
        PROLOG_NOP stp         d16, d17, [sp, #0x60]
        PROLOG_NOP stp         d18, d19, [sp, #0x70]
        PROLOG_NOP stp         d20, d21, [sp, #0x80]
        PROLOG_NOP stp         d22, d23, [sp, #0x90]
        PROLOG_NOP stp         d24, d25, [sp, #0xA0]
        PROLOG_NOP stp         d26, d27, [sp, #0xB0]
        PROLOG_NOP stp         d28, d29, [sp, #0xC0]
        PROLOG_NOP stp         d30, d31, [sp, #0xD0]

        ALLOC_PROBE_FRAME 0
    MEND

    ;; Undo the effects of ALLOC_LOOP_HIJACK_FRAME: pop the probe frame (restoring everything that
    ;; FREE_PROBE_FRAME restores), reload d4-d31 from the save area that is then at [sp], and finally release
    ;; that area. This is epilog code, so the VFP reloads are emitted through EPILOG_NOP to keep them
    ;; attributed to the epilog, in line with the surrounding FREE_PROBE_FRAME / EPILOG_STACK_FREE.
    MACRO
        FREE_LOOP_HIJACK_FRAME

        FREE_PROBE_FRAME 0

;;      restore VFP registers that will not be restored by the FREE_PROBE_FRAME
        EPILOG_NOP ldp         d4, d5,   [sp]
        EPILOG_NOP ldp         d6, d7,   [sp, #0x10]
        EPILOG_NOP ldp         d8, d9,   [sp, #0x20]
        EPILOG_NOP ldp         d10, d11, [sp, #0x30]
        EPILOG_NOP ldp         d12, d13, [sp, #0x40]
        EPILOG_NOP ldp         d14, d15, [sp, #0x50]
        EPILOG_NOP ldp         d16, d17, [sp, #0x60]
        EPILOG_NOP ldp         d18, d19, [sp, #0x70]
        EPILOG_NOP ldp         d20, d21, [sp, #0x80]
        EPILOG_NOP ldp         d22, d23, [sp, #0x90]
        EPILOG_NOP ldp         d24, d25, [sp, #0xA0]
        EPILOG_NOP ldp         d26, d27, [sp, #0xB0]
        EPILOG_NOP ldp         d28, d29, [sp, #0xC0]
        EPILOG_NOP ldp         d30, d31, [sp, #0xD0]

        EPILOG_STACK_FREE EXTRA_SAVE_SIZE
    MEND

;;
;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this 
;; thread if it finds it at an IP that isn't managed code.
;;
;; Register state on entry:
;;  x2: thread pointer
;;  
;; Register state on exit:
;;  No registers are modified; only the three hijack fields of the Thread are zeroed.
;;
    MACRO
        ClearHijackState

        ;; The paired store below relies on the two fields being adjacent, location first.
        ASSERT OFFSETOF__Thread__m_pvHijackedReturnAddress == (OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8)
        ;; Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress
        stp         xzr, xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
        ;; Clear m_uHijackedReturnValueFlags
        str         xzr, [x2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags]
    MEND

;;
;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and 
;; clears the hijack state.
;;
;; Register state on entry:
;;  All registers correct for return to the original return address.
;;  
;; Register state on exit:
;;  x2: thread pointer
;;  x3: trashed
;;  x12: transition frame flags for the return registers x0 and x1
;;  lr: the original return address, loaded from the Thread's m_pvHijackedReturnAddress
;;
    MACRO
        FixupHijackedCallstack

        ;; x2 <- GetThread(), TRASHES x3
        INLINE_GETTHREAD x2, x3
        
        ;;
        ;; Fix the stack by restoring the original return address
        ;;
        ;; The paired load below relies on the flags field immediately following the return address field.
        ASSERT OFFSETOF__Thread__m_uHijackedReturnValueFlags == (OFFSETOF__Thread__m_pvHijackedReturnAddress + 8)
        ;; Load m_pvHijackedReturnAddress and m_uHijackedReturnValueFlags
        ldp         lr, x12, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]

        ;; Both values have been captured in registers, so the Thread's copies can now be zeroed.
        ClearHijackState
    MEND

;;
;; Set the Thread state and wait for a GC to complete.
;;
;; The wait is skipped entirely when either TSF_SuppressGcStress or TSF_DoNotTriggerGC is set in the
;; thread's m_ThreadStateFlags.
;;
;; Register state on entry:
;;  x4: thread pointer
;;  
;; Register state on exit:
;;  x4: thread pointer
;;  All other registers trashed
;;
;; NOTE(review): this macro itself does not save x4 around the call; the "x4 preserved" guarantee above
;; therefore depends on RhpWaitForGCNoAbort -- confirm against its implementation.
;;

    EXTERN RhpWaitForGCNoAbort

    MACRO
        WaitForGCCompletion

        ldr         w2, [x4, #OFFSETOF__Thread__m_ThreadStateFlags]
        tst         w2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC
        bne         %ft0

        ;; x2 <- transition frame previously published in the Thread (see PROLOG_PROBE_FRAME).
        ;; NOTE(review): presumably RhpWaitForGCNoAbort takes the frame in x2 (custom convention) -- verify.
        ldr         x2, [x4, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
        bl          RhpWaitForGCNoAbort
0
    MEND

    ;; Emits a dummy prolog plus one nop in front of a hijack target label. The instructions exist only to
    ;; give the function unwind data; they are not meant to run (see the comments in the body).
    MACRO
        HijackTargetFakeProlog

        ;; This is a fake entrypoint for the method that 'tricks' the OS into calling our personality routine.
        ;; The code here should never be executed, and the unwind info is bogus, but we don't mind since the
        ;; stack is broken by the hijack anyway until after we fix it below.
        PROLOG_SAVE_REG_PAIR   fp, lr, #-0x10!
        nop                     ; We also need a nop here to simulate the implied bl instruction.  Without 
                                ; this, an OS-applied -4 will back up into the method prolog and the unwind 
                                ; will not be applied as desired.

    MEND

;;
;;
;;
;; GC Probe Hijack targets
;;
;;
    EXTERN RhpPInvokeExceptionGuard

    ;; Hijack target for normal GC suspension. The real entry point is the RhpGcProbeHijack label; the
    ;; wrapper function around it only exists to attach RhpPInvokeExceptionGuard and the fake prolog.
    ;; Restores the original return address into lr, then continues in RhpGcProbe with
    ;;  x2:  thread pointer
    ;;  x12: return-register flags combined with DEFAULT_FRAME_SAVE_FLAGS
    NESTED_ENTRY RhpGcProbeHijackWrapper, .text, RhpPInvokeExceptionGuard
        HijackTargetFakeProlog

    LABELED_RETURN_ADDRESS RhpGcProbeHijack

        FixupHijackedCallstack
        orr         x12, x12, #DEFAULT_FRAME_SAVE_FLAGS
        b           RhpGcProbe
    NESTED_END RhpGcProbeHijackWrapper

#ifdef FEATURE_GC_STRESS
;;
;;
;; GC Stress Hijack targets
;;
;;
    ;; Hijack target for GC stress. Restores the original return address into lr, then continues in
    ;; RhpGcStressProbe with x2 = thread pointer and x12 = return-register flags combined with
    ;; DEFAULT_FRAME_SAVE_FLAGS.
    LEAF_ENTRY RhpGcStressHijack
        FixupHijackedCallstack
        orr         x12, x12, #DEFAULT_FRAME_SAVE_FLAGS
        b           RhpGcStressProbe
    LEAF_END RhpGcStressHijack
;;
;; Worker for our GC stress probes.  Do not call directly!!  
;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. 
;; This worker performs the GC Stress work and returns to the original return address.
;;
;; Register state on entry:
;;  x0: hijacked function return value
;;  x1: hijacked function return value
;;  x2: thread pointer
;;  w12: register bitmask
;;
;; Register state on exit:
;;  Scratch registers, except for x0, have been trashed
;;  All other registers restored as they were when the hijack was first reached.
;;
    NESTED_ENTRY RhpGcStressProbe
        ;; Build the transition frame: Thread* already in x2, x3 is scratch, flags come from x12. The gcFlags
        ;; argument is left empty because the mask is passed in a register.
        PROLOG_PROBE_FRAME x2, x3, x12, 

        bl          $REDHAWKGCINTERFACE__STRESSGC

        ;; Reload all saved registers from the frame and return through the restored lr.
        EPILOG_PROBE_FRAME
    NESTED_END RhpGcStressProbe
#endif ;; FEATURE_GC_STRESS

    ;; Fast path of the GC probe reached from RhpGcProbeHijack: if the TrapThreads bit of RhpTrapThreads is
    ;; set, continue in RhpGcProbeRare; otherwise return straight to the address in lr.
    ;; x2 and x12 are passed through untouched for RhpGcProbeRare; x3 is trashed.
    ;; NOTE: the leading brk makes this stub trap unconditionally until the TODO below is resolved.
    LEAF_ENTRY RhpGcProbe
        brk 0xf000 ;; TODO: remove after debugging/testing stub
        ldr         x3, =RhpTrapThreads
        ldr         w3, [x3]
        tbnz        x3, #TrapThreadsFlags_TrapThreads_Bit, RhpGcProbeRare
        ret
    LEAF_END RhpGcProbe

    EXTERN RhpThrowHwEx

    ;; Slow path of the GC probe: builds a transition frame, waits for the GC, then either returns to the
    ;; original return address or raises a thread abort.
    ;;
    ;; Register state on entry:
    ;;  x2:  thread pointer
    ;;  x12: transition frame flags
    ;; NOTE: the leading brk makes this stub trap unconditionally until the TODO below is resolved.
    NESTED_ENTRY RhpGcProbeRare
        brk 0xf000 ;; TODO: remove after debugging/testing stub
        PROLOG_PROBE_FRAME x2, x3, x12, 

        ;; WaitForGCCompletion expects the thread pointer in x4.
        mov         x4, x2
        WaitForGCCompletion

        ;; Re-read the frame's flags word and check whether the thread-abort bit is set in it.
        ldr         x2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
        tbnz        x2, #PTFF_THREAD_ABORT_BIT, %F1

        EPILOG_PROBE_FRAME

1        
        ;; Thread abort: restore every register, then tail-branch to RhpThrowHwEx with the abort status in
        ;; w0 and the restored return address in x1.
        FREE_PROBE_FRAME 0
        EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT
        EPILOG_NOP mov x1, lr ;; return address as exception PC
        EPILOG_NOP b RhpThrowHwEx
    NESTED_END RhpGcProbeRare

    ;; GC poll fast path. Checks the TrapThreads bit of RhpTrapThreads and either returns immediately or
    ;; continues in RhpGcPollRare. No register is modified on either path: x0 is the only register used and it
    ;; is saved on the stack around the check (a 16-byte slot is used so that sp stays 16-byte aligned).
    ;; NOTE: the leading brk makes this stub trap unconditionally until the TODO below is resolved.
    LEAF_ENTRY RhpGcPoll
        brk 0xf000 ;; TODO: remove after debugging/testing stub
        ; @todo: I'm assuming it's not OK to trash any register here. If that's not true we can optimize the
        ; push/pops out of this fast path.
        str         x0, [sp, #-0x10]!       ; push x0 (pre-indexed: sp -= 0x10, then store)
        ldr         x0, =RhpTrapThreads
        ldr         w0, [x0]
        tbnz        x0, #TrapThreadsFlags_TrapThreads_Bit, %F0
        ldr         x0, [sp], #0x10         ; pop x0 (post-indexed: load, then sp += 0x10)
        ret
0
        ldr         x0, [sp], #0x10         ; pop x0 so RhpGcPollRare sees the original register state
        b           RhpGcPollRare
    LEAF_END RhpGcPoll

    ;; GC poll slow path, reached from RhpGcPoll with all registers as they were at the poll site. Saves
    ;; everything in a transition frame, waits for the GC and returns with all saved registers reloaded.
    ;; NOTE: the leading brk makes this stub trap unconditionally until the TODO below is resolved.
    NESTED_ENTRY RhpGcPollRare
        brk 0xf000 ;; TODO: remove after debugging/testing stub
        ;; Thread* defaulted ('|'), so it is computed into x2; x3 is scratch; the mask is a constant.
        PROLOG_PROBE_FRAME |, x3, #PROBE_SAVE_FLAGS_EVERYTHING, 0

        ; Unhijack this thread, if necessary.
        INLINE_THREAD_UNHIJACK x2, x0, x1       ;; trashes x0, x1

        ;; WaitForGCCompletion expects the thread pointer in x4.
        mov         x4, x2
        WaitForGCCompletion

        EPILOG_PROBE_FRAME
    NESTED_END RhpGcPollRare

    ;; Placeholder entry point: the body is a single brk, so reaching it traps. There is no return
    ;; instruction after the brk.
    LEAF_ENTRY RhpGcPollStress
        ;
        ; loop hijacking is used instead
        ;
        brk 0xf000

    LEAF_END RhpGcPollStress


#ifdef FEATURE_GC_STRESS
    ;; Builds a PAL_LIMITED_CONTEXT describing the call site on the stack and passes its address (in x0) to
    ;; $THREAD__HIJACKFORGCSTRESS. The integer (x0, x1) and VFP (d0-d3) return value registers are preserved
    ;; across the call; other scratch registers are not saved here.
    NESTED_ENTRY RhpHijackForGcStress
        ;; This function should be called from right before epilog

        ;; Push FP and LR, and allocate stack to hold PAL_LIMITED_CONTEXT structure and VFP return value registers
        PROLOG_SAVE_REG_PAIR    fp, lr, #-(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)!

        ;;
        ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the
        ;; IP after the call to this helper.
        ;;
        ;; This is very likely overkill since the calculation of the return address should only need SP and 
        ;; LR, but this is test code, so I'm not too worried about efficiency.
        ;;
        ;; Setup a PAL_LIMITED_CONTEXT on the stack 
        ;; {
            ;; FP and LR already pushed.
            PROLOG_NOP  stp         x0, x1, [sp, #0x10]
            PROLOG_SAVE_REG_PAIR    x19, x20, #0x20
            PROLOG_SAVE_REG_PAIR    x21, x22, #0x30
            PROLOG_SAVE_REG_PAIR    x23, x24, #0x40
            PROLOG_SAVE_REG_PAIR    x25, x26, #0x50
            PROLOG_SAVE_REG_PAIR    x27, x28, #0x60
            ;; lr is stored a second time at 0x78.
            ;; NOTE(review): presumably 0x78 is the IP slot of PAL_LIMITED_CONTEXT -- confirm against its layout.
            PROLOG_SAVE_REG         lr, #0x78

        ;; } end PAL_LIMITED_CONTEXT

        ;; Save VFP return value
        stp         d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)]
        stp         d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)]

        ;; Compute and save SP at callsite.
        add         x0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)   ;; +0x20 for the pushes right before the context struct
        str         x0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP]

        mov         x0, sp      ; Address of PAL_LIMITED_CONTEXT
        bl          $THREAD__HIJACKFORGCSTRESS

        ;; Restore return value registers (saved in PAL_LIMITED_CONTEXT structure)
        ldp         x0, x1, [sp, #0x10]

        ;; Restore VFP return value
        ldp         d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)]
        ldp         d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)]

        ;; Epilog
        EPILOG_RESTORE_REG_PAIR     x19, x20, #0x20
        EPILOG_RESTORE_REG_PAIR     x21, x22, #0x30
        EPILOG_RESTORE_REG_PAIR     x23, x24, #0x40
        EPILOG_RESTORE_REG_PAIR     x25, x26, #0x50
        EPILOG_RESTORE_REG_PAIR     x27, x28, #0x60
        EPILOG_RESTORE_REG_PAIR     fp, lr, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)!
        EPILOG_RETURN

    NESTED_END RhpHijackForGcStress

    ;; Variant of RhpHijackForGcStress that is jumped to rather than called: the address to resume at is
    ;; passed in x9 while lr still holds the return address of the code that jumped here. x9 is recorded in
    ;; the context at 0x78 and the function ends with 'ret x9' after restoring fp/lr.
    NESTED_ENTRY RhpHijackForGcStressLeaf
        ;; This should be jumped to, right before epilog
        ;; x9 has the return address (we don't care about trashing scratch regs at this point)

        ;; Push FP and LR, and allocate stack to hold PAL_LIMITED_CONTEXT structure and VFP return value registers
        PROLOG_SAVE_REG_PAIR    fp, lr, #-(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)!

        ;;
        ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the
        ;; IP after the call to this helper.
        ;;
        ;; This is very likely overkill since the calculation of the return address should only need SP and 
        ;; LR, but this is test code, so I'm not too worried about efficiency.
        ;;
        ;; Setup a PAL_LIMITED_CONTEXT on the stack 
        ;; {
            ;; FP and LR already pushed.
            PROLOG_NOP  stp         x0, x1, [sp, #0x10]
            PROLOG_SAVE_REG_PAIR    x19, x20, #0x20
            PROLOG_SAVE_REG_PAIR    x21, x22, #0x30
            PROLOG_SAVE_REG_PAIR    x23, x24, #0x40
            PROLOG_SAVE_REG_PAIR    x25, x26, #0x50
            PROLOG_SAVE_REG_PAIR    x27, x28, #0x60
            ; The PROLOG_SAVE_REG macro does not allow a scratch register to be used:
            PROLOG_NOP  str         x9, [sp, #0x78]           ; this is return address from RhpHijackForGcStress; lr is return address for its caller

        ;; } end PAL_LIMITED_CONTEXT

        ;; Save VFP return value
        stp         d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)]
        stp         d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)]

        ;; Compute and save SP at callsite.
        add         x0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)   ;; +0x20 for the pushes right before the context struct
        str         x0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP]

        mov         x0, sp      ; Address of PAL_LIMITED_CONTEXT
        bl          $THREAD__HIJACKFORGCSTRESS

        ;; Restore return value registers (saved in PAL_LIMITED_CONTEXT structure)
        ldp         x0, x1, [sp, #0x10]

        ;; Restore VFP return value
        ldp         d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)]
        ldp         d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)]

        ;; Epilog
        EPILOG_RESTORE_REG_PAIR     x19, x20, #0x20
        EPILOG_RESTORE_REG_PAIR     x21, x22, #0x30
        EPILOG_RESTORE_REG_PAIR     x23, x24, #0x40
        EPILOG_RESTORE_REG_PAIR     x25, x26, #0x50
        EPILOG_RESTORE_REG_PAIR     x27, x28, #0x60
        ;; Reload the resume address from the context slot; the value is read back from memory rather than
        ;; reused from the register because x9 is a scratch register and the call above may have changed it.
        EPILOG_NOP     ldr          x9, [sp, #0x78]
        EPILOG_RESTORE_REG_PAIR     fp, lr, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)!
        EPILOG_NOP     ret x9

    NESTED_END RhpHijackForGcStressLeaf

#endif ;; FEATURE_GC_STRESS

#if 0 // used by the binder only
;;
;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH 
;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing
;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of 
;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the 
;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be 
;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the 
;; handler in the caller.
;; 
;; If we are hijacked, then we jump to a routine that will unhijack appropriately and wait for the GC to
;; complete. There are also variants for GC stress.
;;
;; Note that at this point we are either hijacked or we are not, and this will not change until we return to
;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack 
;; another thread while the target thread is suspended in managed code, and this is _not_ managed code.
;;
    MACRO
        RTU_EH_JUMP_HELPER $funcName, $hijackFuncName, $isStress, $stressFuncName

        ;; Defines one EH jump helper. lr is compared against the hijack target(s): on a match control goes to
        ;; the corresponding EH-jump probe routine, otherwise the helper "returns" to the handler address
        ;; held in x2. x0 is used as scratch for the comparisons. The stress comparison is only assembled when
        ;; the isStress argument is {true}.

        LEAF_ENTRY $funcName
            ldr         x0, =$hijackFuncName
            cmp         x0, lr
            beq         RhpGCProbeForEHJump

            IF $isStress
            ldr         x0, =$stressFuncName
            cmp         x0, lr
            beq         RhpGCStressProbeForEHJump
            ENDIF

            ;; We are not hijacked, so we can return to the handler.
            ;; We return to keep the call/return prediction balanced.
            mov         lr, x2  ; Update the return address
            ret
        LEAF_END $funcName
    MEND
;; We need an instance of the helper for each possible hijack function. The binder has enough
;; information to determine which one we need to use for any function.
;; All instances compare against the same RhpGcProbeHijack target; the GC stress instances additionally
;; compare against RhpGcStressHijack.
    RTU_EH_JUMP_HELPER RhpEHJumpScalar,         RhpGcProbeHijack, {false}, 0
    RTU_EH_JUMP_HELPER RhpEHJumpObject,         RhpGcProbeHijack, {false}, 0
    RTU_EH_JUMP_HELPER RhpEHJumpByref,          RhpGcProbeHijack,  {false}, 0
#ifdef FEATURE_GC_STRESS
    RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijack, {true},  RhpGcStressHijack
    RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijack, {true},  RhpGcStressHijack
    RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress,  RhpGcProbeHijack,  {true},  RhpGcStressHijack
#endif

;;
;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs.
;;
;; Register state on entry:
;;  x0: scratch
;;  x1: reference to the exception object.
;;  x2: handler address we want to jump to.
;;  Non-volatile registers are all already correct for return to the caller.
;;  The stack is as if we are just about to returned from the call
;;  
;; Register state on exit:
;;  x0: reference to the exception object
;;  x2: thread pointer
;;
    MACRO
        EHJumpProbeProlog

        ;; Builds a probe frame with 0x10 bytes of extra stack. The first 8 bytes of the extra area, at
        ;; [sp, #PROBE_FRAME_SIZE], hold the handler address from x2; the other 8 bytes are unused and only
        ;; keep sp 16-byte aligned. The exception object is moved to x0 before the registers are spilled so
        ;; that it lands in the frame's x0 slot, which is the slot flagged as a GC reference below.

        PROLOG_NOP mov x0, x1  ; move the ex object reference into x0 so we can report it
        ALLOC_PROBE_FRAME 0x10
        str         x2, [sp, #PROBE_FRAME_SIZE]

        ;; x2 <- GetThread(), TRASHES x1
        INLINE_GETTHREAD x2, x1
        
        ;; Recover the original return address and update the frame
        ldr         lr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
        str         lr, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP]

        ;; ClearHijackState expects thread in x2
        ClearHijackState

        ; TRASHES x1
        ; The frame size must cover the probe frame plus the full 0x10 bytes of extra stack allocated above,
        ; otherwise the recorded caller SP would not match the SP at entry.
        INIT_PROBE_FRAME x2, x1, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0), PTFF_X0_IS_GCREF_HI, (PROBE_FRAME_SIZE + 0x10)
        ; Publish the address of the frame (== sp) in the Thread, as PROLOG_PROBE_FRAME does.
        mov         x1, sp
        str         x1, [x2, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
    MEND

;;
;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the 
;; final jump to the handler for EH jump probe funcs.
;;
;; Register state on entry:
;;  x0: reference to the exception object
;;  x1-x3: scratch
;;  
;; Register state on exit:
;;  sp: correct for return to the caller
;;  x1: reference to the exception object
;;
    MACRO
        EHJumpProbeEpilog

        ;; Load the handler address that EHJumpProbeProlog stored just above the probe frame.
        ;; NOTE(review): FREE_PROBE_FRAME reloads x2 from the frame's own x1/x2 slot afterwards, so the value
        ;; used by 'ret x2' is the x2 captured by ALLOC_PROBE_FRAME -- confirm both hold the handler address.
        ldr         x2, [sp, #PROBE_FRAME_SIZE]
        FREE_PROBE_FRAME 0x10       ; This restores exception object back into x0
        EPILOG_NOP  mov x1, x0      ; Move the Exception object back into x1 where the catch handler expects it
        EPILOG_NOP  ret x2
    MEND

;;
;; We are hijacked for a normal GC (not GC stress), so we need to unhijack and wait for the GC to complete.
;;
;; Register state on entry:
;;  x0: reference to the exception object.
;;  x2: thread
;;  Non-volatile registers are all already correct for return to the caller.
;;  The stack is as if we have tail called to this function (lr points to return address).
;;        
;; Register state on exit:
;;  x0: reference to the exception object
;;
    ;; Reached from the EH jump helpers when lr equals RhpGcProbeHijack. Builds the EH-jump probe frame,
    ;; waits for the GC and then jumps to the handler with the exception object in x1.
    ;; NOTE(review): EHJumpProbeProlog takes the exception object in x1 and the handler address in x2 and
    ;; computes the thread itself; the register description in the header comment may be out of date.
    ;; NOTE: the leading brk makes this stub trap unconditionally until the TODO below is resolved.
    NESTED_ENTRY RhpGCProbeForEHJump
        brk 0xf000 ;; TODO: remove after debugging/testing stub
        EHJumpProbeProlog

#ifdef _DEBUG
        ;;
        ;; If we get here, then we have been hijacked for a real GC, and our SyncState must
        ;; reflect that we've been requested to synchronize.

        ldr         x1, =RhpTrapThreads
        ldr         w1, [x1]
        tbnz        x1, #TrapThreadsFlags_TrapThreads_Bit, %0

        ;; TrapThreads bit not set: unexpected here, so break into the debugger.
        bl          RhDebugBreak
0
#endif ;; _DEBUG

        ;; WaitForGCCompletion expects the thread pointer in x4.
        mov         x4, x2
        WaitForGCCompletion

        EHJumpProbeEpilog
    NESTED_END RhpGCProbeForEHJump

#ifdef FEATURE_GC_STRESS
;;
;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper.
;;
;; Register state on entry:
;;  x0: reference to the exception object.
;;  x2: thread
;;  Non-volatile registers are all already correct for return to the caller.
;;  The stack is as if we have tail called to this function (lr points to return address).
;;        
;; Register state on exit:
;;  x0: reference to the exception object
;;
    NESTED_ENTRY RhpGCStressProbeForEHJump
        brk 0xf000 ;; TODO: remove after debugging/testing stub

        ;; Set up the probe frame using the same prolog as RhpGCProbeForEHJump.
        EHJumpProbeProlog

        ;; Call the GC stress helper while the probe frame is in place.
        bl          $REDHAWKGCINTERFACE__STRESSGC

        ;; Release the frame, move the exception object into x1 and continue to the handler.
        EHJumpProbeEpilog
    NESTED_END RhpGCStressProbeForEHJump
#endif ;; FEATURE_GC_STRESS
#endif ;; 0

#ifdef FEATURE_GC_STRESS
;;
;; RhpSuppressGcStress: sets TSF_SuppressGcStress in m_ThreadStateFlags of the Thread obtained
;; through INLINE_GETTHREAD, using a load-exclusive/store-exclusive retry loop.
;;
;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this.
;; The code below names only x9, x10 and x11 as scratch.
;;
    LEAF_ENTRY RhpSuppressGcStress
        INLINE_GETTHREAD x9, x10
        add         x9, x9, #OFFSETOF__Thread__m_ThreadStateFlags   ; x9 = address of the flags word
Retry
        ldxr        w10, [x9]                           ; exclusive read of the current flags
        orr         w10, w10, #TSF_SuppressGcStress     ; add the suppress bit
        stxr        w11, w10, [x9]                      ; w11 == 0 when the exclusive store succeeded
        cbnz        w11, Retry                          ; store failed: start over

        ret
    LEAF_END RhpSuppressGcStress
#endif ;; FEATURE_GC_STRESS

;; Helper called from hijacked loops
;;
;; Builds a loop hijack frame, saves the condition codes, works out which loop indirection cell
;; was used, asks RecoverLoopHijackTarget for the real branch target, optionally waits for / stresses
;; the GC, and finally continues at the recovered target (or raises a thread abort through
;; RhpThrowHwEx when PTFF_THREAD_ABORT_BIT is set in the frame flags).
;;
    LEAF_ENTRY RhpLoopHijack
;; we arrive here with essentially all registers containing useful content
;; TODO: update this comment after the RhpLoopHijack is implemented in the compiler

;; on the stack, we have two arguments:
;; - [sp+0] has the module header
;; - [sp+8] has the address of the indirection cell we jumped through
;;
;;
        brk 0xf000 ;; TODO: remove after debugging/testing stub
        ALLOC_LOOP_HIJACK_FRAME

        ; save condition codes
        mrs         x12, NZCV
        str         x12, [sp, #m_SavedNZCV]

        INLINE_GETTHREAD x4, x1
        INIT_PROBE_FRAME x4, x1, #PROBE_SAVE_FLAGS_EVERYTHING, 0, (PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 8)
;;
;;      compute the index of the indirection cell
;;
        ldr         x0, [sp,#(PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 0)]
        bl          $GetLoopIndirCells
        
        ; x0 now has address of the first loop indir cell
        ; subtract that from the address of our cell
        ; and divide by 8 to give the index of our cell
        ldr         x1, [sp,#(PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 8)]
        sub         x1, x1, x0
        lsr         x0, x1, #3

        ; x0 now has the index
        ; recover the loop hijack target, passing the module header as an additional argument
        ldr         x1, [sp,#(PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 0)]
        bl          RecoverLoopHijackTarget

        ; store the result as our pinvoke return address
        str         x0, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP]

        ; also save it in the incoming parameter space for the actual return below
        str         x0, [sp,#(PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 8)]

        ; x4 is a volatile (caller-saved) register in the ARM64 procedure call standard, so the two
        ; helper calls above are free to destroy the thread pointer that was cached in it. Fetch the
        ; thread again before using it. Only x4 and the scratch register x1 are named here, so x0
        ; (the hijack target) stays live for the GC stress code below.
        INLINE_GETTHREAD x4, x1

        ; Early out if GC stress is currently suppressed. Do this after we have computed the real address to
        ; return to but before we link the transition frame onto m_pHackPInvokeTunnel (because hitting this
        ; condition implies we're running restricted callouts during a GC itself and we could end up
        ; overwriting a co-op frame set by the code that caused the GC in the first place, e.g. a GC.Collect
        ; call).
        ldr         w1, [x4, #OFFSETOF__Thread__m_ThreadStateFlags]
        tst         w1, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC
        bne         DoneWaitingForGc

        ; link the frame into the Thread
        add         x1, sp, xzr
        str         x1, [x4, #OFFSETOF__Thread__m_pHackPInvokeTunnel]

        ;;
        ;; Unhijack this thread, if necessary.
        ;;
        INLINE_THREAD_UNHIJACK x4, x1, x2       ;; trashes x1, x2

#ifdef FEATURE_GC_STRESS

        ; Skip the stress GC while g_fGcStressStarted is still zero.
        ldr         x1, =g_fGcStressStarted
        ldr         w1, [x1]
        cbz         w1, NoGcStress

        ; Ask the runtime instance whether this loop should trigger a stress GC:
        ; x0 = runtime instance, x1 = hijack target (still in x0 from RecoverLoopHijackTarget).
        mov         x1, x0
        ldr         x0, =$g_pTheRuntimeInstance
        ldr         x0, [x0]
        bl          $RuntimeInstance__ShouldHijackLoopForGcStress

        ; Only run the stress GC when the helper answered "yes" (non-zero). The result is presumably
        ; a C++ bool, so only its low byte is tested. Clobbering the flags is fine here: the caller's
        ; NZCV was saved at entry and is restored before returning.
        tst         w0, #0xFF
        beq         NoGcStress

        bl          $REDHAWKGCINTERFACE__STRESSGC
NoGcStress
#endif ;; FEATURE_GC_STRESS

        add         x2, sp, xzr ; sp is address of PInvokeTransitionFrame
        bl          RhpWaitForGCNoAbort

DoneWaitingForGc
        ; A pending thread abort is reported through the frame flags.
        ldr         x12, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
        tbnz        x12, #PTFF_THREAD_ABORT_BIT, Abort
        ; restore condition codes
        ldr         x12, [sp, #m_SavedNZCV]
        msr         NZCV, x12

        FREE_LOOP_HIJACK_FRAME

        ; NOTE(review): x1 is overwritten with the target here, after the frame has been released;
        ; confirm the hijacked code does not rely on x1 being preserved.
        EPILOG_NOP ldr x1, [sp, #8]    ; hijack target address
        EPILOG_STACK_FREE 0x10 
        EPILOG_NOP ret x1              ; jump to the hijack target

Abort
        FREE_LOOP_HIJACK_FRAME

        EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT
        EPILOG_NOP ldr x1, [sp, #8]    ; hijack target address as exception PC
        EPILOG_STACK_FREE 0x10 
        EPILOG_NOP b RhpThrowHwEx
    LEAF_END RhpLoopHijack

    INLINE_GETTHREAD_CONSTANT_POOL

    end