Welcome to mirror list, hosted at ThFree Co, Russian Federation.

GcProbe.asm « arm « Runtime « Native « src - github.com/mono/corert.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: ac752c4b347e4915ec35d527f5855aad62c64988 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
;; Licensed to the .NET Foundation under one or more agreements.
;; The .NET Foundation licenses this file to you under the MIT license.
;; See the LICENSE file in the project root for more information.

#include "AsmMacros.h"

        TEXTAREA

    ;; Map short, readable names onto the C++-decorated names of the runtime symbols used in this file.
    ;; NOTE(review): g_fGcStressStarted is aliased to the decorated name of g_GCShadow; confirm that this is
    ;; intentional.
    SETALIAS    GetLoopIndirCells, ?GetLoopIndirCells@ModuleHeader@@QAAPAEXZ
    SETALIAS    g_fGcStressStarted, ?g_GCShadow@@3PAEA
    SETALIAS    g_pTheRuntimeInstance, ?g_pTheRuntimeInstance@@3PAVRuntimeInstance@@A
    SETALIAS    RuntimeInstance__ShouldHijackLoopForGcStress, ?ShouldHijackLoopForGcStress@RuntimeInstance@@QAA_NI@Z

    ;; Import the aliased symbols plus RecoverLoopHijackTarget; all of them are referenced by RhpLoopHijack.
    EXTERN      $g_fGcStressStarted
    EXTERN      $g_pTheRuntimeInstance
    EXTERN      $RuntimeInstance__ShouldHijackLoopForGcStress
    EXTERN      $GetLoopIndirCells
    EXTERN      RecoverLoopHijackTarget

;; Flag combinations used in this file as the initial m_dwFlags value of a probe frame.
PROBE_SAVE_FLAGS_EVERYTHING     equ (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH)
PROBE_SAVE_FLAGS_R0_IS_GCREF    equ (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF)


    ;; Build a map of symbols representing offsets into a transition frame (see PInvokeTransitionFrame in
    ;; rhbinder.h and keep these two in sync).
    ;;
    ;; Fields are listed from the lowest address upwards, which is the reverse of the order in which
    ;; ALLOC_PROBE_FRAME pushes them. PROBE_FRAME_SIZE is the offset just past the last field, i.e. the total
    ;; size of the frame (104 bytes with the field sizes below).
    map 0
m_ChainPointer  field 4         ; r11 - OS frame chain used for quick stackwalks
m_RIP           field 4         ; lr
m_FramePointer  field 4         ; r7
m_pThread       field 4
m_dwFlags       field 4         ; bitmask of saved registers
m_PreservedRegs field (4 * 6)   ; r4-r6,r8-r10
m_CallersSP     field 4         ; sp at routine entry
m_SavedR0       field 4         ; r0
m_VolatileRegs  field (4 * 4)   ; r1-r3,lr
m_ReturnVfpRegs field (8 * 4)   ; d0-d3, not really part of the struct
m_SavedAPSR     field 4         ; saved condition codes
PROBE_FRAME_SIZE    field 0

    ;; Support for setting up a transition frame when performing a GC probe. In many respects this is very
    ;; similar to the logic in PUSH_COOP_PINVOKE_FRAME in AsmMacros.h. In most cases setting up the
    ;; transition frame comprises the entirety of the caller's prolog (and initial non-prolog code) and
    ;; similarly for the epilog. Those cases can be dealt with using PROLOG_PROBE_FRAME and EPILOG_PROBE_FRAME
    ;; defined below. For the special cases where additional work has to be done in the prolog we also provide
    ;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control
    ;; to be asserted.
    ;;
    ;; Note that we currently employ a significant simplification of frame setup: we always allocate a
    ;; maximally-sized PInvokeTransitionFrame and save all of the registers. Depending on the caller this can
    ;; lead to up to five additional register saves (r0-r3,r12) or 20 bytes of stack space. I have done no
    ;; analysis to see whether any of the worst cases occur on performance sensitive paths and whether the
    ;; additional saves will show any measurable degradation.

    ;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro
    ;; can only be called from within the prolog).
    ;;
    ;; The pushes run from the highest-addressed field (m_SavedAPSR) down to the lowest (m_ChainPointer), so
    ;; on completion sp points at the start of the frame described by the field map above. The allocations
    ;; add up to PROBE_FRAME_SIZE bytes. lr is stored twice: once with the volatile registers and once as
    ;; the return address (m_RIP). No fields are initialized here beyond the pushed register values.
    MACRO
        ALLOC_PROBE_FRAME

        PROLOG_STACK_ALLOC  4                   ; Space for saved APSR
        PROLOG_VPUSH        {d0-d3}             ; Save floating point return registers
        PROLOG_PUSH         {r0-r3,lr}          ; Save volatile registers
        PROLOG_STACK_ALLOC  4                   ; Space for caller's SP
        PROLOG_PUSH         {r4-r6,r8-r10}      ; Save non-volatile registers
        PROLOG_STACK_ALLOC  8                   ; Space for flags and Thread*
        PROLOG_PUSH         {r7}                ; Save caller's frame pointer
        PROLOG_PUSH         {r11,lr}            ; Save frame-chain pointer and return address
    MEND

    ;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all
    ;; registers are restored (apart from sp and pc), even volatiles.
    ;;
    ;; The steps mirror ALLOC_PROBE_FRAME in reverse. lr is loaded twice: first from the return address slot
    ;; (m_RIP) and then again from the volatile register area, so the value left in lr is the one saved with
    ;; the volatile registers.
    MACRO
        FREE_PROBE_FRAME

        EPILOG_POP          {r11,lr}            ; Restore frame-chain pointer and return address
        EPILOG_POP          {r7}                ; Restore caller's frame pointer
        EPILOG_STACK_FREE   8                   ; Discard flags and Thread*
        EPILOG_POP          {r4-r6,r8-r10}      ; Restore non-volatile registers
        EPILOG_STACK_FREE   4                   ; Discard caller's SP
        EPILOG_POP          {r0-r3,lr}          ; Restore volatile registers
        EPILOG_VPOP         {d0-d3}             ; Restore floating point return registers
        EPILOG_STACK_FREE   4                   ; Discard saved APSR
    MEND

    ;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can
    ;; occur only outside the prolog (includes linking the frame to the current Thread). This macro assumes SP
    ;; is invariant outside of the prolog.
    ;;
    ;;  $threadReg  : register containing the Thread* (this will be preserved)
    ;;  $trashReg   : register that can be trashed by this macro
    ;;  $BITMASK    : value to initialize m_dwFlags field with (register or #constant)
    ;;  $frameSize  : total size of the method's stack frame (including probe frame size)
    ;;
    ;; This macro fills in m_pThread, m_dwFlags and m_CallersSP only. It does not store the frame address
    ;; into the Thread; the users in this file do that with a separate store to m_pHackPInvokeTunnel.
    MACRO
        INIT_PROBE_FRAME $threadReg, $trashReg, $BITMASK, $frameSize

        str         $threadReg, [sp, #m_pThread]    ; Thread *
        mov         $trashReg, $BITMASK             ; Bitmask of preserved registers
        str         $trashReg, [sp, #m_dwFlags]
        add         $trashReg, sp, #$frameSize      ; sp + frame size = sp as it was at routine entry
        str         $trashReg, [sp, #m_CallersSP]
    MEND

    ;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro
    ;; first in the method (no further prolog instructions can be added after this).
    ;;
    ;;  $threadReg  : register containing the Thread* (this will be preserved). If defaulted (specify |) then
    ;;                the current thread will be calculated inline into r2 ($trashReg must not equal r2 in
    ;;                this case)
    ;;  $trashReg   : register that can be trashed by this macro
    ;;  $BITMASK    : value to initialize m_dwFlags field with (register or #constant)
    ;;
    ;; On completion the frame is allocated, initialized, and its address (sp) has been stored in the
    ;; m_pHackPInvokeTunnel field of the Thread.
    MACRO
        PROLOG_PROBE_FRAME $threadReg, $trashReg, $BITMASK

        ; Local string tracking the name of the register in which the Thread* is kept. Defaults to the value
        ; of $threadReg.
        LCLS __PPF_ThreadReg
__PPF_ThreadReg SETS "$threadReg"

        ; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving
        ; incoming register values into it.
        ALLOC_PROBE_FRAME

        ; If the caller didn't provide a value for $threadReg then generate code to fetch the Thread* into r2.
        ; Record that r2 holds the Thread* in our local variable.
        IF "$threadReg" == ""
            ASSERT "$trashReg" != "r2"
__PPF_ThreadReg SETS "r2"
            INLINE_GETTHREAD $__PPF_ThreadReg, $trashReg
        ENDIF

        ; Perform the rest of the PInvokeTransitionFrame initialization.
        INIT_PROBE_FRAME $__PPF_ThreadReg, $trashReg, $BITMASK, PROBE_FRAME_SIZE
        ; Publish the frame: store its address (sp) into the Thread.
        str         sp, [$__PPF_ThreadReg, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
    MEND

    ; Simple macro to use when PROLOG_PROBE_FRAME was used to set up and initialize the prolog and
    ; PInvokeTransitionFrame. This will define the epilog including a return via the restored LR.
    ; Every register saved by ALLOC_PROBE_FRAME is reloaded from the frame first, so any value written into
    ; the saved slots while the frame was live is what the caller sees.
    MACRO
        EPILOG_PROBE_FRAME

        FREE_PROBE_FRAME
        EPILOG_RETURN
    MEND


;;
;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this 
;; thread if it finds it at an IP that isn't managed code.
;;
;; Register state on entry:
;;  r2: thread pointer
;;  
;; Register state on exit:
;;  r12: trashed
;;
    MACRO
        ClearHijackState

        ;; Zero both hijack bookkeeping fields of the Thread in r2, using r12 as the zero source.
        mov         r12, #0
        str         r12, [r2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
        str         r12, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
    MEND


;;
;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and 
;; clears the hijack state.
;;
;; Register state on entry:
;;  All registers correct for return to the original return address.
;;  
;; Register state on exit:
;;  r2: thread pointer
;;  r3: trashed
;;  r12: trashed
;;
    MACRO
        FixupHijackedCallstack

        ;; r2 <- GetThread(), TRASHES r3
        INLINE_GETTHREAD r2, r3
        
        ;;
        ;; Fix the stack by restoring the original return address
        ;;
        ;; The original return address is read back from the Thread into lr, so a later return through lr
        ;; resumes where the hijacked function would have returned.
        ldr         lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]

        ;; Must come after the load above: this zeroes m_pvHijackedReturnAddress (and trashes r12).
        ClearHijackState
    MEND

;;
;; Set the Thread state and wait for a GC to complete.
;;
;; Register state on entry:
;;  r4: thread pointer
;;  
;; Register state on exit:
;;  r4: thread pointer
;;  All other registers trashed
;;

    ;; Runtime helper called by the macro below with a transition frame pointer in r2.
    EXTERN RhpWaitForGCNoAbort

    MACRO
        WaitForGCCompletion

        ;; Skip the wait when any bit of the combined mask (named for TSF_SuppressGcStress and
        ;; TSF_DoNotTriggerGC) is set in the thread state flags.
        ldr         r2, [r4, #OFFSETOF__Thread__m_ThreadStateFlags]
        tst         r2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC
        bne         %ft0

        ;; r2 <- frame pointer currently stored in m_pHackPInvokeTunnel of the Thread
        ldr         r2, [r4, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
        bl          RhpWaitForGCNoAbort
0
    MEND


    ;; Emits the never-executed entry sequence that precedes each labeled hijack return address in the
    ;; hijack wrapper functions below.
    MACRO
        HijackTargetFakeProlog

        ;; This is a fake entrypoint for the method that 'tricks' the OS into calling our personality routine.
        ;; The code here should never be executed, and the unwind info is bogus, but we don't mind since the
        ;; stack is broken by the hijack anyway until after we fix it below.
        PROLOG_PUSH {lr}
        nop                     ; We also need a nop here to simulate the implied bl instruction.  Without 
                                ; this, an OS-applied -2 will back up into the method prolog and the unwind 
                                ; will not be applied as desired.

    MEND


;;
;;
;;
;; GC Probe Hijack targets
;;
;;
    ;; Routine named as the third argument of each hijack wrapper NESTED_ENTRY below (presumably the
    ;; personality routine mentioned in HijackTargetFakeProlog -- confirm against the NESTED_ENTRY macro).
    EXTERN RhpPInvokeExceptionGuard


    ;; Hijack target for functions whose return value needs no GC reporting: the flags passed in r12 are
    ;; just DEFAULT_FRAME_SAVE_FLAGS. RhpGcProbeHijackScalar is the address installed as the return address.
    NESTED_ENTRY RhpGcProbeHijackScalarWrapper, .text, RhpPInvokeExceptionGuard

        HijackTargetFakeProlog

    LABELED_RETURN_ADDRESS RhpGcProbeHijackScalar

        FixupHijackedCallstack                  ; r2 <- thread, lr <- original return address
        mov         r12, #DEFAULT_FRAME_SAVE_FLAGS
        b           RhpGcProbe
    NESTED_END RhpGcProbeHijackScalarWrapper

    ;; Hijack target for functions returning an object reference in r0: the flags passed in r12 request
    ;; that r0 be saved and reported as a GC reference.
    NESTED_ENTRY RhpGcProbeHijackObjectWrapper, .text, RhpPInvokeExceptionGuard

        HijackTargetFakeProlog

    LABELED_RETURN_ADDRESS RhpGcProbeHijackObject

        FixupHijackedCallstack                  ; r2 <- thread, lr <- original return address
        ;; Same value as DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF
        mov         r12, #PROBE_SAVE_FLAGS_R0_IS_GCREF
        b           RhpGcProbe
    NESTED_END RhpGcProbeHijackObjectWrapper

    ;; Hijack target for functions returning a byref in r0: the flags passed in r12 request that r0 be saved
    ;; and reported as a byref.
    NESTED_ENTRY RhpGcProbeHijackByrefWrapper, .text, RhpPInvokeExceptionGuard

        HijackTargetFakeProlog

    LABELED_RETURN_ADDRESS RhpGcProbeHijackByref

        FixupHijackedCallstack                  ; r2 <- thread, lr <- original return address
        mov         r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF)
        b           RhpGcProbe
    NESTED_END RhpGcProbeHijackByrefWrapper

#ifdef FEATURE_GC_STRESS
;;
;;
;; GC Stress Hijack targets
;;
;;
    ;; GC stress hijack target for a return value that needs no GC reporting. Restores the original return
    ;; address into lr and hands off to RhpGcStressProbe with the thread in r2 and the flags in r12.
    LEAF_ENTRY RhpGcStressHijackScalar
        FixupHijackedCallstack
        mov         r12, #DEFAULT_FRAME_SAVE_FLAGS
        b           RhpGcStressProbe
    LEAF_END RhpGcStressHijackScalar

    ;; GC stress hijack target for an object reference returned in r0. Restores the original return address
    ;; into lr and hands off to RhpGcStressProbe with the thread in r2 and the flags in r12.
    LEAF_ENTRY RhpGcStressHijackObject
        FixupHijackedCallstack
        ;; Same value as DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_GCREF
        mov         r12, #PROBE_SAVE_FLAGS_R0_IS_GCREF
        b           RhpGcStressProbe
    LEAF_END RhpGcStressHijackObject

    ;; GC stress hijack target for a byref returned in r0. Restores the original return address into lr and
    ;; hands off to RhpGcStressProbe with the thread in r2 and the flags in r12.
    LEAF_ENTRY RhpGcStressHijackByref
        FixupHijackedCallstack
        mov         r12, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R0 + PTFF_R0_IS_BYREF)
        b           RhpGcStressProbe
    LEAF_END RhpGcStressHijackByref


;;
;; Worker for our GC stress probes.  Do not call directly!!  
;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}. 
;; This worker performs the GC Stress work and returns to the original return address.
;;
;; Register state on entry:
;;  r0: hijacked function return value
;;  r1: hijacked function return value
;;  r2: thread pointer
;;  r12: register bitmask
;;
;; Register state on exit:
;;  Scratch registers, except for r0, have been trashed
;;  All other registers restored as they were when the hijack was first reached.
;;
    NESTED_ENTRY RhpGcStressProbe
        ;; Build and publish a probe frame: thread in r2, r3 is scratch, flags come from r12.
        PROLOG_PROBE_FRAME r2, r3, r12

        bl          $REDHAWKGCINTERFACE__STRESSGC

        ;; Reload every register saved in the frame and return through the restored lr.
        EPILOG_PROBE_FRAME
    NESTED_END RhpGcStressProbe
#endif ;; FEATURE_GC_STRESS

    ;; Exception-raising helper; RhpGcProbeRare branches to it when the thread abort flag is set.
    EXTERN RhpThrowHwEx

    ;;
    ;; Common tail of the GC probe hijack targets. Returns straight to the original return address unless
    ;; TrapThreadsFlags_TrapThreads is set in RhpTrapThreads, in which case RhpGcProbeRare does the work.
    ;;
    ;; Register state on entry (as left by the hijack wrappers):
    ;;  r2: thread pointer
    ;;  r12: register bitmask
    ;;  lr: original return address
    ;;
    ;; Register state on exit (fast path):
    ;;  r3: trashed
    ;;
    LEAF_ENTRY RhpGcProbe
        ldr         r3, =RhpTrapThreads
        ldr         r3, [r3]
        tst         r3, #TrapThreadsFlags_TrapThreads
        bne         %f0                 ; explicit forward reference: a bare numeric reference searches
                                        ; backwards first
        bx          lr                  ; fast path: no trap requested
0
        b           RhpGcProbeRare
    LEAF_END RhpGcProbe

    ;;
    ;; Slow path of RhpGcProbe: builds a probe frame, waits for the GC, then either returns to the original
    ;; return address or raises a thread abort exception.
    ;;
    ;; Register state on entry:
    ;;  r2: thread pointer
    ;;  r12: register bitmask
    ;;  lr: original return address
    ;;
    NESTED_ENTRY RhpGcProbeRare
        PROLOG_PROBE_FRAME r2, r3, r12

        mov         r4, r2              ; WaitForGCCompletion expects the thread in r4
        WaitForGCCompletion

        ;; The abort request is read back from the flags word of the frame after the wait.
        ldr         r2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_dwFlags]
        tst         r2, #PTFF_THREAD_ABORT
        bne         %f1                 ; explicit forward reference to the abort path

        EPILOG_PROBE_FRAME

1
        ;; Abort path: restore all registers, then raise the abort with the return address as the PC.
        FREE_PROBE_FRAME
        EPILOG_NOP mov         r0, #STATUS_REDHAWK_THREAD_ABORT
        EPILOG_NOP mov         r1, lr ;; return address as exception PC
        EPILOG_BRANCH RhpThrowHwEx

    NESTED_END RhpGcProbeRare

    ;;
    ;; GC poll helper. Preserves every general purpose register; returns immediately unless
    ;; TrapThreadsFlags_TrapThreads is set in RhpTrapThreads, in which case RhpGcPollRare does the work.
    ;;
    LEAF_ENTRY RhpGcPoll
        ; @todo: I'm assuming it's not OK to trash any register here. If that's not true we can optimize the
        ; push/pops out of this fast path.
        push        {r0}
        ldr         r0, =RhpTrapThreads
        ldr         r0, [r0]
        tst         r0, #TrapThreadsFlags_TrapThreads
        pop         {r0}                ; restore r0 for both paths; pop leaves the flags set by tst intact
        bne         %f0                 ; explicit forward reference: a bare numeric reference searches
                                        ; backwards first
        bx          lr
0
        b           RhpGcPollRare
    LEAF_END RhpGcPoll

    ;;
    ;; Slow path of RhpGcPoll: saves every register in a probe frame, unhijacks the thread if necessary,
    ;; waits for the GC and returns with all registers restored from the frame.
    ;;
    NESTED_ENTRY RhpGcPollRare
        ;; Thread register defaulted (|): the current thread is computed inline into r2, r3 is scratch.
        PROLOG_PROBE_FRAME |, r3, #PROBE_SAVE_FLAGS_EVERYTHING

        ; Unhijack this thread, if necessary.
        INLINE_THREAD_UNHIJACK  r2, r0, r1       ;; trashes r0, r1

        mov         r4, r2              ; WaitForGCCompletion expects the thread in r4
        WaitForGCCompletion

        EPILOG_PROBE_FRAME
    NESTED_END RhpGcPollRare

    ;; Placeholder entry point: the body is only a debug break.
    LEAF_ENTRY RhpGcPollStress
        ;
        ; loop hijacking is used instead
        ;
        __debugbreak

    LEAF_END RhpGcPollStress


#ifdef FEATURE_GC_STRESS
    ;; Builds a PAL_LIMITED_CONTEXT on the stack describing the call site of this helper and passes its
    ;; address in r0 to the routine named by THREAD__HIJACKFORGCSTRESS. r0, r1 and d0-d3 are saved on entry
    ;; and restored before returning.
    NESTED_ENTRY RhpHijackForGcStress
        PROLOG_PUSH {r0,r1}     ; Save return value
        PROLOG_VPUSH {d0-d3}    ; Save VFP return value

        ;;
        ;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the
        ;; IP after the call to this helper.
        ;;
        ;; This is very likely overkill since the calculation of the return address should only need SP and 
        ;; LR, but this is test code, so I'm not too worried about efficiency.
        ;;
        ;; Setup a PAL_LIMITED_CONTEXT on the stack {
        ;; we'll need to reserve the size of the D registers in the context
        ;; compute in the funny way below to include any padding between LR and D
DREG_SZ equ     (SIZEOF__PAL_LIMITED_CONTEXT - (OFFSETOF__PAL_LIMITED_CONTEXT__LR + 4))

        PROLOG_STACK_ALLOC  DREG_SZ ;; Reserve space for d8-d15
        PROLOG_PUSH {r0,lr}         ;; Reserve space for SP and store LR
        PROLOG_PUSH {r0,r4-r11,lr}
        ;; } end PAL_LIMITED_CONTEXT

        ;; Compute and save SP at callsite.
        ;; sp currently points at the context, so skipping the context and the two saves made before it
        ;; gives the value sp had on entry.
        add         r0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20 + 8)   ;; +0x20 for vpush {d0-d3}, +8 for push {r0,r1}
        str         r0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP]

        mov         r0, sp      ; Address of PAL_LIMITED_CONTEXT
        bl          $THREAD__HIJACKFORGCSTRESS

        ;; epilog
        ;; The r0 popped here is overwritten by the final pop of {r0,r1} below.
        EPILOG_POP  {r0,r4-r11,lr}
        EPILOG_STACK_FREE DREG_SZ + 8 ; Discard saved SP and LR and space for d8-d15
        EPILOG_VPOP {d0-d3}             ; Restore VFP return value
        EPILOG_POP  {r0,r1}             ; Restore return value
        bx          lr
    NESTED_END RhpHijackForGcStress
#endif ;; FEATURE_GC_STRESS


;;
;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH 
;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing
;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of 
;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the 
;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be 
;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the 
;; handler in the caller.
;; 
;; If we are hijacked, then we jump to a routine that will unhijack appropriately and wait for the GC to 
;; complete. There are also variants for GC stress.
;;
;; Note that at this point we are either hijacked or we are not, and this will not change until we return to 
;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack 
;; another thread while the target thread is suspended in managed code, and this is _not_ managed code.
;;
;; Register state on entry:
;;  r0: pointer to this function (i.e., trash)
;;  r1: reference to the exception object.
;;  r2: handler address we want to jump to.
;;  Non-volatile registers are all already correct for return to the caller.
;;  LR still contains the return address.
;;  
;; Register state on exit:
;;  All registers except r0 and lr unchanged
;;
    ;; Defines one EH jump helper.
    ;;
    ;;  $funcName       : name of the helper to define
    ;;  $hijackFuncName : GC probe hijack return address to compare lr against
    ;;  $isStress       : when true, lr is also compared against $stressFuncName
    ;;  $stressFuncName : GC stress hijack entry point (only used when $isStress is true)
    MACRO
        RTU_EH_JUMP_HELPER $funcName, $hijackFuncName, $isStress, $stressFuncName

        LEAF_ENTRY $funcName
        ; The return address is expected to be in LR already (see the register state on entry above), so it
        ; is compared directly against the hijack entry points; nothing is loaded from the stack here.
        ; NOTE(review): this comment used to describe a load from [sp-4]; no such load exists in the code.

            ldr         r0, =$hijackFuncName
            cmp         r0, lr
            beq         RhpGCProbeForEHJump

            IF $isStress
            ldr         r0, =$stressFuncName
            cmp         r0, lr
            beq         RhpGCStressProbeForEHJump
            ENDIF

            ;; We are not hijacked, so we can return to the handler.
            ;; We return to keep the call/return prediction balanced.
            mov         lr, r2  ; Update the return address
            bx          lr
        LEAF_END $funcName
    MEND

;; We need an instance of the helper for each possible hijack function. The binder has enough
;; information to determine which one we need to use for any function.
;; The first three only test for the normal GC probe hijack; the GCStress variants additionally test for the
;; matching GC stress hijack entry point and exist only when FEATURE_GC_STRESS is defined.
    RTU_EH_JUMP_HELPER RhpEHJumpScalar,         RhpGcProbeHijackScalar, {false}, 0
    RTU_EH_JUMP_HELPER RhpEHJumpObject,         RhpGcProbeHijackObject, {false}, 0
    RTU_EH_JUMP_HELPER RhpEHJumpByref,          RhpGcProbeHijackByref,  {false}, 0
#ifdef FEATURE_GC_STRESS
    RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijackScalar, {true},  RhpGcStressHijackScalar
    RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijackObject, {true},  RhpGcStressHijackObject
    RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress,  RhpGcProbeHijackByref,  {true},  RhpGcStressHijackByref
#endif

;;
;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs.
;;
;; Register state on entry:
;;  r0: scratch
;;  r1: reference to the exception object.
;;  r2: handler address we want to jump to.
;;  Non-volatile registers are all already correct for return to the caller.
;;  The stack is as if we are just about to return from the call
;;  
;; Register state on exit:
;;  r0: reference to the exception object
;;  r1: trashed
;;  r2: thread pointer
;;  r12: trashed
;;  lr: original (un-hijacked) return address
;;
    MACRO
        EHJumpProbeProlog

        PROLOG_PUSH         {r1,r2}     ; save the handler address so we can jump to it later (save r1 just for alignment)
        PROLOG_NOP          mov r0, r1  ; move the ex object reference into r0 so we can report it
        ALLOC_PROBE_FRAME

        ;; r2 <- GetThread(), TRASHES r1
        INLINE_GETTHREAD r2, r1
        
        ;; Recover the original return address and update the frame
        ;; (the lr value pushed by ALLOC_PROBE_FRAME into the return address slot is replaced here)
        ldr         lr, [r2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
        str         lr, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP]

        ;; ClearHijackState expects thread in r2 (trashes r12).
        ClearHijackState

        ; TRASHES r1
        ; The frame size includes the 8 bytes pushed for {r1,r2} ahead of the probe frame.
        INIT_PROBE_FRAME r2, r1, #PROBE_SAVE_FLAGS_R0_IS_GCREF, (PROBE_FRAME_SIZE + 8)
        ; Publish the frame: store its address (sp) into the Thread.
        str         sp, [r2, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
    MEND

;;
;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the 
;; final jump to the handler for EH jump probe funcs.
;;
;; Register state on entry:
;;  r0: reference to the exception object
;;  r1-r3: scratch
;;  
;; Register state on exit:
;;  sp: correct for return to the caller
;;  r1: reference to the exception object
;;
    MACRO
        EHJumpProbeEpilog

        FREE_PROBE_FRAME        ; This restores exception object back into r0
        EPILOG_NOP mov r1, r0   ; Move the Exception object back into r1 where the catch handler expects it
        ; The two words popped here are the {r1,r2} pair pushed by EHJumpProbeProlog: the alignment slot
        ; lands in r0 and the handler address lands in pc.
        EPILOG_POP {r0,pc}      ; Recover the handler address and jump to it
    MEND

;;
;; We are hijacked for a normal GC (not GC stress), so we need to unhijack and wait for the GC to complete.
;;
;; Register state on entry (as left by the RTU_EH_JUMP_HELPER instance that branched here):
;;  r1: reference to the exception object.
;;  r2: handler address we want to jump to.
;;  lr: the GC probe hijack address that the helper matched
;;  Non-volatile registers are all already correct for return to the caller.
;;  The stack is as if we have tail called to this function.
;;        
;; Register state on exit (control transfers to the handler address):
;;  r7: previous frame pointer
;;  r1: reference to the exception object
;;  r0: trashed
;;
    NESTED_ENTRY RhpGCProbeForEHJump
        ;; Leaves the exception object in r0 (recorded in the frame as a GC reference) and the thread in r2.
        EHJumpProbeProlog

#ifdef _DEBUG
        ;;
        ;; If we get here, then we have been hijacked for a real GC, and our SyncState must
        ;; reflect that we've been requested to synchronize.

        ldr         r1, =RhpTrapThreads
        ldr         r1, [r1]
        tst         r1, #TrapThreadsFlags_TrapThreads
        bne         %f0                 ; explicit forward reference: a bare numeric reference searches
                                        ; backwards first

        bl          RhDebugBreak
0
#endif ;; _DEBUG

        mov         r4, r2              ; WaitForGCCompletion expects the thread in r4
        WaitForGCCompletion

        EHJumpProbeEpilog
    NESTED_END RhpGCProbeForEHJump

#ifdef FEATURE_GC_STRESS
;;
;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper.
;;
;; Register state on entry (as left by the RTU_EH_JUMP_HELPER instance that branched here):
;;  r1: reference to the exception object.
;;  r2: handler address we want to jump to.
;;  lr: the GC stress hijack address that the helper matched
;;  Non-volatile registers are all already correct for return to the caller.
;;  The stack is as if we have tail called to this function.
;;        
;; Register state on exit (control transfers to the handler address):
;;  r7: previous frame pointer
;;  r1: reference to the exception object
;;  r0: trashed
;;
    NESTED_ENTRY RhpGCStressProbeForEHJump
        ;; Leaves the exception object in r0 (recorded in the frame as a GC reference) and the thread in r2.
        EHJumpProbeProlog

        bl          $REDHAWKGCINTERFACE__STRESSGC

        EHJumpProbeEpilog
    NESTED_END RhpGCStressProbeForEHJump

;;
;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this.
;;
    ;; Atomically sets TSF_SuppressGcStress in the thread state flags of the current thread.
    ;; r0-r2 are saved on entry and restored before returning.
    LEAF_ENTRY RhpSuppressGcStress

        push        {r0-r2}
        INLINE_GETTHREAD    r0, r1              ; r0 <- current thread, r1 is scratch

        ;; Exclusive load / modify / store loop: strex writes 0 to r2 when the store succeeded.
RhpSuppressGcStress_Retry
        ldrex       r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags]
        orr         r1, #TSF_SuppressGcStress
        strex       r2, r1, [r0, #OFFSETOF__Thread__m_ThreadStateFlags]
        cbz         r2, RhpSuppressGcStress_Done
        b           RhpSuppressGcStress_Retry

RhpSuppressGcStress_Done
        pop         {r0-r2}
        bx          lr

    LEAF_END RhpSuppressGcStress
#endif ;; FEATURE_GC_STRESS

;; ALLOC_PROBE_FRAME only saves d0-d3. The loop hijack must not disturb any VFP register, so it also saves
;; d4-d15 (12 registers) and d16-d31 (16 registers), each 8 bytes wide.
VFP_EXTRA_SAVE_SIZE equ ((12 + 16) * 8)

;; Helper called from hijacked loops
;;
;; Saves the register state in a probe frame, recovers the real loop target for the indirection cell that was
;; jumped through, waits for the GC (optionally running GC stress first) and finally jumps to the recovered
;; target, or raises a thread abort exception if the frame flags report an abort.
    LEAF_ENTRY RhpLoopHijack

;; we arrive here with essentially all registers containing useful content
;; except r12, which we trashed

;; on the stack, we have two arguments:
;; - [sp+0] has the module header
;; - [sp+4] has the address of the indirection cell we jumped through
;;
;;

;;      save registers
        PROLOG_VPUSH        {d4-d15}    ;; save scratch fp regs
        PROLOG_VPUSH        {d16-d31}   ;; ... and more
        ALLOC_PROBE_FRAME

        ; save condition codes
        mrs         r12, apsr
        str         r12, [sp, #m_SavedAPSR]

        INLINE_GETTHREAD    r4, r1

        ; The frame size covers the probe frame, the extra VFP saves and the 8 bytes of incoming stack
        ; arguments, so m_CallersSP is the stack pointer once those two arguments have been removed.
        INIT_PROBE_FRAME r4, r1, #PROBE_SAVE_FLAGS_EVERYTHING, (PROBE_FRAME_SIZE + VFP_EXTRA_SAVE_SIZE + 8)
;;
;;      compute the index of the indirection cell
;;
        ; r0 <- module header (first incoming stack argument)
        ldr         r0, [sp,#(PROBE_FRAME_SIZE + VFP_EXTRA_SAVE_SIZE + 0)]
        bl          $GetLoopIndirCells
        
        ; r0 now has address of the first loop indir cell
        ; subtract that from the address of our cell
        ; and divide by 4 to give the index of our cell
        ldr         r1, [sp,#(PROBE_FRAME_SIZE + VFP_EXTRA_SAVE_SIZE + 4)]
        sub         r1, r0
        lsr         r0, r1, #2

        ; r0 now has the index
        ; recover the loop hijack target, passing the module header as an additional argument
        ldr         r1, [sp,#(PROBE_FRAME_SIZE + VFP_EXTRA_SAVE_SIZE + 0)]
        bl          RecoverLoopHijackTarget

        ; store the result as our pinvoke return address
        str         r0, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP]

        ; also save it in the incoming parameter space for the actual return below
        ; (this overwrites the indirection cell address, which is no longer needed)
        str         r0, [sp,#(PROBE_FRAME_SIZE + VFP_EXTRA_SAVE_SIZE + 4)]

        ; Early out if GC stress is currently suppressed. Do this after we have computed the real address to
        ; return to but before we link the transition frame onto m_pHackPInvokeTunnel (because hitting this
        ; condition implies we're running restricted callouts during a GC itself and we could end up
        ; overwriting a co-op frame set by the code that caused the GC in the first place, e.g. a GC.Collect
        ; call).
        ldr         r1, [r4, #OFFSETOF__Thread__m_ThreadStateFlags]
        tst         r1, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC
        bne         DoneWaitingForGc

        ; link the frame into the Thread
        str         sp, [r4, #OFFSETOF__Thread__m_pHackPInvokeTunnel]

        ;;
        ;; Unhijack this thread, if necessary.
        ;;
        INLINE_THREAD_UNHIJACK  r4, r1, r2       ;; trashes r1, r2

#ifdef FEATURE_GC_STRESS

        ; NOTE(review): the stress path below is skipped when the loaded value is non-zero. Together with the
        ; g_fGcStressStarted alias (which names g_GCShadow) this polarity should be confirmed.
        ldr         r1, =$g_fGcStressStarted
        ldr         r1, [r1]
        cmp         r1, #0
        bne         NoGcStress

        ; r0 still holds the recovered hijack target; it is passed as the second argument (r1) with the
        ; runtime instance pointer in r0.
        mov         r1, r0
        ldr         r0, =$g_pTheRuntimeInstance
        ldr         r0, [r0]
        bl          $RuntimeInstance__ShouldHijackLoopForGcStress
        cmp         r0, #0
        beq         NoGcStress

        bl          $REDHAWKGCINTERFACE__STRESSGC
NoGcStress
#endif ;; FEATURE_GC_STRESS

        mov         r2, sp ; sp is address of PInvokeTransitionFrame
        bl          RhpWaitForGCNoAbort

DoneWaitingForGc
        ; The abort request is read back from the flags word of the frame.
        ldr         r12, [sp, #OFFSETOF__PInvokeTransitionFrame__m_dwFlags]
        tst         r12, #PTFF_THREAD_ABORT
        bne         Abort
        ; restore condition codes
        ldr         r12, [sp, #m_SavedAPSR]
        msr         apsr_nzcvqg, r12
        FREE_PROBE_FRAME
        EPILOG_VPOP {d16-d31}
        EPILOG_VPOP {d4-d15}
        ; The two incoming stack slots are popped last: the module header goes to r12 (discarded) and the
        ; hijack target stored above goes to pc.
        EPILOG_POP  {r12,pc}      ; recover the hijack target and jump to it
Abort
        FREE_PROBE_FRAME
        EPILOG_VPOP {d16-d31}
        EPILOG_VPOP {d4-d15}
        EPILOG_POP  r12           ; discard the module header slot
        EPILOG_NOP  mov         r0, #STATUS_REDHAWK_THREAD_ABORT
        EPILOG_POP  r1            ;hijack target address as exception PC
        EPILOG_BRANCH RhpThrowHwEx

    LEAF_END RhpLoopHijack

        ;; Constant pool macro paired with the INLINE_GETTHREAD expansions used throughout this file
        ;; (presumably emits the literals they reference -- confirm against AsmMacros.h).
        INLINE_GETTHREAD_CONSTANT_POOL

        end