1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
|
;; Licensed to the .NET Foundation under one or more agreements.
;; The .NET Foundation licenses this file to you under the MIT license.
;; See the LICENSE file in the project root for more information.
#include "AsmMacros.h"
;; Section selection, symbol aliases and external references used by the probe code in this file.
TEXTAREA
;; Readable aliases for decorated C++ symbol names; they are referenced below with a leading '$'.
SETALIAS GetLoopIndirCells, ?GetLoopIndirCells@ModuleHeader@@QEAAPEAEXZ
SETALIAS g_pTheRuntimeInstance, ?g_pTheRuntimeInstance@@3PEAVRuntimeInstance@@EA
SETALIAS RuntimeInstance__ShouldHijackLoopForGcStress, ?ShouldHijackLoopForGcStress@RuntimeInstance@@QEAA_N_K@Z
;; Symbols defined outside this file.
EXTERN g_fGcStressStarted
EXTERN $g_pTheRuntimeInstance
EXTERN $RuntimeInstance__ShouldHijackLoopForGcStress
EXTERN $GetLoopIndirCells
EXTERN RecoverLoopHijackTarget
;; Transition frame flags used by the probes that save every scratch register (RhpGcPollRare and
;; RhpLoopHijack pass this value to the frame initialization macros).
PROBE_SAVE_FLAGS_EVERYTHING equ DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_ALL_SCRATCH
;; Build a map of symbols representing offsets into the transition frame (see PInvokeTransitionFrame in
;; rhbinder.h) and keep these two in sync.
;;
;; ALLOC_PROBE_FRAME and FREE_PROBE_FRAME address the same slots with literal offsets (for example the
;; caller SP slot at 0x70 and the NZCV slot at 0x118), so any change to this map must be mirrored there.
map 0
field OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs
field 10 * 8 ; x19..x28
m_CallersSP field 8 ; SP at routine entry
field 19 * 8 ; x0..x18
field 8 ; lr
m_SavedNZCV field 8 ; Saved condition flags
field 4 * 8 ; d0..d3
;; Zero-sized field: its offset is the total size of the probe frame.
PROBE_FRAME_SIZE field 0
;; Support for setting up a transition frame when performing a GC probe. In many respects this is very
;; similar to the logic in PUSH_COOP_PINVOKE_FRAME in AsmMacros.h. In most cases setting up the
;; transition frame comprises the entirety of the caller's prolog (and initial non-prolog code) and
;; similarly for the epilog. Those cases can be dealt with using PROLOG_PROBE_FRAME and EPILOG_PROBE_FRAME
;; defined below. For the special cases where additional work has to be done in the prolog we also provide
;; the lower level macros ALLOC_PROBE_FRAME, FREE_PROBE_FRAME and INIT_PROBE_FRAME that allow more control
;; to be asserted.
;;
;; Note that we currently employ a significant simplification of frame setup: we always allocate a
;; maximally-sized PInvokeTransitionFrame and save all of the registers. Depending on the caller this can
;; lead to up to 20 additional register saves (x0-x18, lr) or 160 bytes of stack space. I have done no
;; analysis to see whether any of the worst cases occur on performance sensitive paths and whether the
;; additional saves will show any measurable degradation.
;; Perform the parts of setting up a probe frame that can occur during the prolog (and indeed this macro
;; can only be called from within the prolog).
;;
;; $extraStackSpace : number of bytes allocated in addition to PROBE_FRAME_SIZE
;;
;; Saves fp/lr, x19-x28, x0-x18, lr and d0-d3 into the new frame. The Thread*, flags, caller SP and NZCV
;; slots are only reserved here; they are not written by this macro.
;; The literal offsets below must stay in sync with the field map that defines PROBE_FRAME_SIZE.
MACRO
ALLOC_PROBE_FRAME $extraStackSpace
;; First create PInvokeTransitionFrame
PROLOG_SAVE_REG_PAIR fp, lr, #-(PROBE_FRAME_SIZE + $extraStackSpace)! ;; Push down stack pointer and store FP and LR
;; Slot at [sp, #0x10] is reserved for Thread *
;; Slot at [sp, #0x18] is reserved for bitmask of saved registers
;; Save callee saved registers
PROLOG_SAVE_REG_PAIR x19, x20, #0x20
PROLOG_SAVE_REG_PAIR x21, x22, #0x30
PROLOG_SAVE_REG_PAIR x23, x24, #0x40
PROLOG_SAVE_REG_PAIR x25, x26, #0x50
PROLOG_SAVE_REG_PAIR x27, x28, #0x60
;; Slot at [sp, #0x70] is reserved for caller sp
;; Save the scratch registers
PROLOG_NOP str x0, [sp, #0x78]
PROLOG_NOP stp x1, x2, [sp, #0x80]
PROLOG_NOP stp x3, x4, [sp, #0x90]
PROLOG_NOP stp x5, x6, [sp, #0xA0]
PROLOG_NOP stp x7, x8, [sp, #0xB0]
PROLOG_NOP stp x9, x10, [sp, #0xC0]
PROLOG_NOP stp x11, x12, [sp, #0xD0]
PROLOG_NOP stp x13, x14, [sp, #0xE0]
PROLOG_NOP stp x15, x16, [sp, #0xF0]
PROLOG_NOP stp x17, x18, [sp, #0x100]
;; lr is stored a second time here, in the slot that follows x0..x18 in the frame map
PROLOG_NOP str lr, [sp, #0x110]
;; Slot at [sp, #0x118] is reserved for NZCV
; Save the floating return registers
PROLOG_NOP stp d0, d1, [sp, #0x120]
PROLOG_NOP stp d2, d3, [sp, #0x130]
MEND
;; Undo the effects of an ALLOC_PROBE_FRAME. This may only be called within an epilog. Note that all
;; registers are restored (apart for sp and pc), even volatiles.
;;
;; $extraStackSpace : must be the same value that was passed to the matching ALLOC_PROBE_FRAME
;;
;; Every instruction here is emitted through an EPILOG_* macro. The scratch register restores previously
;; used PROLOG_NOP, which contradicts the epilog-only contract stated above.
        MACRO
        FREE_PROBE_FRAME $extraStackSpace

        ;; Restore the scratch registers
        EPILOG_NOP ldr x0, [sp, #0x78]
        EPILOG_NOP ldp x1, x2, [sp, #0x80]
        EPILOG_NOP ldp x3, x4, [sp, #0x90]
        EPILOG_NOP ldp x5, x6, [sp, #0xA0]
        EPILOG_NOP ldp x7, x8, [sp, #0xB0]
        EPILOG_NOP ldp x9, x10, [sp, #0xC0]
        EPILOG_NOP ldp x11, x12, [sp, #0xD0]
        EPILOG_NOP ldp x13, x14, [sp, #0xE0]
        EPILOG_NOP ldp x15, x16, [sp, #0xF0]
        EPILOG_NOP ldp x17, x18, [sp, #0x100]
        ;; NOTE(review): lr is loaded again by the final fp/lr pair restore below, so this load looks
        ;; redundant. It is kept to mirror the store in ALLOC_PROBE_FRAME; confirm before removing.
        EPILOG_NOP ldr lr, [sp, #0x110]

        ;; Restore the floating return registers
        EPILOG_NOP ldp d0, d1, [sp, #0x120]
        EPILOG_NOP ldp d2, d3, [sp, #0x130]

        ;; Restore callee saved registers
        EPILOG_RESTORE_REG_PAIR x19, x20, #0x20
        EPILOG_RESTORE_REG_PAIR x21, x22, #0x30
        EPILOG_RESTORE_REG_PAIR x23, x24, #0x40
        EPILOG_RESTORE_REG_PAIR x25, x26, #0x50
        EPILOG_RESTORE_REG_PAIR x27, x28, #0x60

        ;; Restore fp/lr and release the whole frame
        EPILOG_RESTORE_REG_PAIR fp, lr, #(PROBE_FRAME_SIZE + $extraStackSpace)!
        MEND
;; Complete the setup of a probe frame allocated with ALLOC_PROBE_FRAME with the initialization that can
;; occur only outside the prolog (includes linking the frame to the current Thread). This macro assumes SP
;; is invariant outside of the prolog.
;;
;; $threadReg : register containing the Thread* (this will be preserved)
;; $trashReg : register that can be trashed by this macro
;; $savedRegsMask : value to initialize m_Flags field with (register or #constant)
;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant
;; $frameSize : total size of the method's stack frame (including probe frame size)
;;
;; Writes three frame slots relative to sp: the Thread*, the flags and the caller SP (sp + $frameSize).
MACRO
INIT_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags, $frameSize
LCLS BitmaskStr
BitmaskStr SETS "$savedRegsMask"
str $threadReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_pThread] ; Thread *
IF BitmaskStr:LEFT:1 == "#"
;; The savedRegsMask is a constant, remove the leading "#" since the MOVL64 doesn't expect it
BitmaskStr SETS BitmaskStr:RIGHT:(:LEN:BitmaskStr - 1)
;; NOTE(review): MOVL64 is defined elsewhere; presumably it combines the mask and $gcFlags into one
;; 64-bit constant - confirm against its definition.
MOVL64 $trashReg, $BitmaskStr, $gcFlags
ELSE
;; A register mask already carries its own flags, so $gcFlags must be left empty
ASSERT "$gcFlags" == ""
;; The savedRegsMask is a register
mov $trashReg, $savedRegsMask
ENDIF
str $trashReg, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
;; Record the SP value the caller had before this frame was allocated
add $trashReg, sp, #$frameSize
str $trashReg, [sp, #m_CallersSP]
MEND
;; Simple macro to use when setting up the probe frame can comprise the entire prolog. Call this macro
;; first in the method (no further prolog instructions can be added after this).
;;
;; $threadReg : register containing the Thread* (this will be preserved). If defaulted (specify |) then
;; the current thread will be calculated inline into x2 ($trashReg must not equal x2 in
;; this case)
;; $trashReg : register that can be trashed by this macro
;; $savedRegsMask : value to initialize m_Flags field with (register or #constant)
;; $gcFlags : value of gcref / gcbyref flags for saved registers, used only if $savedRegsMask is constant
;;
;; After the frame is initialized its address (sp) is stored in the Thread's m_pHackPInvokeTunnel field.
MACRO
PROLOG_PROBE_FRAME $threadReg, $trashReg, $savedRegsMask, $gcFlags
; Local string tracking the name of the register in which the Thread* is kept. Defaults to the value
; of $threadReg.
LCLS __PPF_ThreadReg
__PPF_ThreadReg SETS "$threadReg"
; Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving
; incoming register values into it.
ALLOC_PROBE_FRAME 0
; If the caller didn't provide a value for $threadReg then generate code to fetch the Thread* into x2.
; Record that x2 holds the Thread* in our local variable.
IF "$threadReg" == ""
ASSERT "$trashReg" != "x2"
__PPF_ThreadReg SETS "x2"
INLINE_GETTHREAD $__PPF_ThreadReg, $trashReg
ENDIF
; Perform the rest of the PInvokeTransitionFrame initialization.
INIT_PROBE_FRAME $__PPF_ThreadReg, $trashReg, $savedRegsMask, $gcFlags, PROBE_FRAME_SIZE
; Publish the frame address (current sp) in the Thread.
mov $trashReg, sp
str $trashReg, [$__PPF_ThreadReg, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
MEND
; Simple macro to use when PROLOG_PROBE_FRAME was used to set up and initialize the prolog and
; PInvokeTransitionFrame. This will define the epilog including a return via the restored LR.
; It frees the frame with no extra stack space, matching the ALLOC_PROBE_FRAME 0 issued by
; PROLOG_PROBE_FRAME.
MACRO
EPILOG_PROBE_FRAME
FREE_PROBE_FRAME 0
EPILOG_RETURN
MEND
;; ALLOC_PROBE_FRAME will save the first 4 vfp registers, in order to avoid trashing VFP registers across the loop
;; hijack, we must save the rest -- d4-d31 (28).
EXTRA_SAVE_SIZE equ (28*8)
;; Allocate the loop hijack frame: an EXTRA_SAVE_SIZE area holding d4-d31, with a regular probe frame
;; allocated below it. On exit sp points at the probe frame and the d4-d31 area starts at
;; [sp, #PROBE_FRAME_SIZE].
MACRO
ALLOC_LOOP_HIJACK_FRAME
PROLOG_STACK_ALLOC EXTRA_SAVE_SIZE
;; save VFP registers that were not saved by the ALLOC_PROBE_FRAME
PROLOG_NOP stp d4, d5, [sp]
PROLOG_NOP stp d6, d7, [sp, #0x10]
PROLOG_NOP stp d8, d9, [sp, #0x20]
PROLOG_NOP stp d10, d11, [sp, #0x30]
PROLOG_NOP stp d12, d13, [sp, #0x40]
PROLOG_NOP stp d14, d15, [sp, #0x50]
PROLOG_NOP stp d16, d17, [sp, #0x60]
PROLOG_NOP stp d18, d19, [sp, #0x70]
PROLOG_NOP stp d20, d21, [sp, #0x80]
PROLOG_NOP stp d22, d23, [sp, #0x90]
PROLOG_NOP stp d24, d25, [sp, #0xA0]
PROLOG_NOP stp d26, d27, [sp, #0xB0]
PROLOG_NOP stp d28, d29, [sp, #0xC0]
PROLOG_NOP stp d30, d31, [sp, #0xD0]
ALLOC_PROBE_FRAME 0
MEND
;; Undo the effects of ALLOC_LOOP_HIJACK_FRAME: free the probe frame, then reload d4-d31 and release the
;; EXTRA_SAVE_SIZE area. This is epilog code, so the loads are emitted with EPILOG_NOP (they previously
;; used PROLOG_NOP, which is a prolog-only macro).
        MACRO
        FREE_LOOP_HIJACK_FRAME

        FREE_PROBE_FRAME 0

        ;; restore VFP registers that will not be restored by the FREE_PROBE_FRAME
        ;; (sp now points at the d4-d31 save area)
        EPILOG_NOP ldp d4, d5, [sp]
        EPILOG_NOP ldp d6, d7, [sp, #0x10]
        EPILOG_NOP ldp d8, d9, [sp, #0x20]
        EPILOG_NOP ldp d10, d11, [sp, #0x30]
        EPILOG_NOP ldp d12, d13, [sp, #0x40]
        EPILOG_NOP ldp d14, d15, [sp, #0x50]
        EPILOG_NOP ldp d16, d17, [sp, #0x60]
        EPILOG_NOP ldp d18, d19, [sp, #0x70]
        EPILOG_NOP ldp d20, d21, [sp, #0x80]
        EPILOG_NOP ldp d22, d23, [sp, #0x90]
        EPILOG_NOP ldp d24, d25, [sp, #0xA0]
        EPILOG_NOP ldp d26, d27, [sp, #0xB0]
        EPILOG_NOP ldp d28, d29, [sp, #0xC0]
        EPILOG_NOP ldp d30, d31, [sp, #0xD0]

        EPILOG_STACK_FREE EXTRA_SAVE_SIZE
        MEND
;;
;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this
;; thread if it finds it at an IP that isn't managed code.
;;
;; Register state on entry:
;; x2: thread pointer
;;
;; Register state on exit:
;; No registers are modified; only the three Thread fields below are zeroed.
;;
MACRO
ClearHijackState
;; The paired store below relies on the two fields being adjacent in Thread
ASSERT OFFSETOF__Thread__m_pvHijackedReturnAddress == (OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8)
;; Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress
stp xzr, xzr, [x2, #OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation]
;; Clear m_uHijackedReturnValueFlags
str xzr, [x2, #OFFSETOF__Thread__m_uHijackedReturnValueFlags]
MEND
;;
;; The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and
;; clears the hijack state.
;;
;; Register state on entry:
;; All registers correct for return to the original return address.
;;
;; Register state on exit:
;; x2: thread pointer
;; x3: trashed
;; x12: transition frame flags for the return registers x0 and x1
;; lr: the return address that was saved in the Thread when the hijack was installed
;;
MACRO
FixupHijackedCallstack
;; x2 <- GetThread(), TRASHES x3
INLINE_GETTHREAD x2, x3
;;
;; Fix the stack by restoring the original return address
;;
;; The paired load below relies on the two fields being adjacent in Thread
ASSERT OFFSETOF__Thread__m_uHijackedReturnValueFlags == (OFFSETOF__Thread__m_pvHijackedReturnAddress + 8)
;; Load m_pvHijackedReturnAddress and m_uHijackedReturnValueFlags
ldp lr, x12, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
ClearHijackState
MEND
;;
;; Set the Thread state and wait for a GC to complete.
;;
;; Register state on entry:
;; x4: thread pointer
;;
;; Register state on exit:
;; x4: thread pointer
;; All other registers trashed
;;
;; NOTE(review): x4 is a caller-saved register, so "x4: thread pointer" on exit only holds if
;; RhpWaitForGCNoAbort does not modify x4 - confirm before relying on it.
;;
EXTERN RhpWaitForGCNoAbort
MACRO
WaitForGCCompletion
;; Skip the wait entirely when either suppression flag is set on the thread
ldr w2, [x4, #OFFSETOF__Thread__m_ThreadStateFlags]
tst w2, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC
bne %ft0
;; x2 <- the frame pointer stored in the Thread, passed to RhpWaitForGCNoAbort
ldr x2, [x4, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
bl RhpWaitForGCNoAbort
0
MEND
;; Emits a never-executed prolog plus a nop in front of a hijack target label (see the comments inside).
MACRO
HijackTargetFakeProlog
;; This is a fake entrypoint for the method that 'tricks' the OS into calling our personality routine.
;; The code here should never be executed, and the unwind info is bogus, but we don't mind since the
;; stack is broken by the hijack anyway until after we fix it below.
PROLOG_SAVE_REG_PAIR fp, lr, #-0x10!
nop ; We also need a nop here to simulate the implied bl instruction. Without
; this, an OS-applied -4 will back up into the method prolog and the unwind
; will not be applied as desired.
MEND
;;
;;
;;
;; GC Probe Hijack targets
;;
;;
;; RhpGcProbeHijack is the labeled address inside this wrapper, placed after the fake prolog. It recovers
;; the original return address into lr, adds the default save flags to the return-register flags in x12
;; and branches to RhpGcProbe.
;; NOTE(review): RhpPInvokeExceptionGuard is presumably registered as the handler for this function via
;; the third NESTED_ENTRY argument - confirm against the NESTED_ENTRY definition.
EXTERN RhpPInvokeExceptionGuard
NESTED_ENTRY RhpGcProbeHijackWrapper, .text, RhpPInvokeExceptionGuard
HijackTargetFakeProlog
LABELED_RETURN_ADDRESS RhpGcProbeHijack
;; x2 <- Thread*, lr <- original return address, x12 <- return value flags
FixupHijackedCallstack
orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS
b RhpGcProbe
NESTED_END RhpGcProbeHijackWrapper
#ifdef FEATURE_GC_STRESS
;;
;;
;; GC Stress Hijack targets
;;
;;
;; Recovers the original return address into lr, adds the default save flags to the return-register
;; flags in x12 and branches to RhpGcStressProbe (x2 holds the Thread* at that point).
LEAF_ENTRY RhpGcStressHijack
FixupHijackedCallstack
orr x12, x12, #DEFAULT_FRAME_SAVE_FLAGS
b RhpGcStressProbe
LEAF_END RhpGcStressHijack
;;
;; Worker for our GC stress probes. Do not call directly!!
;; Instead, go through RhpGcStressHijack{Scalar|Object|Byref}.
;; NOTE(review): only a single RhpGcStressHijack entry point is visible in this file; the
;; {Scalar|Object|Byref} names above may be stale.
;; This worker performs the GC Stress work and returns to the original return address.
;;
;; Register state on entry:
;; x0: hijacked function return value
;; x1: hijacked function return value
;; x2: thread pointer
;; w12: register bitmask
;;
;; Register state on exit:
;; Scratch registers, except for x0, have been trashed
;; All other registers restored as they were when the hijack was first reached.
;; NOTE(review): EPILOG_PROBE_FRAME reloads x0-x18 from the frame, so the "trashed" wording above looks
;; conservative - confirm.
;;
NESTED_ENTRY RhpGcStressProbe
;; Thread* in x2, x3 is the trash register, x12 is the (register) flags value; $gcFlags is left empty
PROLOG_PROBE_FRAME x2, x3, x12,
bl $REDHAWKGCINTERFACE__STRESSGC
EPILOG_PROBE_FRAME
NESTED_END RhpGcStressProbe
#endif ;; FEATURE_GC_STRESS
;; Fast path of the GC probe: branch to RhpGcProbeRare when the TrapThreads bit of RhpTrapThreads is
;; set, otherwise return immediately. Uses x3 as scratch.
LEAF_ENTRY RhpGcProbe
brk 0xf000 ;; TODO: remove after debugging/testing stub
ldr x3, =RhpTrapThreads
ldr w3, [x3]
tbnz x3, #TrapThreadsFlags_TrapThreads_Bit, RhpGcProbeRare
ret
LEAF_END RhpGcProbe
;; Slow path of the GC probe. Builds a probe frame, waits for the GC, and then either returns or, if
;; the thread-abort bit is set in the frame flags, branches to RhpThrowHwEx.
;;
;; Register state on entry (as consumed by the code below):
;; x2: thread pointer
;; x12: flags value stored into the frame
EXTERN RhpThrowHwEx
NESTED_ENTRY RhpGcProbeRare
brk 0xf000 ;; TODO: remove after debugging/testing stub
PROLOG_PROBE_FRAME x2, x3, x12,
;; WaitForGCCompletion expects the thread pointer in x4
mov x4, x2
WaitForGCCompletion
;; Re-read the frame flags after the wait and test the abort bit
ldr x2, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
tbnz x2, #PTFF_THREAD_ABORT_BIT, %F1
EPILOG_PROBE_FRAME
1
;; Abort path: restore all registers, then pass the status code and the return address on
FREE_PROBE_FRAME 0
EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT
EPILOG_NOP mov x1, lr ;; return address as exception PC
EPILOG_NOP b RhpThrowHwEx
NESTED_END RhpGcProbeRare
;; GC poll fast path. Preserves every register: x0 is pushed, used to test the TrapThreads bit of
;; RhpTrapThreads, and popped again on both exits. Returns when the bit is clear, otherwise branches to
;; RhpGcPollRare.
;;
;; The push/pop previously used the malformed forms "[sp], #-0x10!" and "[sp], #0x10!". A64 has
;; pre-index "[sp, #imm]!" and post-index "[sp], #imm" but no combination of the two, so the push is
;; now pre-indexed and the pops post-indexed. The 0x10 step keeps sp 16-byte aligned.
        LEAF_ENTRY RhpGcPoll
        brk         0xf000 ;; TODO: remove after debugging/testing stub
        ; @todo: I'm assuming it's not OK to trash any register here. If that's not true we can optimize the
        ; push/pops out of this fast path.
        str         x0, [sp, #-0x10]!       ; push x0
        ldr         x0, =RhpTrapThreads
        ldr         w0, [x0]
        tbnz        x0, #TrapThreadsFlags_TrapThreads_Bit, %F0
        ldr         x0, [sp], #0x10         ; pop x0
        ret
0
        ldr         x0, [sp], #0x10         ; pop x0 before leaving for the slow path
        b           RhpGcPollRare
        LEAF_END RhpGcPoll
;; GC poll slow path. Builds a probe frame that saves every scratch register, unhijacks the thread if
;; necessary, waits for the GC and returns with the registers reloaded from the frame.
NESTED_ENTRY RhpGcPollRare
brk 0xf000 ;; TODO: remove after debugging/testing stub
;; Thread register defaulted (|): the Thread* is fetched inline into x2, with x3 as trash register
PROLOG_PROBE_FRAME |, x3, #PROBE_SAVE_FLAGS_EVERYTHING, 0
; Unhijack this thread, if necessary.
INLINE_THREAD_UNHIJACK x2, x0, x1 ;; trashes x0, x1
;; WaitForGCCompletion expects the thread pointer in x4
mov x4, x2
WaitForGCCompletion
EPILOG_PROBE_FRAME
NESTED_END RhpGcPollRare
;; Placeholder entry point: the body is a single brk instruction and nothing else.
LEAF_ENTRY RhpGcPollStress
;
; loop hijacking is used instead
;
brk 0xf000
LEAF_END RhpGcPollStress
#ifdef FEATURE_GC_STRESS
;; Builds a PAL_LIMITED_CONTEXT on the stack, passes its address in x0 to $THREAD__HIJACKFORGCSTRESS,
;; then restores x0/x1, d0-d3 and the callee saved registers and returns.
NESTED_ENTRY RhpHijackForGcStress
;; This function should be called from right before epilog
;; Push FP and LR, and allocate stack to hold PAL_LIMITED_CONTEXT structure and VFP return value registers
PROLOG_SAVE_REG_PAIR fp, lr, #-(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)!
;;
;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the
;; IP after the call to this helper.
;;
;; This is very likely overkill since the calculation of the return address should only need SP and
;; LR, but this is test code, so I'm not too worried about efficiency.
;;
;; Setup a PAL_LIMITED_CONTEXT on the stack
;; {
;; FP and LR already pushed.
PROLOG_NOP stp x0, x1, [sp, #0x10]
PROLOG_SAVE_REG_PAIR x19, x20, #0x20
PROLOG_SAVE_REG_PAIR x21, x22, #0x30
PROLOG_SAVE_REG_PAIR x23, x24, #0x40
PROLOG_SAVE_REG_PAIR x25, x26, #0x50
PROLOG_SAVE_REG_PAIR x27, x28, #0x60
;; NOTE(review): lr is stored a second time at 0x78, presumably the IP slot of the context - confirm
;; against the PAL_LIMITED_CONTEXT layout.
PROLOG_SAVE_REG lr, #0x78
;; } end PAL_LIMITED_CONTEXT
;; Save VFP return value
;; (d0-d3 occupy the 0x20 bytes directly above the context structure)
stp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)]
stp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)]
;; Compute and save SP at callsite.
add x0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20) ;; +0x20 for the pushes right before the context struct
str x0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP]
mov x0, sp ; Address of PAL_LIMITED_CONTEXT
bl $THREAD__HIJACKFORGCSTRESS
;; Restore return value registers (saved in PAL_LIMITED_CONTEXT structure)
ldp x0, x1, [sp, #0x10]
;; Restore VFP return value
ldp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)]
ldp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)]
;; Epilog
EPILOG_RESTORE_REG_PAIR x19, x20, #0x20
EPILOG_RESTORE_REG_PAIR x21, x22, #0x30
EPILOG_RESTORE_REG_PAIR x23, x24, #0x40
EPILOG_RESTORE_REG_PAIR x25, x26, #0x50
EPILOG_RESTORE_REG_PAIR x27, x28, #0x60
EPILOG_RESTORE_REG_PAIR fp, lr, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)!
EPILOG_RETURN
NESTED_END RhpHijackForGcStress
;; Variant of RhpHijackForGcStress that is jumped to rather than called. The address to resume at is
;; passed in x9: it is stored in the context at 0x78, reloaded in the epilog and used for the final
;; "ret x9".
NESTED_ENTRY RhpHijackForGcStressLeaf
;; This should be jumped to, right before epilog
;; x9 has the return address (we don't care about trashing scratch regs at this point)
;; Push FP and LR, and allocate stack to hold PAL_LIMITED_CONTEXT structure and VFP return value registers
PROLOG_SAVE_REG_PAIR fp, lr, #-(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)!
;;
;; Setup a PAL_LIMITED_CONTEXT that looks like what you'd get if you had suspended this thread at the
;; IP after the call to this helper.
;;
;; This is very likely overkill since the calculation of the return address should only need SP and
;; LR, but this is test code, so I'm not too worried about efficiency.
;;
;; Setup a PAL_LIMITED_CONTEXT on the stack
;; {
;; FP and LR already pushed.
PROLOG_NOP stp x0, x1, [sp, #0x10]
PROLOG_SAVE_REG_PAIR x19, x20, #0x20
PROLOG_SAVE_REG_PAIR x21, x22, #0x30
PROLOG_SAVE_REG_PAIR x23, x24, #0x40
PROLOG_SAVE_REG_PAIR x25, x26, #0x50
PROLOG_SAVE_REG_PAIR x27, x28, #0x60
; The PROLOG_SAVE_REG macro does not accept a scratch register, so x9 is stored with a plain str:
PROLOG_NOP str x9, [sp, #0x78] ; this is the return address from RhpHijackForGcStress; lr is the return address for its caller
;; } end PAL_LIMITED_CONTEXT
;; Save VFP return value
;; (d0-d3 occupy the 0x20 bytes directly above the context structure)
stp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)]
stp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)]
;; Compute and save SP at callsite.
add x0, sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20) ;; +0x20 for the pushes right before the context struct
str x0, [sp, #OFFSETOF__PAL_LIMITED_CONTEXT__SP]
mov x0, sp ; Address of PAL_LIMITED_CONTEXT
bl $THREAD__HIJACKFORGCSTRESS
;; Restore return value registers (saved in PAL_LIMITED_CONTEXT structure)
ldp x0, x1, [sp, #0x10]
;; Restore VFP return value
ldp d0, d1, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x00)]
ldp d2, d3, [sp, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x10)]
;; Epilog
EPILOG_RESTORE_REG_PAIR x19, x20, #0x20
EPILOG_RESTORE_REG_PAIR x21, x22, #0x30
EPILOG_RESTORE_REG_PAIR x23, x24, #0x40
EPILOG_RESTORE_REG_PAIR x25, x26, #0x50
EPILOG_RESTORE_REG_PAIR x27, x28, #0x60
;; Reload the resume address before the frame is released
EPILOG_NOP ldr x9, [sp, #0x78]
EPILOG_RESTORE_REG_PAIR fp, lr, #(SIZEOF__PAL_LIMITED_CONTEXT + 0x20)!
EPILOG_NOP ret x9
NESTED_END RhpHijackForGcStressLeaf
#endif ;; FEATURE_GC_STRESS
#if 0 // used by the binder only
;;
;; The following functions are _jumped_ to when we need to transfer control from one method to another for EH
;; dispatch. These are needed to properly coordinate with the GC hijacking logic. We are essentially replacing
;; the return from the throwing method with a jump to the handler in the caller, but we need to be aware of
;; any return address hijack that may be in place for GC suspension. These routines use a quick test of the
;; return address against a specific GC hijack routine, and then fixup the stack pointer to what it would be
;; after a real return from the throwing method. Then, if we are not hijacked we can simply jump to the
;; handler in the caller.
;;
;; If we are hijacked, then we jump to a routine that will unhijack appropriately and wait for the GC to
;; complete. There are also variants for GC stress.
;;
;; Note that at this point we are either hijacked or we are not, and this will not change until we return to
;; managed code. It is an invariant of the system that a thread will only attempt to hijack or unhijack
;; another thread while the target thread is suspended in managed code, and this is _not_ managed code.
;;
;; $funcName : name of the helper to define
;; $hijackFuncName : hijack routine address compared against lr for the normal GC case
;; $isStress : when true, lr is also compared against $stressFuncName
;; $stressFuncName : GC stress hijack routine address (ignored when $isStress is false)
;;
;; Register usage in the generated helper: x0 is scratch, x2 holds the handler address.
;;
MACRO
RTU_EH_JUMP_HELPER $funcName, $hijackFuncName, $isStress, $stressFuncName
LEAF_ENTRY $funcName
;; lr equal to the hijack routine address means the return address has been hijacked
ldr x0, =$hijackFuncName
cmp x0, lr
beq RhpGCProbeForEHJump
IF $isStress
ldr x0, =$stressFuncName
cmp x0, lr
beq RhpGCStressProbeForEHJump
ENDIF
;; We are not hijacked, so we can return to the handler.
;; We return to keep the call/return prediction balanced.
mov lr, x2 ; Update the return address
ret
LEAF_END $funcName
MEND
;; We need an instance of the helper for each possible hijack function. The binder has enough
;; information to determine which one we need to use for any function.
;; The three non-stress instances take identical arguments, as do the three stress instances.
RTU_EH_JUMP_HELPER RhpEHJumpScalar, RhpGcProbeHijack, {false}, 0
RTU_EH_JUMP_HELPER RhpEHJumpObject, RhpGcProbeHijack, {false}, 0
RTU_EH_JUMP_HELPER RhpEHJumpByref, RhpGcProbeHijack, {false}, 0
#ifdef FEATURE_GC_STRESS
RTU_EH_JUMP_HELPER RhpEHJumpScalarGCStress, RhpGcProbeHijack, {true}, RhpGcStressHijack
RTU_EH_JUMP_HELPER RhpEHJumpObjectGCStress, RhpGcProbeHijack, {true}, RhpGcStressHijack
RTU_EH_JUMP_HELPER RhpEHJumpByrefGCStress, RhpGcProbeHijack, {true}, RhpGcStressHijack
#endif
;;
;; Macro to setup our frame and adjust the location of the EH object reference for EH jump probe funcs.
;;
;; Register state on entry:
;; x0: scratch
;; x1: reference to the exception object.
;; x2: handler address we want to jump to.
;; Non-volatile registers are all already correct for return to the caller.
;; The stack is as if we are just about to return from the call
;;
;; Register state on exit:
;; x0: reference to the exception object
;; x2: thread pointer
;; x1: trashed
;;
MACRO
EHJumpProbeProlog
PROLOG_NOP mov x0, x1 ; move the ex object reference into x0 so we can report it
;; 0x10 extra bytes: the handler address is kept in the slot just above the probe frame
ALLOC_PROBE_FRAME 0x10
str x2, [sp, #PROBE_FRAME_SIZE]
;; x2 <- GetThread(), TRASHES x1
INLINE_GETTHREAD x2, x1
;; Recover the original return address and update the frame
ldr lr, [x2, #OFFSETOF__Thread__m_pvHijackedReturnAddress]
str lr, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP]
;; ClearHijackState expects thread in x2
ClearHijackState
; TRASHES x1
;; NOTE(review): the frame was allocated with 0x10 extra bytes but the frame size passed here is
;; PROBE_FRAME_SIZE + 8 - confirm which caller SP value is intended.
INIT_PROBE_FRAME x2, x1, #(DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_X0), PTFF_X0_IS_GCREF_HI, (PROBE_FRAME_SIZE + 8)
;; Publish the frame address (current sp) in the Thread
add x1, sp, xzr
str x1, [x2, #OFFSETOF__Thread__m_pHackPInvokeTunnel]
MEND
;;
;; Macro to re-adjust the location of the EH object reference, cleanup the frame, and make the
;; final jump to the handler for EH jump probe funcs.
;;
;; Register state on entry:
;; x0: reference to the exception object
;; x1-x3: scratch
;;
;; Register state on exit:
;; sp: correct for return to the caller
;; x1: reference to the exception object
;;
MACRO
EHJumpProbeEpilog
;; Reload the handler address from the slot just above the probe frame
ldr x2, [sp, #PROBE_FRAME_SIZE]
;; NOTE(review): FREE_PROBE_FRAME reloads x1/x2 from the frame as part of its scratch register
;; restore, which would overwrite the handler address loaded above - confirm before enabling this code.
FREE_PROBE_FRAME 0x10 ; This restores exception object back into x0
EPILOG_NOP mov x1, x0 ; Move the Exception object back into x1 where the catch handler expects it
EPILOG_NOP ret x2
MEND
;;
;; We are hijacked for a normal GC (not GC stress), so we need to unhijack and wait for the GC to complete.
;;
;; Register state on entry:
;; x0: reference to the exception object.
;; x2: thread
;; Non-volatile registers are all already correct for return to the caller.
;; The stack is as if we have tail called to this function (lr points to return address).
;; NOTE(review): EHJumpProbeProlog, which runs first, reads the exception object from x1 and the
;; handler address from x2, so the x0/x2 description above looks stale - confirm.
;;
;; Register state on exit:
;; x0: reference to the exception object
;;
NESTED_ENTRY RhpGCProbeForEHJump
brk 0xf000 ;; TODO: remove after debugging/testing stub
EHJumpProbeProlog
#ifdef _DEBUG
;;
;; If we get here, then we have been hijacked for a real GC, and our SyncState must
;; reflect that we've been requested to synchronize.
ldr x1, =RhpTrapThreads
ldr w1, [x1]
;; NOTE(review): other references in this file spell the forward local label as %F0 or %ft0.
tbnz x1, #TrapThreadsFlags_TrapThreads_Bit, %0
bl RhDebugBreak
0
#endif ;; _DEBUG
;; WaitForGCCompletion expects the thread pointer in x4
mov x4, x2
WaitForGCCompletion
EHJumpProbeEpilog
NESTED_END RhpGCProbeForEHJump
#ifdef FEATURE_GC_STRESS
;;
;; We are hijacked for GC Stress (not a normal GC) so we need to invoke the GC stress helper.
;;
;; Register state on entry:
;; x1: reference to the exception object.
;; x2: thread
;; Non-volatile registers are all already correct for return to the caller.
;; The stack is as if we have tail called to this function (lr points to return address).
;; NOTE(review): EHJumpProbeProlog, which runs first, treats x2 as the handler address and fetches the
;; thread itself, so "x2: thread" above looks stale - confirm.
;;
;; Register state on exit:
;; x0: reference to the exception object
;;
NESTED_ENTRY RhpGCStressProbeForEHJump
brk 0xf000 ;; TODO: remove after debugging/testing stub
EHJumpProbeProlog
bl $REDHAWKGCINTERFACE__STRESSGC
EHJumpProbeEpilog
NESTED_END RhpGCStressProbeForEHJump
#endif ;; FEATURE_GC_STRESS
#endif ;; 0
#ifdef FEATURE_GC_STRESS
;;
;; Atomically set TSF_SuppressGcStress in the current thread's m_ThreadStateFlags.
;;
;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this.
;; Only x9, x10 and w11 are used as scratch.
;;
        LEAF_ENTRY RhpSuppressGcStress

        INLINE_GETTHREAD x9, x10
        add         x9, x9, #OFFSETOF__Thread__m_ThreadStateFlags   ; x9 = address of the flags word

SuppressGcStressRetry
        ldxr        w10, [x9]                       ; exclusive load of the current flags
        orr         w10, w10, #TSF_SuppressGcStress
        stxr        w11, w10, [x9]                  ; w11 == 0 when the exclusive store succeeded
        cbnz        w11, SuppressGcStressRetry      ; lost the reservation: try again
        ret

        LEAF_END RhpSuppressGcStress
#endif ;; FEATURE_GC_STRESS
;; Helper called from hijacked loops
;;
;; Saves the complete register state (integer registers, d0-d31 and NZCV), recovers the real loop target
;; for the indirection cell that was jumped through, waits for the GC (or runs a stress GC), then
;; restores everything and jumps to the recovered target. If the thread-abort bit is set in the frame
;; flags afterwards, it branches to RhpThrowHwEx with the target address as the exception PC.
;;
;; The Thread* is kept in x19. It was previously kept in x4, a caller-saved register, and was
;; dereferenced after the calls to $GetLoopIndirCells and RecoverLoopHijackTarget, which are free to
;; modify x4. x19 is callee-saved, and its incoming value is saved and restored by the probe frame.
        LEAF_ENTRY RhpLoopHijack

        ;; we arrive here with essentially all registers containing useful content
        ;; TODO: update this comment after the RhpLoopHijack is implemented in the compiler
        ;; on the stack, we have two arguments:
        ;; - [sp+0] has the module header
        ;; - [sp+8] has the address of the indirection cell we jumped through
        ;;
        ;;
        brk         0xf000 ;; TODO: remove after debugging/testing stub

        ALLOC_LOOP_HIJACK_FRAME

        ; save condition codes
        mrs         x12, NZCV
        str         x12, [sp, #m_SavedNZCV]

        ;; x19 <- GetThread(), TRASHES x1 (the incoming x19 is already saved in the frame)
        INLINE_GETTHREAD x19, x1
        ;; NOTE(review): the two stack arguments occupy 0x10 bytes (see EPILOG_STACK_FREE 0x10 below), yet
        ;; the frame size passed here only adds 8 - confirm which caller SP value is intended.
        INIT_PROBE_FRAME x19, x1, #PROBE_SAVE_FLAGS_EVERYTHING, 0, (PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 8)

        ;;
        ;; compute the index of the indirection cell
        ;;
        ldr         x0, [sp,#(PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 0)]
        bl          $GetLoopIndirCells

        ; x0 now has address of the first loop indir cell
        ; subtract that from the address of our cell
        ; and divide by 8 to give the index of our cell
        ldr         x1, [sp,#(PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 8)]
        sub         x1, x1, x0
        lsr         x0, x1, #3

        ; x0 now has the index
        ; recover the loop hijack target, passing the module header as an additional argument
        ldr         x1, [sp,#(PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 0)]
        bl          RecoverLoopHijackTarget

        ; store the result as our pinvoke return address
        str         x0, [sp, #OFFSETOF__PInvokeTransitionFrame__m_RIP]

        ; also save it in the incoming parameter space for the actual return below
        str         x0, [sp,#(PROBE_FRAME_SIZE + EXTRA_SAVE_SIZE + 8)]

        ; Early out if GC stress is currently suppressed. Do this after we have computed the real address to
        ; return to but before we link the transition frame onto m_pHackPInvokeTunnel (because hitting this
        ; condition implies we're running restricted callouts during a GC itself and we could end up
        ; overwriting a co-op frame set by the code that caused the GC in the first place, e.g. a GC.Collect
        ; call).
        ldr         w1, [x19, #OFFSETOF__Thread__m_ThreadStateFlags]
        tst         w1, #TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC
        bne         DoneWaitingForGc

        ; link the frame into the Thread
        add         x1, sp, xzr
        str         x1, [x19, #OFFSETOF__Thread__m_pHackPInvokeTunnel]

        ;;
        ;; Unhijack this thread, if necessary.
        ;;
        INLINE_THREAD_UNHIJACK x19, x1, x2 ;; trashes x1, x2

#ifdef FEATURE_GC_STRESS
        ;; NOTE(review): both branches below skip the stress GC when the tested value is NON-zero
        ;; (stress started / should-hijack returned true). By the names involved the polarity looks
        ;; inverted - confirm against the other architectures before enabling this stub.
        ldr         x1, =g_fGcStressStarted
        ldr         w1, [x1]
        cbnz        w1, NoGcStress

        mov         x1, x0                          ; x0 still holds the recovered hijack target
        ldr         x0, =$g_pTheRuntimeInstance
        ldr         x0, [x0]
        bl          $RuntimeInstance__ShouldHijackLoopForGcStress
        cbnz        x0, NoGcStress

        bl          $REDHAWKGCINTERFACE__STRESSGC
NoGcStress
#endif ;; FEATURE_GC_STRESS

        add         x2, sp, xzr ; sp is address of PInvokeTransitionFrame
        bl          RhpWaitForGCNoAbort

DoneWaitingForGc
        ldr         x12, [sp, #OFFSETOF__PInvokeTransitionFrame__m_Flags]
        tbnz        x12, #PTFF_THREAD_ABORT_BIT, Abort

        ; restore condition codes
        ldr         x12, [sp, #m_SavedNZCV]
        msr         NZCV, x12

        FREE_LOOP_HIJACK_FRAME

        ;; NOTE(review): x1 has just been restored to its incoming value by FREE_LOOP_HIJACK_FRAME and is
        ;; overwritten here with the jump target - confirm this is acceptable to the hijacked loop code.
        EPILOG_NOP ldr x1, [sp, #8] ; hijack target address
        EPILOG_STACK_FREE 0x10
        EPILOG_NOP ret x1 ; jump to the hijack target

Abort
        FREE_LOOP_HIJACK_FRAME

        EPILOG_NOP mov w0, #STATUS_REDHAWK_THREAD_ABORT
        EPILOG_NOP ldr x1, [sp, #8] ; hijack target address as exception PC
        EPILOG_STACK_FREE 0x10
        EPILOG_NOP b RhpThrowHwEx

        LEAF_END RhpLoopHijack
;; NOTE(review): presumably emits the literal data referenced by the INLINE_GETTHREAD expansions above -
;; confirm against the macro definition.
INLINE_GETTHREAD_CONSTANT_POOL
end
|