// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
// ==--==
//
// Shared (non-architecture specific) portions of a mechanism to perform interface dispatch using an
// alternative to VSD that does not require runtime generation of code.
//
// ============================================================================
#include "common.h"
#ifdef FEATURE_CACHED_INTERFACE_DISPATCH

#include "CommonTypes.h"
#include "CommonMacros.h"
#include "daccess.h"
#include "DebugMacrosExt.h"
#include "PalRedhawkCommon.h"
#include "PalRedhawk.h"
#include "rhassert.h"
#include "slist.h"
#include "holder.h"
#include "Crst.h"
#include "RedhawkWarnings.h"
#include "TargetPtrs.h"
#include "eetype.h"
#include "Range.h"
#include "allocheap.h"
#include "rhbinder.h"
#include "ObjectLayout.h"
#include "gcrhinterface.h"
#include "shash.h"
#include "RWLock.h"
#include "module.h"
#include "RuntimeInstance.h"
#include "eetype.inl"

#include "CachedInterfaceDispatch.h"

// We always allocate caches with a power-of-2 number of entries. We have a maximum size we support,
// defined below.
#define CID_MAX_CACHE_SIZE_LOG2 6
#define CID_MAX_CACHE_SIZE      (1 << CID_MAX_CACHE_SIZE_LOG2)

//#define FEATURE_CID_STATS 1

#ifdef FEATURE_CID_STATS

// Some counters used for debugging and profiling the algorithms.
extern "C"
{
    UInt32 CID_g_cLoadVirtFunc = 0;
    UInt32 CID_g_cCacheMisses = 0;
    UInt32 CID_g_cCacheSizeOverflows = 0;
    UInt32 CID_g_cCacheOutOfMemory = 0;
    UInt32 CID_g_cCacheReallocates = 0;
    UInt32 CID_g_cCacheAllocates = 0;
    UInt32 CID_g_cCacheDiscards = 0;
    UInt32 CID_g_cInterfaceDispatches = 0;
    UInt32 CID_g_cbMemoryAllocated = 0;
    UInt32 CID_g_rgAllocatesBySize[CID_MAX_CACHE_SIZE_LOG2 + 1] = { 0 };
};

#define CID_COUNTER_INC(_counter_name) CID_g_c##_counter_name++

#else

#define CID_COUNTER_INC(_counter_name)

#endif // FEATURE_CID_STATS

// Helper function for updating two adjacent pointers (which are aligned on a double pointer-sized boundary)
// atomically.
//
// This is used to update interface dispatch cache entries and also the stub/cache pair in
// interface dispatch indirection cells. The cases have slightly different semantics: cache entry updates
// (fFailOnNonNull == true) require that the existing values in the location are both NULL whereas indirection
// cell updates have no such restriction. In both cases we'll try the update once; on failure we'll return
// the new value of the second pointer and on success we'll return the old value of the second pointer.
//
// This suits the semantics of both callers. For indirection cell updates the caller needs to know the address
// of the cache that can now be scheduled for release and the cache pointer is the second one in the pair. For
// cache entry updates the caller only needs a success/failure indication: on success the return value will be
// NULL and on failure non-NULL.
static void * UpdatePointerPairAtomically(void * pPairLocation,
                                          void * pFirstPointer,
                                          void * pSecondPointer,
                                          bool fFailOnNonNull)
{
#if defined(BIT64)
    // The comments on the 32-bit path below apply to this version as well; the CompareExchange looks a
    // little different since the API is expressed in terms of Int64 to avoid creating a 128-bit integer type.

    Int64 rgComparand[2] = { 0 , 0 };
    if (!fFailOnNonNull)
    {
        rgComparand[0] = *(Int64 volatile *)pPairLocation;
        rgComparand[1] = *((Int64 volatile *)pPairLocation + 1);
    }

    UInt8 bResult = PalInterlockedCompareExchange128((Int64*)pPairLocation, (Int64)pSecondPointer, (Int64)pFirstPointer, rgComparand);
    if (bResult == 1)
    {
        // Success, return old value of second pointer (rgComparand is updated by
        // PalInterlockedCompareExchange128 with the old pointer values in this case).
        return (void*)rgComparand[1];
    }

    // Failure, return the new second pointer value.
    return pSecondPointer;
#else
    // Stuff the two pointers into a 64-bit value as the proposed new value for the CompareExchange64 below.
    Int64 iNewValue = (Int64)((UInt64)(UIntNative)pFirstPointer | ((UInt64)(UIntNative)pSecondPointer << 32));
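    // Note: packing the pair this way (first pointer in the low 32 bits, second pointer in the high
    // 32 bits) matches the memory layout of two adjacent pointers only on little-endian targets, which
    // covers the 32-bit platforms (x86 and ARM) this path serves.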

    // Read the old value in the location. If fFailOnNonNull is set we just assume this was zero and we'll
    // fail below if that's not the case.
    Int64 iOldValue = fFailOnNonNull ? 0 : *(Int64 volatile *)pPairLocation;

    Int64 iUpdatedOldValue = PalInterlockedCompareExchange64((Int64*)pPairLocation, iNewValue, iOldValue);
    if (iUpdatedOldValue == iOldValue)
    {
        // Successful update. Return the previous value of the second pointer. For cache entry updates
        // (fFailOnNonNull == true) this is guaranteed to be NULL, and a NULL result in the success case
        // is all the caller cares about. For indirection cell updates the second
        // pointer represents the old cache and the caller needs this data so they can schedule the cache
        // for deletion once it becomes safe to do so.
        return (void*)(UInt32)(iOldValue >> 32);
    }

    // The update failed due to a racing update to the same location. Return the new value of the second
    // pointer (either a new cache that lost the race or a non-NULL pointer in the cache entry update case).
    return pSecondPointer;
#endif // BIT64
}
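
// For illustration, the two calling patterns described above look roughly like this (a sketch
// only; the real call sites are the two helpers that follow):
//
//   // Cache entry update: succeeds only if both slots were NULL; a NULL result means success.
//   bool fUpdated = UpdatePointerPairAtomically(pEntry, pInstanceType, pTargetCode, true) == NULL;
//
//   // Indirection cell update: always attempts the swap; a non-NULL result is a cache pointer
//   // the cell no longer references and which may be scheduled for release.
//   void * pUnreferencedCache = UpdatePointerPairAtomically(pCell, pStub, pNewCacheValue, false);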

// Helper method for updating an interface dispatch cache entry atomically. See comments by the usage of
// this method for the details of why we need this. If a racing update is detected, false is returned and
// the update is abandoned. This is necessary since it's not safe to update a valid cache entry (one with
// a non-NULL m_pInstanceType field) outside of a GC.
static bool UpdateCacheEntryAtomically(InterfaceDispatchCacheEntry *pEntry,
                                       EEType * pInstanceType,
                                       void * pTargetCode)
{
    C_ASSERT(sizeof(InterfaceDispatchCacheEntry) == (sizeof(void*) * 2));
    C_ASSERT(offsetof(InterfaceDispatchCacheEntry, m_pInstanceType) < offsetof(InterfaceDispatchCacheEntry, m_pTargetCode));

    return UpdatePointerPairAtomically(pEntry, pInstanceType, pTargetCode, true) == NULL;
}

// Helper method for updating an interface dispatch indirection cell's stub and cache pointer atomically.
// Returns the value of the cache pointer that is not referenced by the cell after this operation. This can be
// NULL on the initial cell update, the value of the old cache pointer or the value of the new cache pointer
// supplied (in the case where another thread raced with us for the update and won). In any case, if the
// returned pointer is non-NULL it represents a cache that should be scheduled for release.
static InterfaceDispatchCache * UpdateCellStubAndCache(InterfaceDispatchCell * pCell,
                                                       void * pStub,
                                                       UIntNative newCacheValue)
{
    C_ASSERT(offsetof(InterfaceDispatchCell, m_pStub) == 0);
    C_ASSERT(offsetof(InterfaceDispatchCell, m_pCache) == sizeof(void*));

    UIntNative oldCacheValue = (UIntNative)UpdatePointerPairAtomically(pCell, pStub, (void*)newCacheValue, false);

    if (InterfaceDispatchCell::IsCache(oldCacheValue))
    {
        return (InterfaceDispatchCache *)oldCacheValue;
    }
    else
    {
        return nullptr;
    }
}

//
// Cache allocation logic.
//
// We use the existing AllocHeap mechanism as our base allocator for cache blocks. This is because it can
// provide the required double pointer-sized alignment with no padding or heap header costs. The downside
// is that there is no deallocation support (which would be hard to add without a cache block compaction
// scheme, something that is certainly possible but not necessarily needed at this point).
//
// Instead, much like the original VSD algorithm, we keep discarded cache blocks and use them to satisfy new
// allocation requests before falling back on AllocHeap.
//
// We can't re-use discarded cache blocks immediately since there may be code that is still using them.
// Instead we link them into a global list and then at the next GC (when no code can hold a reference to these
// any more) we can place them on one of several free lists based on their size.
//
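// In summary, a cache block cycles through these states:
//
//   AllocHeap or a free list --(AllocateCache)--> live, reachable from a dispatch cell
//   live --(DiscardCache)--> g_pDiscardedCacheList (running code may still be using it)
//   discarded --(ReclaimUnusedInterfaceDispatchCaches, at GC time)--> g_rgFreeLists by size
//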

#if defined(_AMD64_) || defined(_ARM64_)

// Head of the list of discarded cache blocks that can't be re-used just yet.
InterfaceDispatchCache * g_pDiscardedCacheList; // for AMD64 and ARM64, m_pCell is not used and we can link the discarded blocks themselves

#else // defined(_AMD64_) || defined(_ARM64_)

struct DiscardedCacheBlock
{
    DiscardedCacheBlock *       m_pNext;        // for x86 and ARM, we are short of registers, thus need the m_pCell back pointers
    InterfaceDispatchCache *    m_pCache;       // and thus need this auxiliary list
};

// Head of the list of discarded cache blocks that can't be re-used just yet.
static DiscardedCacheBlock * g_pDiscardedCacheList = NULL;

// Free list of DiscardedCacheBlock items
static DiscardedCacheBlock * g_pDiscardedCacheFree = NULL;

#endif // defined(_AMD64_) || defined(_ARM64_)

// Free lists for each cache size up to the maximum. We allocate from these in preference to new memory.
static InterfaceDispatchCache * g_rgFreeLists[CID_MAX_CACHE_SIZE_LOG2 + 1];

// Lock protecting both g_pDiscardedCacheList and g_rgFreeLists. We don't use the OS SLIST support here since
// it imposes too much space overhead on list entries on 64-bit (each is actually 16 bytes).
static CrstStatic g_sListLock;

// The base memory allocator.
static AllocHeap * g_pAllocHeap = NULL;

// Each cache size has an associated stub used to perform lookup over that cache.
extern "C" void (*RhpInterfaceDispatch1)();
extern "C" void (*RhpInterfaceDispatch2)();
extern "C" void (*RhpInterfaceDispatch4)();
extern "C" void (*RhpInterfaceDispatch8)();
extern "C" void (*RhpInterfaceDispatch16)();
extern "C" void (*RhpInterfaceDispatch32)();
extern "C" void (*RhpInterfaceDispatch64)();

extern "C" void (*RhpVTableOffsetDispatch)();

typedef void (*InterfaceDispatchStub)();

static void * g_rgDispatchStubs[CID_MAX_CACHE_SIZE_LOG2 + 1] = {
    &RhpInterfaceDispatch1,
    &RhpInterfaceDispatch2,
    &RhpInterfaceDispatch4,
    &RhpInterfaceDispatch8,
    &RhpInterfaceDispatch16,
    &RhpInterfaceDispatch32,
    &RhpInterfaceDispatch64,
};
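
// The index into this table matches CacheSizeToIndex below: entry i holds the stub that walks a
// cache of 2^i entries.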

// Map a cache size into a linear index.
static UInt32 CacheSizeToIndex(UInt32 cCacheEntries)
{
    switch (cCacheEntries)
    {
    case 1:
        return 0;
    case 2:
        return 1;
    case 4:
        return 2;
    case 8:
        return 3;
    case 16:
        return 4;
    case 32:
        return 5;
    case 64:
        return 6;
    default:
        UNREACHABLE();
    }
}
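
// Since cache sizes are always powers of two, the switch above is equivalent to computing
// log2(cCacheEntries); the explicit cases just keep unexpected sizes loud via UNREACHABLE().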

// Allocates and initializes a new cache of the given size. If given a previous version of the cache (guaranteed
// to be smaller) it will also pre-populate the new cache with the contents of the old. Additionally the
// address of the interface dispatch stub associated with this size of cache is returned.
static UIntNative AllocateCache(UInt32 cCacheEntries, InterfaceDispatchCache * pExistingCache, const DispatchCellInfo *pNewCellInfo, void ** ppStub)
{
    if (pNewCellInfo->CellType == DispatchCellType::VTableOffset)
    {
        ASSERT(pNewCellInfo->VTableOffset < InterfaceDispatchCell::IDC_MaxVTableOffsetPlusOne);
        *ppStub = &RhpVTableOffsetDispatch;
        ASSERT(!InterfaceDispatchCell::IsCache(pNewCellInfo->VTableOffset));
        return pNewCellInfo->VTableOffset;
    }

    ASSERT((cCacheEntries >= 1) && (cCacheEntries <= CID_MAX_CACHE_SIZE));
    ASSERT((pExistingCache == NULL) || (pExistingCache->m_cEntries < cCacheEntries));

    InterfaceDispatchCache * pCache = NULL;

    // Transform cache size back into a linear index.
    UInt32 idxCacheSize = CacheSizeToIndex(cCacheEntries);

    // Attempt to allocate the head of the free list of the correct cache size.
    if (g_rgFreeLists[idxCacheSize] != NULL)
    {
        CrstHolder lh(&g_sListLock);

        pCache = g_rgFreeLists[idxCacheSize];
        if (pCache != NULL)
        {
            g_rgFreeLists[idxCacheSize] = pCache->m_pNextFree;
            CID_COUNTER_INC(CacheReallocates);
        }
    }

    if (pCache == NULL)
    {
        // No luck with the free list; allocate the cache via the AllocHeap.
        pCache = (InterfaceDispatchCache*)g_pAllocHeap->AllocAligned(sizeof(InterfaceDispatchCache) +
                                                                     (sizeof(InterfaceDispatchCacheEntry) * cCacheEntries),
                                                                     sizeof(void*) * 2);
        if (pCache == NULL)
            return NULL;

        CID_COUNTER_INC(CacheAllocates);
#ifdef FEATURE_CID_STATS
        CID_g_cbMemoryAllocated += sizeof(InterfaceDispatchCacheEntry) * cCacheEntries;
        CID_g_rgAllocatesBySize[idxCacheSize]++;
#endif
    }

    // We have a cache block, now initialize it.
    pCache->m_pNextFree = NULL;
    pCache->m_cEntries = cCacheEntries;
    pCache->m_cacheHeader.Initialize(pNewCellInfo);

    // Copy over entries from previous version of the cache (if any) and zero the rest.
    if (pExistingCache)
    {
        memcpy(pCache->m_rgEntries,
               pExistingCache->m_rgEntries,
               sizeof(InterfaceDispatchCacheEntry) * pExistingCache->m_cEntries);
        memset(&pCache->m_rgEntries[pExistingCache->m_cEntries],
               0,
               (cCacheEntries - pExistingCache->m_cEntries) * sizeof(InterfaceDispatchCacheEntry));
    }
    else
    {
        memset(pCache->m_rgEntries,
               0,
               cCacheEntries * sizeof(InterfaceDispatchCacheEntry));
    }

    // Pass back the stub that corresponds to this cache size.
    *ppStub = g_rgDispatchStubs[idxCacheSize];

    return (UIntNative)pCache;
}
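
// Note that AllocateCache returns either a genuine InterfaceDispatchCache pointer or, for
// DispatchCellType::VTableOffset cells, a raw vtable offset. Callers distinguish the two with
// InterfaceDispatchCell::IsCache, as RhpUpdateDispatchCellCache does below.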

// Discards a cache by adding it to a list of caches that may still be in use but will be made available for
// re-allocation at the next GC.
static void DiscardCache(InterfaceDispatchCache * pCache)
{
    CID_COUNTER_INC(CacheDiscards);

    CrstHolder lh(&g_sListLock);

#if defined(_AMD64_) || defined(_ARM64_)

    // on AMD64 and ARM64, we can thread the list through the blocks directly
    pCache->m_pNextFree = g_pDiscardedCacheList;
    g_pDiscardedCacheList = pCache;

#else // defined(_AMD64_) || defined(_ARM64_)

    // on other architectures, we cannot overwrite pCache->m_pNextFree yet
    // because it shares storage with m_pCell which may still be used as a back
    // pointer to the dispatch cell.

    // instead, allocate an auxiliary node (with its own auxiliary free list)
    DiscardedCacheBlock * pDiscardedCacheBlock = g_pDiscardedCacheFree;
    if (pDiscardedCacheBlock != NULL)
        g_pDiscardedCacheFree = pDiscardedCacheBlock->m_pNext;
    else
        pDiscardedCacheBlock = (DiscardedCacheBlock *)g_pAllocHeap->Alloc(sizeof(DiscardedCacheBlock));

    if (pDiscardedCacheBlock != NULL) // if we did NOT get the memory, we leak the discarded block
    {
        pDiscardedCacheBlock->m_pNext = g_pDiscardedCacheList;
        pDiscardedCacheBlock->m_pCache = pCache;

        g_pDiscardedCacheList = pDiscardedCacheBlock;
    }
#endif // defined(_AMD64_) || defined(_ARM64_)
}

// Called during a GC to empty the list of discarded caches (which we can now guarantee aren't being accessed)
// and sort the results into the free lists we maintain for each cache size.
void ReclaimUnusedInterfaceDispatchCaches()
{
    // No need for any locks, we're not racing with any other threads any more.

    // Walk the list of discarded caches.
#if defined(_AMD64_) || defined(_ARM64_)

    // on AMD64 and ARM64, the list is threaded directly through the cache blocks
    InterfaceDispatchCache * pCache = g_pDiscardedCacheList;
    while (pCache)
    {
        InterfaceDispatchCache * pNextCache = pCache->m_pNextFree;

        // Transform cache size back into a linear index.
        UInt32 idxCacheSize = CacheSizeToIndex(pCache->m_cEntries);

        // Insert the cache onto the head of the correct free list.
        pCache->m_pNextFree = g_rgFreeLists[idxCacheSize];
        g_rgFreeLists[idxCacheSize] = pCache;

        pCache = pNextCache;
    }

#else // defined(_AMD64_) || defined(_ARM64_)

    // on other architectures, we use an auxiliary list instead
    DiscardedCacheBlock * pDiscardedCacheBlock = g_pDiscardedCacheList;
    while (pDiscardedCacheBlock)
    {
        InterfaceDispatchCache * pCache = pDiscardedCacheBlock->m_pCache;

        // Transform cache size back into a linear index.
        UInt32 idxCacheSize = CacheSizeToIndex(pCache->m_cEntries);

        // Insert the cache onto the head of the correct free list.
        pCache->m_pNextFree = g_rgFreeLists[idxCacheSize];
        g_rgFreeLists[idxCacheSize] = pCache;

        // Insert the container to its own free list
        DiscardedCacheBlock * pNextDiscardedCacheBlock = pDiscardedCacheBlock->m_pNext;
        pDiscardedCacheBlock->m_pNext = g_pDiscardedCacheFree;
        g_pDiscardedCacheFree = pDiscardedCacheBlock;
        pDiscardedCacheBlock = pNextDiscardedCacheBlock;
    }

#endif // defined(_AMD64_) || defined(_ARM64_)

    // We processed all the discarded entries, so we can simply NULL the list head.
    g_pDiscardedCacheList = NULL;
}

// One time initialization of interface dispatch.
bool InitializeInterfaceDispatch()
{
    g_pAllocHeap = new AllocHeap();
    if (g_pAllocHeap == NULL)
        return false;

    if (!g_pAllocHeap->Init())
        return false;

    g_sListLock.Init(CrstInterfaceDispatchGlobalLists, CRST_DEFAULT);

    return true;
}

COOP_PINVOKE_HELPER(PTR_Code, RhpUpdateDispatchCellCache, (InterfaceDispatchCell * pCell, PTR_Code pTargetCode, EEType* pInstanceType, DispatchCellInfo *pNewCellInfo))
{
    // Attempt to update the cache with this new mapping (if we have any cache at all, the initial state
    // is none).
    InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache();
    UInt32 cOldCacheEntries = 0;
    if (pCache != NULL)
    {
        InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries;
        for (UInt32 i = 0; i < pCache->m_cEntries; i++, pCacheEntry++)
        {
            if (pCacheEntry->m_pInstanceType == NULL)
            {
                if (UpdateCacheEntryAtomically(pCacheEntry, pInstanceType, pTargetCode))
                    return (PTR_Code)pTargetCode;
            }
        }

        cOldCacheEntries = pCache->m_cEntries;
    }

    // Failed to update an existing cache, we need to allocate a new cache. The old one, if any, might
    // still be in use so we can't simply reclaim it. Instead we keep it around until the next GC at which
    // point we know no code is holding a reference to it. Particular cache sizes are associated with a
    // (globally shared) stub which implicitly knows the size of the cache.

    if (cOldCacheEntries == CID_MAX_CACHE_SIZE)
    {
        // We already reached the maximum cache size we wish to allocate. For now don't attempt to cache
        // the mapping we just did: there's no safe way to update the existing cache right now if it
        // doesn't have any empty entries. There are schemes that would let us do this at the next GC
        // point, but it's not clear whether we should do that or re-tune the maximum cache size; we need
        // to measure this.
        CID_COUNTER_INC(CacheSizeOverflows);
        return (PTR_Code)pTargetCode;
    }

    UInt32 cNewCacheEntries = cOldCacheEntries ? cOldCacheEntries * 2 : 1;
    void *pStub;
    UIntNative newCacheValue = AllocateCache(cNewCacheEntries, pCache, pNewCellInfo, &pStub);
    if (newCacheValue == 0)
    {
        CID_COUNTER_INC(CacheOutOfMemory);
        return (PTR_Code)pTargetCode;
    }

    if (InterfaceDispatchCell::IsCache(newCacheValue))
    {
        pCache = (InterfaceDispatchCache*)newCacheValue;
#if !defined(_AMD64_) && !defined(_ARM64_)
        // Set back pointer to interface dispatch cell for non-AMD64 and non-ARM64
        // for AMD64 and ARM64, we have enough registers to make this trick unnecessary
        pCache->m_pCell = pCell;
#endif // !defined(_AMD64_) && !defined(_ARM64_)

        // Add entry to the first unused slot.
        InterfaceDispatchCacheEntry * pCacheEntry = &pCache->m_rgEntries[cOldCacheEntries];
        pCacheEntry->m_pInstanceType = pInstanceType;
        pCacheEntry->m_pTargetCode = pTargetCode;
    }

    // Publish the new cache by atomically updating both the cache and stub pointers in the indirection
    // cell. This returns us a cache to discard which may be NULL (no previous cache), the previous cache
    // value or the cache we just allocated (another thread performed an update first).
    InterfaceDispatchCache * pDiscardedCache = UpdateCellStubAndCache(pCell, pStub, newCacheValue);
    if (pDiscardedCache)
        DiscardCache(pDiscardedCache);

    return (PTR_Code)pTargetCode;
}
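
// For illustration, a call site that keeps encountering new instance types grows its cache
// through the sequence: no cache -> 1 -> 2 -> 4 -> 8 -> 16 -> 32 -> 64 entries. Once a full
// 64-entry cache overflows, further mappings still dispatch correctly but are no longer
// cached (the CacheSizeOverflows path above).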

COOP_PINVOKE_HELPER(PTR_Code, RhpSearchDispatchCellCache, (InterfaceDispatchCell * pCell, EEType* pInstanceType))
{
    // This function must be implemented in native code so that we do not take a GC while walking the cache.
    InterfaceDispatchCache * pCache = (InterfaceDispatchCache*)pCell->GetCache();
    if (pCache != NULL)
    {
        InterfaceDispatchCacheEntry * pCacheEntry = pCache->m_rgEntries;
        for (UInt32 i = 0; i < pCache->m_cEntries; i++, pCacheEntry++)
            if (pCacheEntry->m_pInstanceType == pInstanceType)
                return (PTR_Code)pCacheEntry->m_pTargetCode;
    }

    return nullptr;
}

// Given a dispatch cell, get the type and slot associated with it. This function MUST be implemented
// in cooperative native code, as the m_pCache field on the cell is unsafe to access from managed
// code due to its use of the GC state both as a lock and for lifetime control.
COOP_PINVOKE_HELPER(void, RhpGetDispatchCellInfo, (InterfaceDispatchCell * pCell, DispatchCellInfo* pDispatchCellInfo))
{
    *pDispatchCellInfo = pCell->GetDispatchCellInfo();
}

EXTERN_C DECLSPEC_THREAD void* t_TLS_DispatchCell = nullptr;

COOP_PINVOKE_HELPER(void, RhpSetTLSDispatchCell, (void *dispatchCell))
{
    t_TLS_DispatchCell = dispatchCell;
}

extern "C" void(*RhpTailCallTLSDispatchCell)();
COOP_PINVOKE_HELPER(void*, RhpGetTailCallTLSDispatchCell, ())
{
    return &RhpTailCallTLSDispatchCell;
}

extern "C" void(*RhpCastableObjectDispatchHelper)();
COOP_PINVOKE_HELPER(void*, RhpGetCastableObjectDispatchHelper, ())
{
    return &RhpCastableObjectDispatchHelper;
}

extern "C" void(*RhpCastableObjectDispatchHelper_TailCalled)();
COOP_PINVOKE_HELPER(void*, RhpGetCastableObjectDispatchHelper_TailCalled, ())
{
    return &RhpCastableObjectDispatchHelper_TailCalled;
}

extern "C" void(*RhpCastableObjectDispatch_CommonStub)();
COOP_PINVOKE_HELPER(void*, RhpGetCastableObjectDispatch_CommonStub, ())
{
    return &RhpCastableObjectDispatch_CommonStub;
}

#endif // FEATURE_CACHED_INTERFACE_DISPATCH