// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
//
// Unmanaged GC memory helpers
//

// This function fills a piece of memory in a GC safe way. It makes the guarantee
// that it will fill memory in at least pointer sized chunks whenever possible.
// Unaligned memory at the beginning and remaining bytes at the end are written bytewise.
// We must make this guarantee whenever we clear memory in the GC heap that could contain
// object references. The GC or other user threads can read object references at any time;
// clearing them bytewise can result in a read on another thread getting incorrect data.
FORCEINLINE void InlineGCSafeFillMemory(void * mem, size_t size, size_t pv)
{
    UInt8 * memBytes = (UInt8 *)mem;
    UInt8 * endBytes = &memBytes[size];

    // handle unaligned bytes at the beginning
    while (!IS_ALIGNED(memBytes, sizeof(void *)) && (memBytes < endBytes))
        *memBytes++ = (UInt8)pv;

    // now write pointer sized pieces
    // volatile ensures that this doesn't get optimized back into a memset call
    size_t nPtrs = (endBytes - memBytes) / sizeof(void *);
    volatile UIntNative* memPtr = (UIntNative*)memBytes;
    for (size_t i = 0; i < nPtrs; i++)
        *memPtr++ = pv;

    // handle remaining bytes at the end
    memBytes = (UInt8*)memPtr;
    while (memBytes < endBytes)
        *memBytes++ = (UInt8)pv;
}

// These functions copy memory in a GC safe way. They make the guarantee
// that the memory is copied in at least pointer sized chunks.

FORCEINLINE void InlineForwardGCSafeCopy(void * dest, const void *src, size_t len)
{
    // All parameters must be pointer-size-aligned
    ASSERT(IS_ALIGNED(dest, sizeof(size_t)));
    ASSERT(IS_ALIGNED(src, sizeof(size_t)));
    ASSERT(IS_ALIGNED(len, sizeof(size_t)));

    size_t size = len;
    UInt8 * dmem = (UInt8 *)dest;
    UInt8 * smem = (UInt8 *)src;

    // regions must be non-overlapping
    ASSERT(dmem <= smem || smem + size <= dmem);

    // copy 4 pointers at a time
    while (size >= 4 * sizeof(size_t))
    {
        size -= 4 * sizeof(size_t);
        ((size_t *)dmem)[0] = ((size_t *)smem)[0];
        ((size_t *)dmem)[1] = ((size_t *)smem)[1];
        ((size_t *)dmem)[2] = ((size_t *)smem)[2];
        ((size_t *)dmem)[3] = ((size_t *)smem)[3];
        smem += 4 * sizeof(size_t);
        dmem += 4 * sizeof(size_t);
    }

    // copy 2 trailing pointers, if needed
    if ((size & (2 * sizeof(size_t))) != 0)
    {
        ((size_t *)dmem)[0] = ((size_t *)smem)[0];
        ((size_t *)dmem)[1] = ((size_t *)smem)[1];
        smem += 2 * sizeof(size_t);
        dmem += 2 * sizeof(size_t);
    }

    // finish with one pointer, if needed
    if ((size & sizeof(size_t)) != 0)
    {
        ((size_t *)dmem)[0] = ((size_t *)smem)[0];
    }
}

FORCEINLINE void InlineBackwardGCSafeCopy(void * dest, const void *src, size_t len)
{
    // All parameters must be pointer-size-aligned
    ASSERT(IS_ALIGNED(dest, sizeof(size_t)));
    ASSERT(IS_ALIGNED(src, sizeof(size_t)));
    ASSERT(IS_ALIGNED(len, sizeof(size_t)));

    size_t size = len;
    UInt8 * dmem = (UInt8 *)dest + len;
    UInt8 * smem = (UInt8 *)src + len;

    // regions must be non-overlapping
    ASSERT(smem <= dmem || dmem + size <= smem);

    // copy 4 pointers at a time
    while (size >= 4 * sizeof(size_t))
    {
        size -= 4 * sizeof(size_t);
        smem -= 4 * sizeof(size_t);
        dmem -= 4 * sizeof(size_t);
        ((size_t *)dmem)[3] = ((size_t *)smem)[3];
        ((size_t *)dmem)[2] = ((size_t *)smem)[2];
        ((size_t *)dmem)[1] = ((size_t *)smem)[1];
        ((size_t *)dmem)[0] = ((size_t *)smem)[0];
    }

    // copy 2 trailing pointers, if needed
    if ((size & (2 * sizeof(size_t))) != 0)
    {
        smem -= 2 * sizeof(size_t);
        dmem -= 2 * sizeof(size_t);
        ((size_t *)dmem)[1] = ((size_t *)smem)[1];
        ((size_t *)dmem)[0] = ((size_t *)smem)[0];
    }

    // finish with one pointer, if needed
    if ((size & sizeof(size_t)) != 0)
    {
        smem -= sizeof(size_t);
        dmem -= sizeof(size_t);
        ((size_t *)dmem)[0] = ((size_t *)smem)[0];
    }
}
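
// Illustrative sketch only (hypothetical helper, not part of this file): shows the intended
// calling pattern for the two copy routines above. A caller relocating a run of object
// references within the GC heap picks the forward or backward variant based on the relative
// placement of the regions, so that overlapping moves still read and write each reference as
// a whole pointer. dest, src and cbSize must satisfy the pointer-size alignment ASSERTs above.
FORCEINLINE void ExampleGCSafeMoveRefs(void * dest, const void * src, size_t cbSize)
{
    if ((UInt8 *)dest < (UInt8 *)src)
        InlineForwardGCSafeCopy(dest, src, cbSize);   // dest precedes src: copy low-to-high
    else
        InlineBackwardGCSafeCopy(dest, src, cbSize);  // dest follows src: copy high-to-low
}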
#ifndef DACCESS_COMPILE

#ifdef WRITE_BARRIER_CHECK
extern uint8_t* g_GCShadow;
extern uint8_t* g_GCShadowEnd;
typedef DPTR(uint8_t) PTR_uint8_t;
extern "C" {
    GPTR_DECL(uint8_t, g_lowest_address);
    GPTR_DECL(uint8_t, g_highest_address);
}
#endif

typedef DPTR(uint32_t) PTR_uint32_t;
extern "C" {
    GPTR_DECL(uint32_t, g_card_table);
}

static const UInt32 INVALIDGCVALUE = 0xcccccccd;

FORCEINLINE void InlineWriteBarrier(void * dst, void * ref)
{
    if (((uint8_t*)ref >= g_ephemeral_low) && ((uint8_t*)ref < g_ephemeral_high))
    {
        // volatile is used here to prevent fetch of g_card_table from being reordered
        // with g_lowest/highest_address check above. See comment in code:gc_heap::grow_brick_card_tables.
        uint8_t* pCardByte = (uint8_t *)VolatileLoadWithoutBarrier(&g_card_table) + ((size_t)dst >> LOG2_CLUMP_SIZE);
        if (*pCardByte != 0xFF)
            *pCardByte = 0xFF;
    }
}

FORCEINLINE void InlineCheckedWriteBarrier(void * dst, void * ref)
{
    // if the dst is outside of the heap (unboxed value classes) then we
    // simply exit
    if (((uint8_t*)dst < g_lowest_address) || ((uint8_t*)dst >= g_highest_address))
        return;

    InlineWriteBarrier(dst, ref);
}
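
// Illustrative sketch only (hypothetical helper, not part of this file): the expected calling
// pattern for the checked barrier. The reference is stored into the destination slot first; the
// barrier then marks the card covering that slot so an ephemeral GC will rescan it, but only
// when the destination actually lies within the GC heap and the reference is ephemeral.
FORCEINLINE void ExampleWriteRefWithBarrier(void ** field, void * ref)
{
    *field = ref;                           // publish the reference as a single pointer-sized write
    InlineCheckedWriteBarrier(field, ref);  // record the card if 'field' is a heap location
}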
FORCEINLINE void InlinedBulkWriteBarrier(void* pMemStart, size_t cbMemSize)
{
    // Check whether the writes were even into the heap. If not there's no card update required.
    // Also if the size is smaller than a pointer, no write barrier is required.
    // This case can occur with universal shared generic code where the size
    // is not known at compile time.
    if (pMemStart < g_lowest_address || (pMemStart >= g_highest_address) || (cbMemSize < sizeof(UIntNative)))
    {
        return;
    }

#ifdef WRITE_BARRIER_CHECK
    // Perform shadow heap updates corresponding to the gc heap updates that immediately preceded this helper
    // call.

    // If g_GCShadow is 0, don't perform the check.
    if (g_GCShadow != NULL)
    {
        // Compute the shadow heap address corresponding to the beginning of the range of heap addresses modified
        // and in the process range check it to make sure we have the shadow version allocated.
        UIntNative* shadowSlot = (UIntNative*)(g_GCShadow + ((uint8_t*)pMemStart - g_lowest_address));
        if (shadowSlot <= (UIntNative*)g_GCShadowEnd)
        {
            // Iterate over every pointer sized slot in the range, copying data from the real heap to the shadow heap.
            // As we perform each copy we need to recheck the real heap contents with an ordered read to ensure we're
            // not racing with another heap updater. If we discover a race we invalidate the corresponding shadow heap
            // slot using a special well-known value so that this location will not be tested during the next shadow
            // heap validation.
            UIntNative* realSlot = (UIntNative*)pMemStart;
            UIntNative slotCount = cbMemSize / sizeof(UIntNative);
            do
            {
                // Update shadow slot from real slot.
                UIntNative realValue = *realSlot;
                *shadowSlot = realValue;

                // Memory barrier to ensure the next read is ordered wrt to the shadow heap write we just made.
                PalMemoryBarrier();

                // Read the real slot contents again. If they don't agree with what we just wrote then someone just raced
                // with us and updated the heap again. In such cases we invalidate the shadow slot.
                if (*realSlot != realValue)
                {
                    *shadowSlot = INVALIDGCVALUE;
                }
                realSlot++;
                shadowSlot++;
                slotCount--;
            } while (slotCount > 0);
        }
    }
#endif // WRITE_BARRIER_CHECK

    // Compute the starting card address and the number of bytes to write (groups of 8 cards). We could try
    // for further optimization here using aligned 32-bit writes but there's some overhead in setup required
    // and additional complexity. It's not clear this is warranted given that a single byte of card table
    // update already covers 1K of object space (2K on 64-bit platforms). It's also not worth probing that
    // 1K/2K range to see if any of the pointers appear to be non-ephemeral GC references. Given the size of
    // the area the chances are high that at least one interesting GC reference is present.
    size_t startAddress = (size_t)pMemStart;
    size_t endAddress = startAddress + cbMemSize;
    size_t startingClump = startAddress >> LOG2_CLUMP_SIZE;
    size_t endingClump = (endAddress + CLUMP_SIZE - 1) >> LOG2_CLUMP_SIZE;

    // calculate the number of clumps to mark (round_up(end) - start)
    size_t clumpCount = endingClump - startingClump;

    // VolatileLoadWithoutBarrier() is used here to prevent fetch of g_card_table from being reordered
    // with g_lowest/highest_address check at the beginning of this function.
    uint8_t* card = ((uint8_t*)VolatileLoadWithoutBarrier(&g_card_table)) + startingClump;

    // Fill the cards. To avoid cache line thrashing we check whether the cards have already been set before
    // writing.
    do
    {
        if (*card != 0xff)
        {
            *card = 0xff;
        }
        card++;
        clumpCount--;
    }
    while (clumpCount != 0);
}

#endif // DACCESS_COMPILE
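
// Worked example for the card marking in InlinedBulkWriteBarrier above (illustrative only,
// assuming a 2K clump, i.e. LOG2_CLUMP_SIZE == 11, the 64-bit case mentioned in the comment
// there): a 6000-byte bulk write starting at address 0x1000 marks three cards:
//     startingClump = 0x1000 >> 11                       = 2
//     endingClump   = (0x1000 + 6000 + 0x800 - 1) >> 11  = 5
//     clumpCount    = 5 - 2                              = 3   // cards for clumps 2, 3 and 4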