Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/corert.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichal Strehovsky <michals@microsoft.com>2017-08-14 20:59:13 +0300
committerMichal Strehovsky <michals@microsoft.com>2017-08-14 20:59:13 +0300
commit1e9054a91177eed92094790ae063a4d741185aa2 (patch)
tree2b37c7212aed534b66c9843b8cf2b86c0c24925b /src/Native/Runtime/amd64
parentd714e3be0bb99c0f76f3b62ce310878be0b3f66e (diff)
Speed up string allocations by 35%
`FastAllocateString` (the choke point through which all string allocations go) wasn't as fast as it could be, and we were 30% slower than the CLR at allocating strings. We were leaving a lot of perf on the table. Before this change, string allocation used the same allocator as arrays. Since there's a subtle difference between the failure modes on overflow (string allocation throws OOM, array allocation throws OverflowException), `FastAllocateString` required a try/catch block to handle the corner case. This was inhibiting codegen optimizations around this code path - to fix that problem, we needed a separate allocator. And since we now had a separate allocator for strings, I also took the liberty of inlining some details about strings (component size and base size) into the helper. It turns out the runtime already hardcodes details about strings (the component size) in a couple of places anyway, so this is not as big a "separation of concerns" violation as it looks. [tfs-changeset: 1670224]
Diffstat (limited to 'src/Native/Runtime/amd64')
-rw-r--r--src/Native/Runtime/amd64/AllocFast.S79
-rw-r--r--src/Native/Runtime/amd64/AllocFast.asm52
2 files changed, 131 insertions, 0 deletions
diff --git a/src/Native/Runtime/amd64/AllocFast.S b/src/Native/Runtime/amd64/AllocFast.S
index 443d52b19..513175fad 100644
--- a/src/Native/Runtime/amd64/AllocFast.S
+++ b/src/Native/Runtime/amd64/AllocFast.S
@@ -119,6 +119,85 @@ LOCAL_LABEL(NewOutOfMemory):
NESTED_END RhpNewObject, _TEXT
+// Allocate a string. Fast path bumps the thread's alloc_ptr and returns the new object in RAX; otherwise tail-calls RhpNewArrayRare.
+// RDI == EEType (stored into the new object's m_pEEType field)
+// ESI == character/element count (stored as m_Length) -- NOTE(review): the AllocFast.asm counterpart is named RhNewString, not RhpNewString; confirm which symbol callers import
+NESTED_ENTRY RhpNewString, _TEXT, NoHandler
+ // we want to limit the element count to the non-negative 32-bit int range (unsigned compare on the full 64-bit rsi)
+ cmp rsi, 07fffffffh
+ ja LOCAL_LABEL(StringSizeOverflow)
+
+ push_nonvol_reg rbx
+ push_nonvol_reg r12
+ push_register rcx // padding (third 8-byte push; the value itself is not used -- presumably keeps the stack aligned, confirm)
+
+ mov rbx, rdi // save EEType
+ mov r12, rsi // save element count
+
+ // rax = GetThread() -- NOTE(review): rdi/rsi were copied to rbx/r12 above, presumably because this macro may clobber them; confirm against the macro
+ INLINE_GETTHREAD
+
+ mov rcx, rax // rcx = Thread*
+
+ // Compute overall allocation size (align(base size + (element size * elements), 8)).
+ lea rax, [(r12 * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)]
+ and rax, -8 // together with the +7 above this rounds the size up to a multiple of 8
+
+ // rax == string size
+ // rbx == EEType
+ // rcx == Thread*
+ // r12 == element count
+
+ mov rdx, rax // keep the string size; the add below turns rax into the new alloc ptr
+ add rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+ jc LOCAL_LABEL(RhpNewString_RarePath) // the addition carried out of 64 bits
+
+ // rax == new alloc ptr
+ // rbx == EEType
+ // rcx == Thread*
+ // rdx == string size
+ // r12 == element count
+ cmp rax, [rcx + OFFSETOF__Thread__m_alloc_context__alloc_limit]
+ ja LOCAL_LABEL(RhpNewString_RarePath) // new alloc ptr would be above alloc_limit
+
+ mov [rcx + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax // commit the bumped pointer
+
+ // calc the new object pointer (new alloc ptr - string size == previous alloc ptr)
+ sub rax, rdx
+
+ mov [rax + OFFSETOF__Object__m_pEEType], rbx
+ mov [rax + OFFSETOF__String__m_Length], r12d // only the low 32 bits of the count are stored
+
+ .cfi_remember_state
+ pop_register rcx // padding
+ pop_nonvol_reg r12
+ pop_nonvol_reg rbx
+ ret // rax == new string object
+
+ .cfi_restore_state
+ .cfi_def_cfa_offset 32 // workaround cfi_restore_state bug
+LOCAL_LABEL(RhpNewString_RarePath):
+ mov rdi, rbx // restore EEType
+ mov rsi, r12 // restore element count
+ // passing string size in rdx
+
+ pop_register rcx // padding
+ pop_nonvol_reg r12
+ pop_nonvol_reg rbx
+ jmp C_FUNC(RhpNewArrayRare) // tail-call with rdi/rsi/rdx set up above; the frame has already been popped
+
+LOCAL_LABEL(StringSizeOverflow):
+ // We get here if the element count is outside the non-negative 32-bit int range
+ // (checked at entry, before any register is pushed). We're going to tail-call to a managed helper that will throw
+ // an OOM exception that the caller of this allocator understands.
+
+ // rdi holds EEType pointer already
+ xor esi, esi // Indicate that we should throw OOM.
+ jmp C_FUNC(RhExceptionHandling_FailedAllocation)
+
+NESTED_END RhpNewString, _TEXT
+
+
// Allocate one dimensional, zero based array (SZARRAY).
// RDI == EEType
// ESI == element count
diff --git a/src/Native/Runtime/amd64/AllocFast.asm b/src/Native/Runtime/amd64/AllocFast.asm
index c8d10c685..6dae9dab9 100644
--- a/src/Native/Runtime/amd64/AllocFast.asm
+++ b/src/Native/Runtime/amd64/AllocFast.asm
@@ -102,6 +102,58 @@ NewOutOfMemory:
NESTED_END RhpNewObject, _TEXT
+;; Allocate a string. Fast path bumps the thread's alloc_ptr and returns the new object in RAX; otherwise jumps to RhpNewArrayRare.
+;; RCX == EEType (stored into the new object's m_pEEType field)
+;; EDX == character/element count (stored as m_Length)
+LEAF_ENTRY RhNewString, _TEXT
+
+ ; we want to limit the element count to the non-negative 32-bit int range (unsigned compare on the full 64-bit rdx)
+ cmp rdx, 07fffffffh
+ ja StringSizeOverflow
+
+ ; Compute overall allocation size (align(base size + (element size * elements), 8)).
+ lea rax, [(rdx * STRING_COMPONENT_SIZE) + (STRING_BASE_SIZE + 7)]
+ and rax, -8 ; together with the +7 above this rounds the size up to a multiple of 8
+
+ ; rax == string size
+ ; rcx == EEType
+ ; rdx == element count (r10 receives the thread below; r8 is presumably a scratch register for the macro -- confirm)
+
+ INLINE_GETTHREAD r10, r8
+
+ mov r8, rax ; keep the string size; the add below turns rax into the new alloc ptr
+ add rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr]
+ jc RhpNewArrayRare ; the addition carried out of 64 bits; rcx/rdx/r8 still hold EEType/count/size
+
+ ; rax == new alloc ptr
+ ; rcx == EEType
+ ; rdx == element count
+ ; r8 == string size
+ ; r10 == thread
+ cmp rax, [r10 + OFFSETOF__Thread__m_alloc_context__alloc_limit]
+ ja RhpNewArrayRare ; new alloc ptr would be above alloc_limit
+
+ mov [r10 + OFFSETOF__Thread__m_alloc_context__alloc_ptr], rax ; commit the bumped pointer
+
+ ; calc the new object pointer (new alloc ptr - string size == previous alloc ptr)
+ sub rax, r8
+
+ mov [rax + OFFSETOF__Object__m_pEEType], rcx
+ mov [rax + OFFSETOF__String__m_Length], edx ; only the low 32 bits of the count are stored
+
+ ret ; rax == new string object
+
+StringSizeOverflow:
+ ; We get here if the element count is outside the non-negative 32-bit int range
+ ; (checked at entry). We're going to tail-call to a managed helper that will throw
+ ; an OOM exception that the caller of this allocator understands.
+
+ ; rcx holds EEType pointer already
+ xor edx, edx ; Indicate that we should throw OOM.
+ jmp RhExceptionHandling_FailedAllocation
+LEAF_END RhNewString, _TEXT
+
+
;; Allocate one dimensional, zero based array (SZARRAY).
;; RCX == EEType
;; EDX == element count