Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/cecil.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Eugene Rozenfeld <erozen@microsoft.com>, 2017-02-10 09:57:24 +0300
committer: Eugene Rozenfeld <erozen@microsoft.com>, 2017-02-13 21:43:24 +0300
commit: 32d77a6e861d7648c2e75d3ea3d742ab1a0fb5a7 (patch)
tree: a2be6f46c2244b052c63ad3c8edfa8e923ce222d
parent: 0d657ab73f60a39d9e20cf1215034136807ceff9 (diff)
Optimize StringHeap size.
This change reduces the size of the StringHeap by reusing string suffixes wherever possible. Before this change, if the metadata strings `bar` and `foobar` were both needed, both would be emitted. With this change, only `foobar` is emitted, and its suffix is used for `bar`.

Most of the implementation is taken from Roslyn: https://github.com/dotnet/roslyn/blob/614299ff83da9959fa07131c6d0ffbc58873b6ae/src/Compilers/Core/Portable/System/Reflection/Metadata/Ecma335/MetadataBuilder.Heaps.cs#L287

StringHeapBuffer.GetStringIndex no longer eagerly writes bytes to the buffer; instead, it just assigns each string an index and keeps a map from strings to indexes. After AssemblyWriter.BuildModule completes and all strings have been collected, a new method, WriteStrings, implements the optimization that reuses string suffixes and returns a map from string indexes to buffer offsets. That map is used by the table heap's WriteString (Mono.Cecil.Metadata/Buffers.cs) to emit the correct string offsets.
-rw-r--r-- Mono.Cecil.Cil/PortablePdb.cs 5
-rw-r--r-- Mono.Cecil.Metadata/Buffers.cs 80
-rw-r--r-- Mono.Cecil/AssemblyWriter.cs 2
3 files changed, 82 insertions, 5 deletions
diff --git a/Mono.Cecil.Cil/PortablePdb.cs b/Mono.Cecil.Cil/PortablePdb.cs
index 4df8813..fb897d5 100644
--- a/Mono.Cecil.Cil/PortablePdb.cs
+++ b/Mono.Cecil.Cil/PortablePdb.cs
@@ -249,6 +249,7 @@ namespace Mono.Cecil.Cil {
return;
WritePdbHeap ();
+
WriteTableHeap ();
writer.BuildMetadataTextMap ();
@@ -291,6 +292,10 @@ namespace Mono.Cecil.Cil {
void WriteTableHeap ()
{
+ if (module_metadata.string_heap != pdb_metadata.string_heap) {
+ pdb_metadata.table_heap.string_offsets = pdb_metadata.string_heap.WriteStrings ();
+ }
+
pdb_metadata.table_heap.WriteTableHeap ();
}
}
diff --git a/Mono.Cecil.Metadata/Buffers.cs b/Mono.Cecil.Metadata/Buffers.cs
index 5a915e2..01f60c7 100644
--- a/Mono.Cecil.Metadata/Buffers.cs
+++ b/Mono.Cecil.Metadata/Buffers.cs
@@ -34,6 +34,8 @@ namespace Mono.Cecil.Metadata {
readonly int [] coded_index_sizes = new int [Mixin.CodedIndexCount];
readonly Func<Table, int> counter;
+ internal uint [] string_offsets;
+
public override bool IsEmpty {
get { return false; }
}
@@ -81,7 +83,7 @@ namespace Mono.Cecil.Metadata {
public void WriteString (uint @string)
{
- WriteBySize (@string, large_string);
+ WriteBySize (string_offsets [@string], large_string);
}
public void WriteBlob (uint blob)
@@ -299,7 +301,7 @@ namespace Mono.Cecil.Metadata {
class StringHeapBuffer : HeapBuffer {
- readonly Dictionary<string, uint> strings = new Dictionary<string, uint> (StringComparer.Ordinal);
+ protected Dictionary<string, uint> strings = new Dictionary<string, uint> (StringComparer.Ordinal);
public sealed override bool IsEmpty {
get { return length <= 1; }
@@ -311,23 +313,79 @@ namespace Mono.Cecil.Metadata {
WriteByte (0);
}
- public uint GetStringIndex (string @string)
+ public virtual uint GetStringIndex (string @string)
{
uint index;
if (strings.TryGetValue (@string, out index))
return index;
- index = (uint) base.position;
- WriteString (@string);
+ index = (uint) strings.Count + 1;
strings.Add (@string, index);
return index;
}
+ public uint [] WriteStrings()
+ {
+ var sorted = new List<KeyValuePair<string, uint>> (strings);
+ sorted.Sort (new SuffixSort ());
+ strings = null;
+
+ // Add 1 for empty string whose index and offset are both 0
+ var string_offsets = new uint [sorted.Count + 1];
+ string_offsets [0] = 0;
+
+ // Find strings that can be folded
+ var previous = string.Empty;
+ foreach (KeyValuePair<string, uint> entry in sorted) {
+ int position = base.position;
+
+ // It is important to use ordinal comparison otherwise we'll use the current culture!
+ if (previous.EndsWith (entry.Key, StringComparison.Ordinal) && !IsLowSurrogateChar (entry.Key[0])) {
+ // Map over the tail of prev string. Watch for null-terminator of prev string.
+ string_offsets [entry.Value] = (uint) (position - (Encoding.UTF8.GetByteCount (entry.Key) + 1));
+ }
+ else {
+ string_offsets [entry.Value] = (uint) position;
+ WriteString (entry.Key);
+ }
+
+ previous = entry.Key;
+ }
+
+ return string_offsets;
+ }
+
+ static bool IsLowSurrogateChar(int c)
+ {
+ return unchecked((uint)(c - 0xDC00)) <= 0xDFFF - 0xDC00;
+ }
+
protected virtual void WriteString (string @string)
{
WriteBytes (Encoding.UTF8.GetBytes (@string));
WriteByte (0);
}
+
+ // Sorts strings such that a string is followed immediately by all strings
+ // that are a suffix of it.
+ private class SuffixSort : IComparer<KeyValuePair<string, uint>> {
+ public int Compare(KeyValuePair<string, uint> xPair, KeyValuePair<string, uint> yPair) {
+ var x = xPair.Key;
+ var y = yPair.Key;
+
+ for (int i = x.Length - 1, j = y.Length - 1; i >= 0 & j >= 0; i--, j--) {
+ if (x [i] < y [j]) {
+ return -1;
+ }
+
+ if (x [i] > y [j]) {
+ return +1;
+ }
+ }
+
+ return y.Length.CompareTo (x.Length);
+ }
+ }
}
sealed class BlobHeapBuffer : HeapBuffer {
@@ -365,6 +423,18 @@ namespace Mono.Cecil.Metadata {
sealed class UserStringHeapBuffer : StringHeapBuffer {
+ public override uint GetStringIndex (string @string)
+ {
+ uint index;
+ if (strings.TryGetValue (@string, out index))
+ return index;
+
+ index = (uint) base.position;
+ WriteString (@string);
+ strings.Add (@string, index);
+ return index;
+ }
+
protected override void WriteString (string @string)
{
WriteCompressedUInt32 ((uint) @string.Length * 2 + 1);
diff --git a/Mono.Cecil/AssemblyWriter.cs b/Mono.Cecil/AssemblyWriter.cs
index 169a47a..fd0fee2 100644
--- a/Mono.Cecil/AssemblyWriter.cs
+++ b/Mono.Cecil/AssemblyWriter.cs
@@ -1007,6 +1007,8 @@ namespace Mono.Cecil {
{
BuildModule ();
+ table_heap.string_offsets = string_heap.WriteStrings ();
+
table_heap.WriteTableHeap ();
}