md_doc_dom.html


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367

<!-- HTML header for doxygen 1.8.7-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen 1.8.7"/>
<title>RapidJSON: DOM</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="navtree.js"></script>
<script type="text/javascript">
  // Once the DOM is ready, run initResizable (not defined in this file; presumably provided by resize.js — confirm).
  $(document).ready(initResizable);
  // Bind resizeHeight (also defined outside this file) to the window's load event via jQuery's .load() shorthand.
  $(window).load(resizeHeight);
</script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/search.js"></script>
<script type="text/javascript">
  // When the DOM is ready, select search filter 0, which the filter menu further down labels "All".
  // searchBox itself is created by an inline script later in the page body.
  $(document).ready(function() { searchBox.OnSelectItem(0); });
</script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
<link href="doxygenextra.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript">
  // Google Analytics (analytics.js) loader. It is kept inside the head element:
  // a script element placed between the end of head and the start of body is
  // invalid markup. The type attribute matches the other scripts in this file.
  //
  // The IIFE installs a queueing stub as window.ga, records the load time, and
  // inserts an async script element before the first script in the document.
  // The loader URL is absolute https so it also resolves when this page is
  // opened from the local file system instead of a web server.
  (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
  (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
  m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
  })(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
  // Queue tracker creation for this property, then queue a page view hit.
  ga('create', 'UA-63929386-1', 'auto');
  ga('send', 'pageview');
</script>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="topbanner"><a href="https://github.com/miloyip/rapidjson" title="RapidJSON GitHub"><i class="githublogo"></i></a></div>
        <div id="MSearchBox" class="MSearchBoxInactive">
        <span class="left">
          <img id="MSearchSelect" src="search/mag_sel.png"
               onmouseover="return searchBox.OnSearchSelectShow()"
               onmouseout="return searchBox.OnSearchSelectHide()"
               alt=""/>
          <input type="text" id="MSearchField" value="Search" accesskey="S"
               onfocus="searchBox.OnSearchFieldFocus(true)" 
               onblur="searchBox.OnSearchFieldFocus(false)" 
               onkeyup="searchBox.OnSearchFieldChange(event)"/>
          </span><span class="right">
            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.png" alt=""/></a>
          </span>
        </div>
<!-- end header part -->
<!-- Generated by Doxygen 1.8.7 -->
<script type="text/javascript">
// Global search controller referenced by the inline on* handlers of the search field and filter menu in this page.
// SearchBox is not defined in this file (presumably search/search.js — confirm); the argument meanings are not visible here.
var searchBox = new SearchBox("searchBox", "search",false,'Search');
</script>
</div><!-- top -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
  <div id="nav-tree">
    <div id="nav-tree-contents">
      <div id="nav-sync" class="sync"></div>
    </div>
  </div>
  <div id="splitbar" style="-moz-user-select:none;" 
       class="ui-resizable-handle">
  </div>
</div>
<script type="text/javascript">
// When the DOM is ready, call initNavTree with this page's file name ('md_doc_dom.html').
// initNavTree is defined outside this file (presumably navtree.js — confirm); the meaning of the empty second argument is not visible here.
$(document).ready(function(){initNavTree('md_doc_dom.html','');});
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
<a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(0)"><span class="SelectionMark">&#160;</span>All</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(1)"><span class="SelectionMark">&#160;</span>Classes</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(2)"><span class="SelectionMark">&#160;</span>Namespaces</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(3)"><span class="SelectionMark">&#160;</span>Files</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(4)"><span class="SelectionMark">&#160;</span>Functions</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(5)"><span class="SelectionMark">&#160;</span>Variables</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(6)"><span class="SelectionMark">&#160;</span>Typedefs</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(7)"><span class="SelectionMark">&#160;</span>Enumerations</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(8)"><span class="SelectionMark">&#160;</span>Enumerator</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(9)"><span class="SelectionMark">&#160;</span>Friends</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(10)"><span class="SelectionMark">&#160;</span>Macros</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(11)"><span class="SelectionMark">&#160;</span>Groups</a><a class="SelectItem" href="javascript:void(0)" onclick="searchBox.OnSelectItem(12)"><span class="SelectionMark">&#160;</span>Pages</a></div>

<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<!-- The title gives the frame an accessible name for assistive technology. -->
<iframe src="javascript:void(0)" frameborder="0" 
        name="MSearchResults" id="MSearchResults" title="Search results">
</iframe>
</div>

<div class="header">
  <div class="headertitle">
<div class="title">DOM </div>  </div>
</div><!--header-->
<div class="contents">
<div class="toc"><h3>Table of Contents</h3>
<ul><li class="level1"><a href="#Template">Template</a><ul><li class="level2"><a href="#Encoding">Encoding</a></li>
<li class="level2"><a href="#Allocator">Allocator</a></li>
</ul>
</li>
<li class="level1"><a href="#Parsing">Parsing</a><ul><li class="level2"><a href="#ParseError">Parse Error</a></li>
<li class="level2"><a href="#InSituParsing">In Situ Parsing</a></li>
<li class="level2"><a href="#TranscodingAndValidation">Transcoding and Validation</a></li>
</ul>
</li>
<li class="level1"><a href="#Techniques">Techniques</a><ul><li class="level2"><a href="#UserBuffer">User Buffer</a></li>
</ul>
</li>
</ul>
</div>
<div class="textblock"><p>Document Object Model(DOM) is an in-memory representation of JSON for query and manipulation. The basic usage of DOM is described in <a class="el" href="md_doc_tutorial.html">Tutorial</a>. This section will describe some details and more advanced usages.</p>
<h1><a class="anchor" id="Template"></a>
Template</h1>
<p>In the tutorial, <code>Value</code> and <code>Document</code> were used. Similar to <code>std::string</code>, these are actually <code>typedef</code>s of template classes:</p>
<div class="fragment"><div class="line"><span class="keyword">namespace </span>rapidjson {</div>
<div class="line"></div>
<div class="line"><span class="keyword">template</span> &lt;<span class="keyword">typename</span> Encoding, <span class="keyword">typename</span> Allocator = MemoryPoolAllocator&lt;&gt; &gt;</div>
<div class="line"><span class="keyword">class </span>GenericValue {</div>
<div class="line">    <span class="comment">// ...</span></div>
<div class="line">};</div>
<div class="line"></div>
<div class="line"><span class="keyword">template</span> &lt;<span class="keyword">typename</span> Encoding, <span class="keyword">typename</span> Allocator = MemoryPoolAllocator&lt;&gt; &gt;</div>
<div class="line"><span class="keyword">class </span>GenericDocument : <span class="keyword">public</span> GenericValue&lt;Encoding, Allocator&gt; {</div>
<div class="line">    <span class="comment">// ...</span></div>
<div class="line">};</div>
<div class="line"></div>
<div class="line"><span class="keyword">typedef</span> GenericValue&lt;UTF8&lt;&gt; &gt; <a class="code" href="namespacerapidjson.html#aa65fc9fb381b2cbc54f98673eadd6505">Value</a>;</div>
<div class="line"><span class="keyword">typedef</span> GenericDocument&lt;UTF8&lt;&gt; &gt; <a class="code" href="namespacerapidjson.html#ace11b5b575baf1cccd5ba5f8586dcdc8">Document</a>;</div>
<div class="line"></div>
<div class="line">} <span class="comment">// namespace rapidjson</span></div>
</div><!-- fragment --><p>User can customize these template parameters.</p>
<h2><a class="anchor" id="Encoding"></a>
Encoding</h2>
<p>The <code>Encoding</code> parameter specifies the encoding of JSON string values in memory. Possible options are <code>UTF8</code>, <code>UTF16</code>, <code>UTF32</code>. Note that these three types are also template classes. <code>UTF8&lt;&gt;</code> is <code>UTF8&lt;char&gt;</code>, which means using char to store the characters. You may refer to <a class="el" href="md_doc_encoding.html">Encoding</a> for details.</p>
<p>Suppose a Windows application would query localization strings stored in JSON files. Unicode-enabled functions in Windows use UTF-16 (wide character) encoding. No matter what encoding was used in JSON files, we can store the strings in UTF-16 in memory.</p>
<div class="fragment"><div class="line"><span class="keyword">using namespace </span>rapidjson;</div>
<div class="line"></div>
<div class="line"><span class="keyword">typedef</span> <a class="code" href="classrapidjson_1_1_generic_document.html">GenericDocument&lt;UTF16&lt;&gt;</a> &gt; WDocument;</div>
<div class="line"><span class="keyword">typedef</span> <a class="code" href="classrapidjson_1_1_generic_value.html">GenericValue&lt;UTF16&lt;&gt;</a> &gt; WValue;</div>
<div class="line"></div>
<div class="line">FILE* fp = fopen(<span class="stringliteral">&quot;localization.json&quot;</span>, <span class="stringliteral">&quot;rb&quot;</span>); <span class="comment">// non-Windows use &quot;r&quot;</span></div>
<div class="line"></div>
<div class="line"><span class="keywordtype">char</span> readBuffer[256];</div>
<div class="line"><a class="code" href="classrapidjson_1_1_file_read_stream.html">FileReadStream</a> bis(fp, readBuffer, <span class="keyword">sizeof</span>(readBuffer));</div>
<div class="line"></div>
<div class="line"><a class="code" href="classrapidjson_1_1_auto_u_t_f_input_stream.html">AutoUTFInputStream&lt;unsigned, FileReadStream&gt;</a> eis(bis);  <span class="comment">// wraps bis into eis</span></div>
<div class="line"></div>
<div class="line">WDocument d;</div>
<div class="line">d.ParseStream&lt;0, <a class="code" href="structrapidjson_1_1_auto_u_t_f.html">AutoUTF&lt;unsigned&gt;</a> &gt;(eis);</div>
<div class="line"></div>
<div class="line"><span class="keyword">const</span> WValue locale(L<span class="stringliteral">&quot;ja&quot;</span>); <span class="comment">// Japanese</span></div>
<div class="line"></div>
<div class="line">MessageBoxW(hWnd, d[locale].GetString(), L<span class="stringliteral">&quot;Test&quot;</span>, MB_OK);</div>
</div><!-- fragment --><h2><a class="anchor" id="Allocator"></a>
Allocator</h2>
<p>The <code>Allocator</code> defines which allocator class is used when allocating/deallocating memory for <code>Document</code>/<code>Value</code>. <code>Document</code> owns, or references to an <code>Allocator</code> instance. On the other hand, <code>Value</code> does not do so, in order to reduce memory consumption.</p>
<p>The default allocator used in <code>GenericDocument</code> is <code>MemoryPoolAllocator</code>. This allocator actually allocates memory sequentially, and cannot deallocate blocks one by one. This is very suitable when parsing a JSON into a DOM tree.</p>
<p>Another allocator is <code>CrtAllocator</code>, where CRT is short for C RunTime library. This allocator simply calls the standard <code>malloc()</code>/<code>realloc()</code>/<code>free()</code>. When there are a lot of add and remove operations, this allocator may be preferred. But this allocator is far less efficient than <code>MemoryPoolAllocator</code>.</p>
<h1><a class="anchor" id="Parsing"></a>
Parsing</h1>
<p><code>Document</code> provides several functions for parsing. In the list below, (1) is the fundamental function, while the others are helpers which call (1).</p>
<div class="fragment"><div class="line"><span class="keyword">using namespace </span>rapidjson;</div>
<div class="line"></div>
<div class="line"><span class="comment">// (1) Fundamental</span></div>
<div class="line"><span class="keyword">template</span> &lt;<span class="keywordtype">unsigned</span> parseFlags, <span class="keyword">typename</span> SourceEncoding, <span class="keyword">typename</span> InputStream&gt;</div>
<div class="line"><a class="code" href="classrapidjson_1_1_generic_document.html">GenericDocument</a>&amp; <a class="code" href="classrapidjson_1_1_generic_document.html#a3ae97682cf04685c7db9d89ebc399b85">GenericDocument::ParseStream</a>(InputStream&amp; is);</div>
<div class="line"></div>
<div class="line"><span class="comment">// (2) Using the same Encoding for stream</span></div>
<div class="line"><span class="keyword">template</span> &lt;<span class="keywordtype">unsigned</span> parseFlags, <span class="keyword">typename</span> InputStream&gt;</div>
<div class="line"><a class="code" href="classrapidjson_1_1_generic_document.html">GenericDocument</a>&amp; <a class="code" href="classrapidjson_1_1_generic_document.html#a3ae97682cf04685c7db9d89ebc399b85">GenericDocument::ParseStream</a>(InputStream&amp; is);</div>
<div class="line"></div>
<div class="line"><span class="comment">// (3) Using default parse flags</span></div>
<div class="line"><span class="keyword">template</span> &lt;<span class="keyword">typename</span> InputStream&gt;</div>
<div class="line"><a class="code" href="classrapidjson_1_1_generic_document.html">GenericDocument</a>&amp; <a class="code" href="classrapidjson_1_1_generic_document.html#a3ae97682cf04685c7db9d89ebc399b85">GenericDocument::ParseStream</a>(InputStream&amp; is);</div>
<div class="line"></div>
<div class="line"><span class="comment">// (4) In situ parsing</span></div>
<div class="line"><span class="keyword">template</span> &lt;<span class="keywordtype">unsigned</span> parseFlags&gt;</div>
<div class="line"><a class="code" href="classrapidjson_1_1_generic_document.html">GenericDocument</a>&amp; <a class="code" href="classrapidjson_1_1_generic_document.html#a7ba626bf84eb45a9bca0b7723bf47f3f">GenericDocument::ParseInsitu</a>(Ch* str);</div>
<div class="line"></div>
<div class="line"><span class="comment">// (5) In situ parsing, using default parse flags</span></div>
<div class="line"><a class="code" href="classrapidjson_1_1_generic_document.html">GenericDocument</a>&amp; <a class="code" href="classrapidjson_1_1_generic_document.html#a7ba626bf84eb45a9bca0b7723bf47f3f">GenericDocument::ParseInsitu</a>(Ch* str);</div>
<div class="line"></div>
<div class="line"><span class="comment">// (6) Normal parsing of a string</span></div>
<div class="line"><span class="keyword">template</span> &lt;<span class="keywordtype">unsigned</span> parseFlags, <span class="keyword">typename</span> SourceEncoding&gt;</div>
<div class="line"><a class="code" href="classrapidjson_1_1_generic_document.html">GenericDocument</a>&amp; <a class="code" href="classrapidjson_1_1_generic_document.html#aea842b533a858c9a3861451ad9e8642c">GenericDocument::Parse</a>(<span class="keyword">const</span> Ch* str);</div>
<div class="line"></div>
<div class="line"><span class="comment">// (7) Normal parsing of a string, using same Encoding of Document</span></div>
<div class="line"><span class="keyword">template</span> &lt;<span class="keywordtype">unsigned</span> parseFlags&gt;</div>
<div class="line"><a class="code" href="classrapidjson_1_1_generic_document.html">GenericDocument</a>&amp; <a class="code" href="classrapidjson_1_1_generic_document.html#aea842b533a858c9a3861451ad9e8642c">GenericDocument::Parse</a>(<span class="keyword">const</span> Ch* str);</div>
<div class="line"></div>
<div class="line"><span class="comment">// (8) Normal parsing of a string, using default parse flags</span></div>
<div class="line"><a class="code" href="classrapidjson_1_1_generic_document.html">GenericDocument</a>&amp; <a class="code" href="classrapidjson_1_1_generic_document.html#aea842b533a858c9a3861451ad9e8642c">GenericDocument::Parse</a>(<span class="keyword">const</span> Ch* str);</div>
</div><!-- fragment --><p>The examples in the <a class="el" href="md_doc_tutorial.html">tutorial</a> use (8) for normal parsing of a string. The examples in <a class="el" href="md_doc_stream.html">stream</a> use the first three. <em>In situ</em> parsing is described below.</p>
<p>The <code>parseFlags</code> are a combination of the following bit-flags:</p>
<table class="doxtable">
<tr>
<th>Parse flags </th><th>Meaning  </th></tr>
<tr>
<td><code>kParseNoFlags</code> </td><td>No flag is set. </td></tr>
<tr>
<td><code>kParseDefaultFlags</code> </td><td>Default parse flags. It is equal to macro <code>RAPIDJSON_PARSE_DEFAULT_FLAGS</code>, which is defined as <code>kParseNoFlags</code>. </td></tr>
<tr>
<td><code>kParseInsituFlag</code> </td><td>In-situ(destructive) parsing. </td></tr>
<tr>
<td><code>kParseValidateEncodingFlag</code> </td><td>Validate encoding of JSON strings. </td></tr>
<tr>
<td><code>kParseIterativeFlag</code> </td><td>Iterative(constant complexity in terms of function call stack size) parsing. </td></tr>
<tr>
<td><code>kParseStopWhenDoneFlag</code> </td><td>After parsing a complete JSON root from the stream, stop further processing the rest of the stream. When this flag is used, the parser will not generate a <code>kParseErrorDocumentRootNotSingular</code> error. Use this flag for parsing multiple JSONs in the same stream. </td></tr>
<tr>
<td><code>kParseFullPrecisionFlag</code> </td><td>Parse number in full precision (slower). If this flag is not set, the normal precision (faster) is used. Normal precision has maximum 3 <a href="http://en.wikipedia.org/wiki/Unit_in_the_last_place">ULP</a> error. </td></tr>
<tr>
<td><code>kParseCommentsFlag</code> </td><td>Allow one-line <code>// ...</code> and multi-line <code>/* ... */</code> comments (relaxed JSON syntax). </td></tr>
</table>
<p>By using a non-type template parameter, instead of a function parameter, the C++ compiler can generate code which is optimized for specified combinations, improving speed, and reducing code size (if only using a single specialization). The downside is that the flags need to be determined at compile time.</p>
<p>The <code>SourceEncoding</code> parameter defines what encoding is used in the stream. This can differ from the <code>Encoding</code> of the <code>Document</code>. See the <a href="#TranscodingAndValidation">Transcoding and Validation</a> section for details.</p>
<p>And <code>InputStream</code> is the type of the input stream.</p>
<h2><a class="anchor" id="ParseError"></a>
Parse Error</h2>
<p>When parsing succeeds, the <code>Document</code> contains the parse results. When there is an error, the original DOM is <em>unchanged</em>. The error state of parsing can be obtained by <code>bool HasParseError()</code>, <code>ParseErrorCode GetParseError()</code> and <code>size_t GetErrorOffset()</code>.</p>
<table class="doxtable">
<tr>
<th>Parse Error Code </th><th>Description  </th></tr>
<tr>
<td><code>kParseErrorNone</code> </td><td>No error. </td></tr>
<tr>
<td><code>kParseErrorDocumentEmpty</code> </td><td>The document is empty. </td></tr>
<tr>
<td><code>kParseErrorDocumentRootNotSingular</code> </td><td>The document root must not be followed by other values. </td></tr>
<tr>
<td><code>kParseErrorValueInvalid</code> </td><td>Invalid value. </td></tr>
<tr>
<td><code>kParseErrorObjectMissName</code> </td><td>Missing a name for object member. </td></tr>
<tr>
<td><code>kParseErrorObjectMissColon</code> </td><td>Missing a colon after a name of object member. </td></tr>
<tr>
<td><code>kParseErrorObjectMissCommaOrCurlyBracket</code> </td><td>Missing a comma or <code>}</code> after an object member. </td></tr>
<tr>
<td><code>kParseErrorArrayMissCommaOrSquareBracket</code> </td><td>Missing a comma or <code>]</code> after an array element. </td></tr>
<tr>
<td><code>kParseErrorStringUnicodeEscapeInvalidHex</code> </td><td>Incorrect hex digit after <code>\u</code> escape in string. </td></tr>
<tr>
<td><code>kParseErrorStringUnicodeSurrogateInvalid</code> </td><td>The surrogate pair in string is invalid. </td></tr>
<tr>
<td><code>kParseErrorStringEscapeInvalid</code> </td><td>Invalid escape character in string. </td></tr>
<tr>
<td><code>kParseErrorStringMissQuotationMark</code> </td><td>Missing a closing quotation mark in string. </td></tr>
<tr>
<td><code>kParseErrorStringInvalidEncoding</code> </td><td>Invalid encoding in string. </td></tr>
<tr>
<td><code>kParseErrorNumberTooBig</code> </td><td>Number too big to be stored in <code>double</code>. </td></tr>
<tr>
<td><code>kParseErrorNumberMissFraction</code> </td><td>Missing fraction part in number. </td></tr>
<tr>
<td><code>kParseErrorNumberMissExponent</code> </td><td>Missing exponent in number. </td></tr>
</table>
<p>The offset of error is defined as the character number from beginning of stream. Currently RapidJSON does not keep track of line number.</p>
<p>To get an error message, RapidJSON provides English messages in <code><a class="el" href="en_8h_source.html">rapidjson/error/en.h</a></code>. Users can customize them for other locales, or use a custom localization system.</p>
<p>Here is an example of parse error handling.</p>
<div class="fragment"><div class="line"><span class="preprocessor">#include &quot;<a class="code" href="document_8h.html">rapidjson/document.h</a>&quot;</span></div>
<div class="line"><span class="preprocessor">#include &quot;rapidjson/error/en.h&quot;</span></div>
<div class="line"></div>
<div class="line"><span class="comment">// ...</span></div>
<div class="line"><a class="code" href="namespacerapidjson.html#ace11b5b575baf1cccd5ba5f8586dcdc8">Document</a> d;</div>
<div class="line"><span class="keywordflow">if</span> (d.Parse(json).HasParseError()) {</div>
<div class="line">    fprintf(stderr, <span class="stringliteral">&quot;\nError(offset %u): %s\n&quot;</span>, </div>
<div class="line">        (<span class="keywordtype">unsigned</span>)d.GetErrorOffset(),</div>
<div class="line">        <a class="code" href="group___r_a_p_i_d_j_s_o_n___e_r_r_o_r_s.html#gabdaf1a7a4db30fb0e3d927fdf0fabe79">GetParseError_En</a>(d.GetParseError()));</div>
<div class="line">    <span class="comment">// ...</span></div>
<div class="line">}</div>
</div><!-- fragment --><h2><a class="anchor" id="InSituParsing"></a>
In Situ Parsing</h2>
<p>From <a href="http://en.wikipedia.org/wiki/In_situ">Wikipedia</a>:</p>
<blockquote class="doxtable">
<p><em>In situ</em> ... is a Latin phrase that translates literally to "on site" or "in position". It means "locally", "on site", "on the premises" or "in place" to describe an event where it takes place, and is used in many different contexts. ... (In computer science) An algorithm is said to be an in situ algorithm, or in-place algorithm, if the extra amount of memory required to execute the algorithm is O(1), that is, does not exceed a constant no matter how large the input. For example, heapsort is an in situ sorting algorithm. </p>
</blockquote>
<p>In the normal parsing process, a large overhead is decoding JSON strings and copying them to other buffers. <em>In situ</em> parsing decodes those JSON strings at the place where they are stored. This is possible in JSON because the length of a decoded string is always shorter than or equal to the one in JSON. In this context, decoding a JSON string means processing the escapes, such as <code>"\n"</code>, <code>"\u1234"</code>, etc., and adding a null terminator (<code>'\0'</code>) at the end of the string.</p>
<p>The following diagrams compare normal and <em>in situ</em> parsing. The JSON string values contain pointers to the decoded string.</p>
<div class="image">
<img src="normalparsing.png" alt="normalparsing.png"/>
<div class="caption">
normal parsing</div></div>
<p> In normal parsing, the decoded strings are copied to freshly allocated buffers. <code>"\\n"</code> (2 characters) is decoded as <code>"\n"</code> (1 character). <code>"\\u0073"</code> (6 characters) is decoded as <code>"s"</code> (1 character).</p>
<div class="image">
<img src="insituparsing.png" alt="insituparsing.png"/>
<div class="caption">
in situ parsing</div></div>
<p> <em>In situ</em> parsing just modifies the original JSON. Updated characters are highlighted in the diagram. If the JSON string does not contain any escape characters, such as <code>"msg"</code>, the parsing process merely replaces the closing double quotation mark with a null character.</p>
<p>Since <em>in situ</em> parsing modifies the input, the parsing API needs <code>char*</code> instead of <code>const char*</code>.</p>
<div class="fragment"><div class="line"><span class="comment">// Read whole file into a buffer</span></div>
<div class="line">FILE* fp = fopen(<span class="stringliteral">&quot;test.json&quot;</span>, <span class="stringliteral">&quot;r&quot;</span>);</div>
<div class="line">fseek(fp, 0, SEEK_END);</div>
<div class="line"><span class="keywordtype">size_t</span> filesize = (size_t)ftell(fp);</div>
<div class="line">fseek(fp, 0, SEEK_SET);</div>
<div class="line"><span class="keywordtype">char</span>* buffer = (<span class="keywordtype">char</span>*)malloc(filesize + 1);</div>
<div class="line"><span class="keywordtype">size_t</span> readLength = fread(buffer, 1, filesize, fp);</div>
<div class="line">buffer[readLength] = <span class="charliteral">&#39;\0&#39;</span>;</div>
<div class="line">fclose(fp);</div>
<div class="line"></div>
<div class="line"><span class="comment">// In situ parsing the buffer into d, buffer will also be modified</span></div>
<div class="line"><a class="code" href="namespacerapidjson.html#ace11b5b575baf1cccd5ba5f8586dcdc8">Document</a> d;</div>
<div class="line">d.ParseInsitu(buffer);</div>
<div class="line"></div>
<div class="line"><span class="comment">// Query/manipulate the DOM here...</span></div>
<div class="line"></div>
<div class="line">free(buffer);</div>
<div class="line"><span class="comment">// Note: At this point, d may have dangling pointers pointed to the deallocated buffer.</span></div>
</div><!-- fragment --><p>The JSON strings are marked as const-string. But they may not be really "constant". Their life cycle depends on the JSON buffer.</p>
<p>In situ parsing minimizes allocation overheads and memory copying. Generally this improves cache coherence, which is an important factor of performance on modern computers.</p>
<p>There are some limitations of <em>in situ</em> parsing:</p>
<ol type="1">
<li>The whole JSON is in memory.</li>
<li>The source encoding in stream and target encoding in document must be the same.</li>
<li>The buffer needs to be retained until the document is no longer used.</li>
<li>If the DOM needs to be used for a long period after parsing, and there are few JSON strings in the DOM, retaining the buffer may be a waste of memory.</li>
</ol>
<p><em>In situ</em> parsing is mostly suitable for short-term JSON that only needs to be processed once and then released from memory. In practice, this situation is very common, for example, deserializing JSON to C++ objects, processing web requests represented in JSON, etc.</p>
<h2><a class="anchor" id="TranscodingAndValidation"></a>
Transcoding and Validation</h2>
<p>RapidJSON supports conversion between Unicode formats (officially termed UCS Transformation Format) internally. During DOM parsing, the source encoding of the stream can be different from the encoding of the DOM. For example, the source stream contains a UTF-8 JSON, while the DOM is using UTF-16 encoding. There is an example code in <a class="el" href="md_doc_stream.html">EncodedInputStream</a>.</p>
<p>When writing a JSON from DOM to output stream, transcoding can also be used. An example is in <a class="el" href="md_doc_stream.html">EncodedOutputStream</a>.</p>
<p>During transcoding, the source string is decoded into Unicode code points, and then the code points are encoded in the target format. During decoding, it will validate the byte sequence in the source string. If it is not a valid sequence, the parser will stop with a <code>kParseErrorStringInvalidEncoding</code> error.</p>
<p>When the source encoding of stream is the same as encoding of DOM, by default, the parser will <em>not</em> validate the sequence. User may use <code>kParseValidateEncodingFlag</code> to force validation.</p>
<h1><a class="anchor" id="Techniques"></a>
Techniques</h1>
<p>Some techniques for using the DOM API are discussed here.</p>
<h2>DOM as SAX Event Publisher</h2>
<p>In RapidJSON, stringifying a DOM with <code>Writer</code> may look a little bit weird.</p>
<div class="fragment"><div class="line"><span class="comment">// ...</span></div>
<div class="line">Writer&lt;StringBuffer&gt; writer(buffer);</div>
<div class="line">d.Accept(writer);</div>
</div><!-- fragment --><p>Actually, <code>Value::Accept()</code> is responsible for publishing SAX events about the value to the handler. With this design, <code>Value</code> and <code>Writer</code> are decoupled. <code>Value</code> can generate SAX events, and <code>Writer</code> can handle those events.</p>
<p>User may create custom handlers for transforming the DOM into other formats. For example, a handler which converts the DOM into XML.</p>
<p>For more about SAX events and handler, please refer to <a class="el" href="md_doc_sax.html">SAX</a>.</p>
<h2><a class="anchor" id="UserBuffer"></a>
User Buffer</h2>
<p>Some applications may try to avoid memory allocations whenever possible.</p>
<p><code>MemoryPoolAllocator</code> can support this by letting the user provide a buffer. The buffer can be on the program stack, or a "scratch buffer" which is statically allocated (a static/global array) for storing temporary data.</p>
<p><code>MemoryPoolAllocator</code> will use the user buffer to satisfy allocations. When the user buffer is used up, it will allocate a chunk of memory from the base allocator (by default the <code>CrtAllocator</code>).</p>
<p>Here is an example of using stack memory. The first allocator is for storing values, while the second allocator is for storing temporary data during parsing.</p>
<div class="fragment"><div class="line"><span class="keyword">typedef</span> GenericDocument&lt;UTF8&lt;&gt;, MemoryPoolAllocator&lt;&gt;, MemoryPoolAllocator&lt;&gt;&gt; DocumentType;</div>
<div class="line"><span class="keywordtype">char</span> valueBuffer[4096];</div>
<div class="line"><span class="keywordtype">char</span> parseBuffer[1024];</div>
<div class="line">MemoryPoolAllocator&lt;&gt; valueAllocator(valueBuffer, <span class="keyword">sizeof</span>(valueBuffer));</div>
<div class="line">MemoryPoolAllocator&lt;&gt; parseAllocator(parseBuffer, <span class="keyword">sizeof</span>(parseBuffer));</div>
<div class="line">DocumentType d(&amp;valueAllocator, <span class="keyword">sizeof</span>(parseBuffer), &amp;parseAllocator);</div>
<div class="line">d.Parse(json);</div>
</div><!-- fragment --><p>If the total size of allocation is less than 4096+1024 bytes during parsing, this code does not invoke any heap allocation (via <code>new</code> or <code>malloc()</code>) at all.</p>
<p>User can query the current memory consumption in bytes via <code>MemoryPoolAllocator::Size()</code>. And then user can determine a suitable size of user buffer. </p>
</div></div><!-- contents -->
</div><!-- doc-content -->
<!-- HTML footer for doxygen 1.8.7-->
<!-- start footer part -->
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
  <ul>
  </ul>
</div>
<script type="text/javascript">
    /* * * CONFIGURATION VARIABLES * * */
    // Forum shortname; declared at script top level and used below to build the embed URL.
    var disqus_shortname = 'rapidjson-doc';
    /* * * DON'T EDIT BELOW THIS LINE * * */
    (function() {
        // The comment thread container is appended to the first element with
        // class "contents". If the page has no such element, skip loading the
        // embed script rather than throwing a TypeError on undefined.
        var contents = document.getElementsByClassName('contents')[0];
        if (!contents) {
            return;
        }

        // Container with the id "disqus_thread" (presumably the element
        // embed.js renders the thread into; confirm against the Disqus docs).
        var dt = document.createElement('div');
        dt.id = "disqus_thread";
        contents.appendChild(dt);

        // Load the embed script asynchronously. The URL is absolute https so
        // it also resolves when this page is opened from the local file system.
        var dsq = document.createElement('script');
        dsq.type = 'text/javascript';
        dsq.async = true;
        dsq.src = 'https://' + disqus_shortname + '.disqus.com/embed.js';
        (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
    })();
</script>
</body>
</html>