Welcome to mirror list, hosted at ThFree Co, Russian Federation.

test-data.lua.html « examples « docs - github.com/stevedonovan/Penlight.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 053a062b44747608f0d202b08f09b202162aa5ed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
    <title>Penlight Documentation</title>
    <link rel="stylesheet" href="../ldoc_fixed.css" type="text/css" />
</head>
<body>

<div id="container">

<div id="product">
	<div id="product_logo"></div>
	<div id="product_name"><big><b></b></big></div>
	<div id="product_description"></div>
</div> <!-- id="product" -->


<div id="main">


<!-- Menu -->

<div id="navigation">
<br/>
<h1>Penlight</h1>

<ul>
  <li><a href="../index.html">Index</a></li>
</ul>



<h2>Examples</h2>
<ul class="nowrap">
  <li><a href="../examples/seesubst.lua.html">seesubst.lua</a></li>
  <li><a href="../examples/sipscan.lua.html">sipscan.lua</a></li>
  <li><a href="../examples/symbols.lua.html">symbols.lua</a></li>
  <li><a href="../examples/test-cmp.lua.html">test-cmp.lua</a></li>
  <li><strong>test-data.lua</strong></li>
  <li><a href="../examples/test-listcallbacks.lua.html">test-listcallbacks.lua</a></li>
  <li><a href="../examples/test-pretty.lua.html">test-pretty.lua</a></li>
  <li><a href="../examples/test-symbols.lua.html">test-symbols.lua</a></li>
  <li><a href="../examples/testclone.lua.html">testclone.lua</a></li>
  <li><a href="../examples/testconfig.lua.html">testconfig.lua</a></li>
  <li><a href="../examples/testglobal.lua.html">testglobal.lua</a></li>
  <li><a href="../examples/testinputfields.lua.html">testinputfields.lua</a></li>
  <li><a href="../examples/testinputfields2.lua.html">testinputfields2.lua</a></li>
  <li><a href="../examples/testxml.lua.html">testxml.lua</a></li>
  <li><a href="../examples/which.lua.html">which.lua</a></li>
</ul>
<h2>Libraries</h2>
<ul class="nowrap">
  <li><a href="../libraries/pl.html">pl</a></li>
  <li><a href="../libraries/pl.app.html">pl.app</a></li>
  <li><a href="../libraries/pl.array2d.html">pl.array2d</a></li>
  <li><a href="../libraries/pl.class.html">pl.class</a></li>
  <li><a href="../libraries/pl.compat.html">pl.compat</a></li>
  <li><a href="../libraries/pl.comprehension.html">pl.comprehension</a></li>
  <li><a href="../libraries/pl.config.html">pl.config</a></li>
  <li><a href="../libraries/pl.data.html">pl.data</a></li>
  <li><a href="../libraries/pl.dir.html">pl.dir</a></li>
  <li><a href="../libraries/pl.file.html">pl.file</a></li>
  <li><a href="../libraries/pl.func.html">pl.func</a></li>
  <li><a href="../libraries/pl.import_into.html">pl.import_into</a></li>
  <li><a href="../libraries/pl.input.html">pl.input</a></li>
  <li><a href="../libraries/pl.lapp.html">pl.lapp</a></li>
  <li><a href="../libraries/pl.lexer.html">pl.lexer</a></li>
  <li><a href="../libraries/pl.luabalanced.html">pl.luabalanced</a></li>
  <li><a href="../libraries/pl.operator.html">pl.operator</a></li>
  <li><a href="../libraries/pl.path.html">pl.path</a></li>
  <li><a href="../libraries/pl.permute.html">pl.permute</a></li>
  <li><a href="../libraries/pl.pretty.html">pl.pretty</a></li>
  <li><a href="../libraries/pl.seq.html">pl.seq</a></li>
  <li><a href="../libraries/pl.sip.html">pl.sip</a></li>
  <li><a href="../libraries/pl.strict.html">pl.strict</a></li>
  <li><a href="../libraries/pl.stringio.html">pl.stringio</a></li>
  <li><a href="../libraries/pl.stringx.html">pl.stringx</a></li>
  <li><a href="../libraries/pl.tablex.html">pl.tablex</a></li>
  <li><a href="../libraries/pl.template.html">pl.template</a></li>
  <li><a href="../libraries/pl.test.html">pl.test</a></li>
  <li><a href="../libraries/pl.text.html">pl.text</a></li>
  <li><a href="../libraries/pl.types.html">pl.types</a></li>
  <li><a href="../libraries/pl.url.html">pl.url</a></li>
  <li><a href="../libraries/pl.utils.html">pl.utils</a></li>
  <li><a href="../libraries/pl.xml.html">pl.xml</a></li>
</ul>
<h2>Classes</h2>
<ul class="nowrap">
  <li><a href="../classes/pl.Date.html">pl.Date</a></li>
  <li><a href="../classes/pl.List.html">pl.List</a></li>
  <li><a href="../classes/pl.Map.html">pl.Map</a></li>
  <li><a href="../classes/pl.MultiMap.html">pl.MultiMap</a></li>
  <li><a href="../classes/pl.OrderedMap.html">pl.OrderedMap</a></li>
  <li><a href="../classes/pl.Set.html">pl.Set</a></li>
</ul>
<h2>Manual</h2>
<ul class="nowrap">
  <li><a href="../manual/01-introduction.md.html">Introduction</a></li>
  <li><a href="../manual/02-arrays.md.html">Tables and Arrays</a></li>
  <li><a href="../manual/03-strings.md.html">Strings. Higher-level operations on strings.</a></li>
  <li><a href="../manual/04-paths.md.html">Paths and Directories</a></li>
  <li><a href="../manual/05-dates.md.html">Date and Time</a></li>
  <li><a href="../manual/06-data.md.html">Data</a></li>
  <li><a href="../manual/07-functional.md.html">Functional Programming</a></li>
  <li><a href="../manual/08-additional.md.html">Additional Libraries</a></li>
  <li><a href="../manual/09-discussion.md.html">Technical Choices</a></li>
</ul>

</div>

<div id="content">

    <h2>test-data.lua</h2>
<pre>
<span class="keyword">local</span> data = <span class="global">require</span> <span class="string">'pl.data'</span>
<span class="keyword">local</span> List = <span class="global">require</span> <span class="string">'pl.List'</span>
<span class="keyword">local</span> array = <span class="global">require</span> <span class="string">'pl.array2d'</span>
<span class="keyword">local</span> func = <span class="global">require</span> <span class="string">'pl.func'</span>
<span class="keyword">local</span> seq = <span class="global">require</span> <span class="string">'pl.seq'</span>
<span class="keyword">local</span> stringio = <span class="global">require</span> <span class="string">'pl.stringio'</span>
<span class="keyword">local</span> open = stringio. open
<span class="keyword">local</span> asserteq = <span class="global">require</span> <span class="string">'pl.test'</span> . asserteq
<span class="keyword">local</span> T = <span class="global">require</span> <span class="string">'pl.test'</span>. tuple

<span class="comment">--[=[
dat,err = data.read(open [[
1.0 0.1
0.2 1.3
]])

if err then print(err) end

require 'pl.pretty'.dump(dat)
os.exit(0)
--]=]</span>

<span class="comment">-- tab-separated data, explicit column names
</span><span class="keyword">local</span> t1f = open <span class="string">[[
EventID	Magnitude	LocationX	LocationY	LocationZ	LocationError	EventDate	DataFile
981124001	2.0	18988.4	10047.1	4149.7	33.8	24/11/1998 11:18:05	981124DF.AAB
981125001	0.8	19104.0	9970.4	5088.7	3.0	25/11/1998 05:44:54	981125DF.AAB
981127003	0.5	19012.5	9946.9	3831.2	46.0	27/11/1998 17:15:17	981127DF.AAD
981127005	0.6	18676.4	10606.2	3761.9	4.4	27/11/1998 17:46:36	981127DF.AAF
981127006	0.2	19109.9	9716.5	3612.0	11.8	27/11/1998 19:29:51	981127DF.AAG
]]</span>

<span class="keyword">local</span> t1 = data.read (t1f)
<span class="comment">-- column_by_name returns a List
</span>asserteq(t1:column_by_name <span class="string">'Magnitude'</span>,List{<span class="number">2</span>,<span class="number">0.8</span>,<span class="number">0.5</span>,<span class="number">0.6</span>,<span class="number">0.2</span>})
<span class="comment">-- can use array.column as well
</span>asserteq(array.column(t1,<span class="number">2</span>),{<span class="number">2</span>,<span class="number">0.8</span>,<span class="number">0.5</span>,<span class="number">0.6</span>,<span class="number">0.2</span>})

<span class="comment">-- only numerical columns (deduced from first data row) are converted by default
</span><span class="comment">-- can look up indices in the list fieldnames.
</span><span class="keyword">local</span> EDI = t1.fieldnames:index <span class="string">'EventDate'</span>
<span class="global">assert</span>(<span class="global">type</span>(t1[<span class="number">1</span>][EDI]) == <span class="string">'string'</span>)

<span class="comment">-- select method returns a sequence, in this case single-valued.
</span><span class="comment">-- (Note that seq.copy returns a List)
</span>asserteq(seq(t1:<span class="global">select</span> <span class="string">'LocationX where Magnitude &gt; 0.5'</span>):copy(),List{<span class="number">18988.4</span>,<span class="number">19104</span>,<span class="number">18676.4</span>})

<span class="comment">--[[
--a common select usage pattern:
for event,mag in t1:select 'EventID,Magnitude sort by Magnitude desc' do
    print(event,mag)
end
--]]</span>

<span class="comment">-- space-separated, but with last field containing spaces.
</span><span class="keyword">local</span> t2f = open <span class="string">[[
USER PID %MEM %CPU COMMAND
sdonovan 2333  0.3 0.1 background --n=2
root 2332  0.4  0.2 fred --start=yes
root 2338  0.2  0.1 backyard-process
]]</span>

<span class="keyword">local</span> t2,err = data.read(t2f,{last_field_collect=<span class="keyword">true</span>})
<span class="keyword">if</span> <span class="keyword">not</span> t2 <span class="keyword">then</span> <span class="keyword">return</span> <span class="global">print</span> (err) <span class="keyword">end</span>

<span class="comment">-- the last_field_collect option is useful with space-delimited data where the last
</span><span class="comment">-- field may contain spaces. Otherwise, a record count mismatch should be an error!
</span><span class="keyword">local</span> lt2 = List(t2[<span class="number">2</span>])
asserteq(lt2:join <span class="string">','</span>,<span class="string">'root,2332,0.4,0.2,fred --start=yes'</span>)

<span class="comment">-- fieldnames are converted into valid identifiers by substituting _
</span><span class="comment">-- (we do this to make select queries parseable by Lua)
</span>asserteq(t2.fieldnames,List{<span class="string">'USER'</span>,<span class="string">'PID'</span>,<span class="string">'_MEM'</span>,<span class="string">'_CPU'</span>,<span class="string">'COMMAND'</span>})

<span class="comment">-- select queries are NOT SQL so remember to use == ! (and no 'between' operator, sorry)
</span><span class="comment">--s,err = t2:select('_MEM where USER="root"')
</span><span class="comment">--assert(err == [[[string "tmp"]:9: unexpected symbol near '=']])
</span>
<span class="keyword">local</span> s = t2:<span class="global">select</span>(<span class="string">'_MEM where USER=="root"'</span>)
<span class="global">assert</span>(s() == <span class="number">0.4</span>)
<span class="global">assert</span>(s() == <span class="number">0.2</span>)
<span class="global">assert</span>(s() == <span class="keyword">nil</span>)

<span class="comment">-- CSV, Excel style. Double-quoted fields are allowed, and they may contain commas!
</span><span class="keyword">local</span> t3f = open <span class="string">[[
"Department Name","Employee ID",Project,"Hours Booked"
sales,1231,overhead,4
sales,1255,overhead,3
engineering,1501,development,5
engineering,1501,maintenance,3
engineering,1433,maintenance,10
]]</span>

<span class="keyword">local</span> t3 = data.read(t3f,{csv=<span class="keyword">true</span>})

<span class="comment">-- although fieldnames are turned into valid Lua identifiers, there is always <code>original_fieldnames</code>
</span>asserteq(t3.fieldnames,List{<span class="string">'Department_Name'</span>,<span class="string">'Employee_ID'</span>,<span class="string">'Project'</span>,<span class="string">'Hours_Booked'</span>})
asserteq(t3.original_fieldnames,List{<span class="string">'Department Name'</span>,<span class="string">'Employee ID'</span>,<span class="string">'Project'</span>,<span class="string">'Hours Booked'</span>})

<span class="comment">-- a common operation is to select using a given list of columns, and each row
</span><span class="comment">-- on some explicit condition. The select() method can take a table with these
</span><span class="comment">-- parameters
</span><span class="keyword">local</span> keepcols = {<span class="string">'Employee_ID'</span>,<span class="string">'Hours_Booked'</span>}

<span class="keyword">local</span> q = t3:<span class="global">select</span> { fields = keepcols,
    where = <span class="keyword">function</span>(row) <span class="keyword">return</span> row[<span class="number">1</span>]==<span class="string">'engineering'</span> <span class="keyword">end</span>
    }

asserteq(seq.copy2(q),{{<span class="number">1501</span>,<span class="number">5</span>},{<span class="number">1501</span>,<span class="number">3</span>},{<span class="number">1433</span>,<span class="number">10</span>}})

<span class="comment">-- another pattern is doing a select to restrict rows &amp; columns, process some
</span><span class="comment">-- fields and write out the modified rows.
</span>
<span class="keyword">local</span> outf = stringio.create()

<span class="keyword">local</span> names = {[<span class="number">1501</span>]=<span class="string">'don'</span>,[<span class="number">1433</span>]=<span class="string">'dilbert'</span>}

t3:write_row (outf,{<span class="string">'Employee'</span>,<span class="string">'Hours_Booked'</span>})
q = t3:select_row {fields=keepcols,where=func.Eq(func._1[<span class="number">1</span>],<span class="string">'engineering'</span>)}
<span class="keyword">for</span> row <span class="keyword">in</span> q <span class="keyword">do</span>
    row[<span class="number">1</span>] = names[row[<span class="number">1</span>]]
    t3:write_row(outf,row)
<span class="keyword">end</span>

asserteq(outf:value(),
<span class="string">[[
Employee,Hours_Booked
don,5
don,3
dilbert,10
]]</span>)

<span class="comment">-- data may not always have column headers. When creating a data object
</span><span class="comment">-- from a two-dimensional array, you may specify the fieldnames, as a list or a string.
</span><span class="comment">-- The delimiter is deduced from the fieldname string, so a string just containing
</span><span class="comment">-- the delimiter will set it,  and the fieldnames will be empty.
</span><span class="keyword">local</span> dat = List()
<span class="keyword">local</span> row = List.range(<span class="number">1</span>,<span class="number">10</span>)
<span class="keyword">for</span> i = <span class="number">1</span>,<span class="number">10</span> <span class="keyword">do</span>
    dat:append(row:map(<span class="string">'*'</span>,i))
<span class="keyword">end</span>
dat = data.new(dat,<span class="string">','</span>)
<span class="keyword">local</span> out = stringio.create()
dat:write(out,<span class="string">','</span>)
asserteq(out:value(), <span class="string">[[
1,2,3,4,5,6,7,8,9,10
2,4,6,8,10,12,14,16,18,20
3,6,9,12,15,18,21,24,27,30
4,8,12,16,20,24,28,32,36,40
5,10,15,20,25,30,35,40,45,50
6,12,18,24,30,36,42,48,54,60
7,14,21,28,35,42,49,56,63,70
8,16,24,32,40,48,56,64,72,80
9,18,27,36,45,54,63,72,81,90
10,20,30,40,50,60,70,80,90,100
]]</span>)

<span class="comment">-- you can always use numerical field indices, AWK-style;
</span><span class="comment">-- note how the copy_select method gives you a data object instead of an
</span><span class="comment">-- iterator over the fields
</span><span class="keyword">local</span> res = dat:copy_select <span class="string">'$1,$3 where $1 &gt; 5'</span>
<span class="keyword">local</span> L = List
asserteq(L(res),L{
    L{<span class="number">6</span>, <span class="number">18</span>},
    L{<span class="number">7</span>,<span class="number">21</span>},
    L{<span class="number">8</span>,<span class="number">24</span>},
    L{<span class="number">9</span>,<span class="number">27</span>},
    L{<span class="number">10</span>,<span class="number">30</span>},
})

<span class="comment">-- the column_by_name method may take a fieldname or an index
</span>asserteq(dat:column_by_name(<span class="number">2</span>), L{<span class="number">2</span>,<span class="number">4</span>,<span class="number">6</span>,<span class="number">8</span>,<span class="number">10</span>,<span class="number">12</span>,<span class="number">14</span>,<span class="number">16</span>,<span class="number">18</span>,<span class="number">20</span>})

<span class="comment">-- the field list may contain expressions or even constants
</span><span class="keyword">local</span> q = dat:<span class="global">select</span> <span class="string">'$3,2*$4 where $1 == 8'</span>
asserteq(T(q()),T(<span class="number">24</span>,<span class="number">64</span>))

dat,err = data.read(open <span class="string">[[
1.0 0.1
0.2 1.3
]]</span>)

<span class="keyword">if</span> err <span class="keyword">then</span> <span class="global">print</span>(err) <span class="keyword">end</span>

<span class="comment">-- if a method cannot be found, then we look up in array2d
</span><span class="comment">-- array2d.flatten(t) makes a 1D list out of a 2D array,
</span><span class="comment">-- and then List.minmax() gets the extrema.
</span>
asserteq(T(dat:flatten():minmax()),T(<span class="number">0.1</span>,<span class="number">1.3</span>))

<span class="keyword">local</span> f = open <span class="string">[[
Time Message
1266840760 +# EE7C0600006F0D00C00F06010302054000000308010A00002B00407B00
1266840760 closure data 0.000000 1972 1972 0
1266840760 ++ 1266840760 EE 1
1266840760 +# EE7C0600006F0D00C00F06010302054000000408020A00002B00407B00
1266840764 closure data 0.000000 1972 1972 0
1266840764 ++ 1266840764 EE 1
1266840764 +# EE7C0600006F0D00C00F06010302054000000508030A00002B00407B00
1266840768 duplicate?
1266840768 +# EE7C0600006F0D00C00F06010302054000000508030A00002B00407B00
1266840768 closure data 0.000000 1972 1972 0
]]</span>

<span class="comment">-- the <code>convert</code> option provides custom converters for each specified column.
</span><span class="comment">-- Here we convert the timestamps into Date objects and collect everything
</span><span class="comment">-- else into one field
</span><span class="keyword">local</span> Date = <span class="global">require</span> <span class="string">'pl.Date'</span>

<span class="keyword">local</span> <span class="keyword">function</span> date_convert (ds)
    <span class="keyword">return</span> Date(<span class="global">tonumber</span>(ds))
<span class="keyword">end</span>

<span class="keyword">local</span> d = data.read(f,{convert={[<span class="number">1</span>]=date_convert},last_field_collect=<span class="keyword">true</span>})

asserteq(#d[<span class="number">1</span>],<span class="number">2</span>)
asserteq(d[<span class="number">2</span>][<span class="number">1</span>]:year(),<span class="number">2010</span>)

d = {{<span class="number">1</span>,<span class="number">2</span>,<span class="number">3</span>},{<span class="number">10</span>,<span class="number">20</span>,<span class="number">30</span>}}
out = stringio.create()
data.write(d,out,{<span class="string">'A'</span>,<span class="string">'B'</span>,<span class="string">'C'</span>},<span class="string">','</span>)
asserteq(out:value(),
<span class="string">[[
A,B,C
1,2,3
10,20,30
]]</span>)

out = stringio.create()
d.fieldnames = {<span class="string">'A'</span>,<span class="string">'B'</span>,<span class="string">'C'</span>}
data.write(d,out)

asserteq(out:value(),
<span class="string">[[
A	B	C
1	2	3
10	20	30
]]</span>)


d = data.read(stringio.open <span class="string">'One,Two\n1,\n,20\n'</span>,{csv=<span class="keyword">true</span>})
asserteq(d,{
    {<span class="number">1</span>,<span class="number">0</span>},{<span class="number">0</span>,<span class="number">20</span>},
    original_fieldnames={<span class="string">"One"</span>,<span class="string">"Two"</span>},fieldnames={<span class="string">"One"</span>,<span class="string">"Two"</span>},delim=<span class="string">","</span>
})</pre>


</div> <!-- id="content" -->
</div> <!-- id="main" -->
<div id="about">
<i>generated by <a href="http://github.com/stevedonovan/LDoc">LDoc 1.4.6</a></i>
<i style="float:right;">Last updated 2020-08-05 10:30:11 </i>
</div> <!-- id="about" -->
</div> <!-- id="container" -->
</body>
</html>