doc/CoreNLP.proto


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732

syntax = "proto2";

package edu.stanford.nlp.pipeline;

option java_package = "edu.stanford.nlp.pipeline";
option java_outer_classname = "CoreNLPProtos";

//
// From JAVANLP_HOME, you can build me with the command:
//
//  protoc -I=src/edu/stanford/nlp/pipeline/ --java_out=src src/edu/stanford/nlp/pipeline/CoreNLP.proto
//

//
// To do the python version:
//
//  protoc -I=./doc --python_out=./stanza/protobuf ./doc/CoreNLP.proto
//

//
// An enumeration for the valid languages allowed in CoreNLP
//
enum Language {
  // Listed first, so this is the proto2 default for an unset Language field.
  Unknown = 0;
  Any = 1;
  Arabic = 2;
  Chinese = 3;
  English = 4;
  German = 5;
  French = 6;
  Hebrew = 7;
  Spanish = 8;
  UniversalEnglish = 9;
  UniversalChinese = 10;
}

//
// A document; that is, the equivalent of an Annotation.
//
message Document {
  // The document text; the only required field of this message.
  required string text = 1;
  repeated Sentence sentence = 2;
  repeated CorefChain corefChain = 3;
  optional string docID = 4;
  optional string docDate = 7;
  optional uint64 calendar = 8;

  // A peculiar field, for the corner case when a Document is serialized
  // without any sentences.
  // NOTE(review): presumably tokens are otherwise carried by Sentence.token;
  // confirm against the serializer.
  repeated Token sentencelessToken = 5;
  repeated Token character = 10;

  repeated Quote quote = 6;

  // Entity mentions across the document.
  repeated NERMention mentions = 9;
  // Used to differentiate between a null and an empty list.
  optional bool hasEntityMentionsAnnotation = 13;

  // XML information.
  optional bool xmlDoc = 11;
  repeated Section sections = 12;

  // Coref mentions for the entire document.
  repeated Mention mentionsForCoref = 14;
  optional bool hasCorefMentionAnnotation = 15;
  optional bool hasCorefAnnotation = 16;
  repeated int32 corefMentionToEntityMentionMappings = 17;
  repeated int32 entityMentionToCorefMentionMappings = 18;

  // Field numbers set aside for extensions of this message.
  extensions 100 to 255;
}

//
// The serialized version of a CoreMap representing a sentence.
//
message Sentence {
  // Tokens of the sentence and the token offsets it covers. The two token
  // offsets are the only required fields of this message.
  repeated Token            token                               = 1;
  required uint32           tokenOffsetBegin                    = 2;
  required uint32           tokenOffsetEnd                      = 3;
  optional uint32           sentenceIndex                       = 4;
  optional uint32           characterOffsetBegin                = 5;
  optional uint32           characterOffsetEnd                  = 6;
  // Constituency parse trees (see ParseTree) and sentence-level sentiment.
  optional ParseTree        parseTree                           = 7;
  optional ParseTree        binarizedParseTree                  = 31;
  optional ParseTree        annotatedParseTree                  = 32;
  optional string           sentiment                           = 33;
  repeated ParseTree        kBestParseTrees                     = 34;
  // Dependency graphs in their various flavours (see DependencyGraph),
  // interleaved with OpenIE / KBP / natural-logic outputs.
  optional DependencyGraph  basicDependencies                   = 8;
  optional DependencyGraph  collapsedDependencies               = 9;
  optional DependencyGraph  collapsedCCProcessedDependencies    = 10;
  optional DependencyGraph  alternativeDependencies             = 13;
  repeated RelationTriple   openieTriple                        = 14;   // The OpenIE triples in the sentence
  repeated RelationTriple   kbpTriple                           = 16;   // The KBP triples in this sentence
  repeated SentenceFragment entailedSentence                    = 15;   // The entailed sentences, by natural logic
  repeated SentenceFragment entailedClause                      = 35;   // The entailed clauses, by natural logic
  optional DependencyGraph  enhancedDependencies                = 17;
  optional DependencyGraph  enhancedPlusPlusDependencies        = 18;
  repeated Token            character                           = 19;

  optional uint32           paragraph                           = 11;

  optional string           text                                = 12;   // Only needed if we're only saving the sentence.

  optional uint32           lineNumber                          = 20;

  // Fields set by other annotators in CoreNLP
  optional bool            hasRelationAnnotations              = 51;
  repeated Entity          entity                              = 52;
  repeated Relation        relation                            = 53;
  optional bool            hasNumerizedTokensAnnotation        = 54;
  repeated NERMention      mentions                            = 55;
  repeated Mention         mentionsForCoref                    = 56;
  optional bool            hasCorefMentionsAnnotation          = 57;

  optional string          sentenceID                          = 58;  // Useful when storing sentences (e.g. ForEach)
  optional string          sectionDate                         = 59;  // date of section
  optional uint32          sectionIndex                        = 60;  // section index for this sentence's section
  optional string          sectionName                         = 61;  // name of section
  optional string          sectionAuthor                       = 62;  // author of section
  optional string          docID                               = 63;  // doc id
  optional bool            sectionQuoted                       = 64;  // is this sentence in an xml quote in a post

  optional bool            hasEntityMentionsAnnotation         = 65;  // check if there are entity mentions
  optional bool            hasKBPTriplesAnnotation             = 68;  // check if there are KBP triples
  optional bool            hasOpenieTriplesAnnotation          = 69;  // check if there are OpenIE triples

  // quote stuff
  optional uint32             chapterIndex                     = 66;
  optional uint32             paragraphIndex                   = 67;
  // the quote annotator can sometimes add merged sentences
  // (note: this makes Sentence a self-referential message type)
  optional Sentence           enhancedSentence                 = 70;

  // speaker stuff
  optional string          speaker                             = 71;  // The speaker speaking this sentence
  optional string          speakerType                         = 72;  // The type of speaker speaking this sentence

  // Field numbers set aside for extensions of this message.
  extensions 100 to 255;
}

//
// The serialized version of a Token (a CoreLabel).
//
message Token {
  // Field numbers, not names, identify fields on the wire: never renumber a
  // field, and reserve the number (and name) of any field that is removed.

  // Fields set by the default annotators [new CoreNLP(new Properties())]
  optional string word              = 1;    // the word's gloss (post-tokenization)
  optional string pos               = 2;    // The word's part of speech tag
  optional string value             = 3;    // The word's 'value', (e.g., parse tree node)
  optional string category          = 4;    // The word's 'category' (e.g., parse tree node)
  optional string before            = 5;    // The whitespace/xml before the token
  optional string after             = 6;    // The whitespace/xml after the token
  optional string originalText      = 7;    // The original text for this token
  optional string ner               = 8;    // The word's NER tag
  optional string coarseNER         = 62;   // The word's coarse NER tag
  optional string fineGrainedNER    = 63;   // The word's fine-grained NER tag
  repeated string nerLabelProbs     = 66;   // listing of probs
  optional string normalizedNER     = 9;    // The word's normalized NER tag
  optional string lemma             = 10;   // The word's lemma
  optional uint32 beginChar         = 11;   // The character offset begin, in the document
  optional uint32 endChar           = 12;   // The character offset end, in the document
  optional uint32 utterance         = 13;   // The utterance tag used in dcoref
  optional string speaker           = 14;   // The speaker speaking this word
  optional string speakerType       = 77;   // The type of speaker speaking this word
  optional uint32 beginIndex        = 15;   // The begin index of, e.g., a span
  optional uint32 endIndex          = 16;   // The end index of, e.g., a span
  optional uint32 tokenBeginIndex   = 17;   // The begin index of the token
  optional uint32 tokenEndIndex     = 18;   // The end index of the token
  optional Timex  timexValue        = 19;   // The time this word refers to
  optional bool   hasXmlContext     = 21;   // Used by clean xml annotator
  repeated string xmlContext        = 22;   // Used by clean xml annotator
  optional uint32 corefClusterID    = 23;   // The [primary] cluster id for this token
  optional string answer            = 24;   // A temporary annotation which is occasionally left in
  // Field 25 used to be `optional string projectedCategory` (the syntactic
  // category of the maximal constituent headed by the word). It was not used
  // anywhere, so it was deleted. The number and name are reserved so that a
  // future field cannot silently reuse them and misread old serialized data.
  reserved 25;
  reserved "projectedCategory";
  optional uint32    headWordIndex  = 26;   // The index of the head word of this word.
  optional Operator  operator       = 27;   // If this is an operator, which one is it and what is its scope (as per Natural Logic)?
  optional Polarity  polarity       = 28;   // The polarity of this word, according to Natural Logic
  optional string    polarity_dir   = 39;   // The polarity of this word, either "up", "down", or "flat"
  optional Span      span           = 29;   // The span of a leaf node of a tree
  optional string    sentiment      = 30;   // The final sentiment of the sentence
  optional int32     quotationIndex = 31;   // The index of the quotation this token refers to
  optional MapStringString conllUFeatures = 32;
  optional string coarseTag         = 33; //  The coarse POS tag (used to store the UPOS tag)
  optional Span conllUTokenSpan     = 34;
  optional string conllUMisc        = 35;
  optional MapStringString conllUSecondaryDeps = 36;
  optional string   wikipediaEntity = 37;
  optional bool     isNewline = 38;


  // Fields set by other annotators in CoreNLP
  optional string gender          = 51;  // gender annotation (machine reading)
  optional string trueCase        = 52;  // true case type of token
  optional string trueCaseText    = 53;  // true case gloss of token

  //  Chinese character info
  optional string chineseChar     = 54;
  optional string chineseSeg      = 55;
  optional string chineseXMLChar  = 60;

  //  Arabic character info
  optional string arabicSeg       = 76;

  // Section info
  optional string sectionName     = 56;
  optional string sectionAuthor   = 57;
  optional string sectionDate     = 58;
  optional string sectionEndLabel = 59;

  // French tokens have parents
  optional string parent          = 61;

  // mention index info
  repeated uint32 corefMentionIndex = 64;
  optional uint32 entityMentionIndex = 65;

  // mwt stuff
  optional bool isMWT = 67;
  optional bool isFirstMWT = 68;
  optional string mwtText = 69;

  // number info
  optional uint64 numericValue = 70;
  optional string numericType = 71;
  optional uint64 numericCompositeValue = 72;
  optional string numericCompositeType = 73;

  optional uint32 codepointOffsetBegin   = 74;
  optional uint32 codepointOffsetEnd     = 75;

  // Fields in the CoreLabel java class that are moved elsewhere
  //       string text           @see Document#text + character offsets
  //       uint32 sentenceIndex  @see Sentence#sentenceIndex
  //       string docID          @see Document#docID
  //       uint32 index          @see implicit in Sentence
  //       uint32 paragraph      @see Sentence#paragraph

  // Field numbers set aside for extensions of this message.
  extensions 100 to 255;
}

//
// An enumeration of valid sentiment values for the sentiment classifier.
//
enum Sentiment {
  // Listed first, so this is the proto2 default for an unset Sentiment field.
  STRONG_NEGATIVE = 0;
  WEAK_NEGATIVE = 1;
  NEUTRAL = 2;
  WEAK_POSITIVE = 3;
  STRONG_POSITIVE = 4;
}

//
// A quotation marker in text
//
message Quote {
  // The quoted text and its begin/end positions.
  // NOTE(review): the unit of begin/end is not stated here (presumably
  // character offsets, given the separate sentence/token pairs) - confirm.
  optional string text = 1;
  optional uint32 begin = 2;
  optional uint32 end = 3;
  optional uint32 sentenceBegin = 5;
  optional uint32 sentenceEnd = 6;
  optional uint32 tokenBegin = 7;
  optional uint32 tokenEnd = 8;
  optional string docid = 9;
  optional uint32 index = 10;
  optional string author = 11;

  // Attribution: the mention and speaker associated with the quote.
  optional string mention = 12;
  optional uint32 mentionBegin = 13;
  optional uint32 mentionEnd = 14;
  optional string mentionType = 15;
  optional string mentionSieve = 16;
  optional string speaker = 17;
  optional string speakerSieve = 18;
  optional string canonicalMention = 19;
  optional uint32 canonicalMentionBegin = 20;
  optional uint32 canonicalMentionEnd = 21;
  optional DependencyGraph attributionDependencyGraph = 22;
}

//
// A syntactic parse tree, with scores.
//
message ParseTree {
  // Recursive structure: every node carries its own list of child subtrees.
  // FlattenedParseTree is the non-recursive alternative for deep trees.
  repeated ParseTree child = 1;
  optional string value = 2;
  optional uint32 yieldBeginIndex = 3;
  optional uint32 yieldEndIndex = 4;
  optional double score = 5;
  optional Sentiment sentiment = 6;
}

//
// A dependency graph representation.
//
message DependencyGraph {
  // A vertex of the graph, addressed by sentence index and index.
  message Node {
    required uint32 sentenceIndex = 1;
    required uint32 index = 2;
    optional uint32 copyAnnotation = 3;
  }

  // An edge with a source and a target endpoint, plus its `dep` label.
  message Edge {
    required uint32 source = 1;
    required uint32 target = 2;
    optional string dep = 3;
    optional bool isExtra = 4;
    optional uint32 sourceCopy = 5;
    optional uint32 targetCopy = 6;
    // Falls back to Language.Unknown when unset.
    optional Language language = 7 [default = Unknown];
  }

  repeated Node node = 1;
  repeated Edge edge = 2;
  // Packed encoding is requested explicitly, since proto2 does not pack
  // repeated scalars by default.
  repeated uint32 root = 3 [packed = true];
}

//
// A coreference chain.
// These fields are not *really* optional. CoreNLP will crash without them.
//
message CorefChain {
  // One mention that belongs to the chain.
  message CorefMention {
    optional int32 mentionID = 1;
    optional string mentionType = 2;
    optional string number = 3;
    optional string gender = 4;
    optional string animacy = 5;
    optional uint32 beginIndex = 6;
    optional uint32 endIndex = 7;
    optional uint32 headIndex = 9;
    optional uint32 sentenceIndex = 10;
    // The second element of position.
    optional uint32 position = 11;
  }

  required int32 chainID = 1;
  repeated CorefMention mention = 2;
  // NOTE(review): presumably identifies the representative entry of
  // `mention` - confirm whether it is a list index or a mentionID.
  required uint32 representative = 3;
}

//
// a mention
//

message Mention {
  // Identity and grammatical attributes of the mention.
  optional int32 mentionID             = 1;
  optional string mentionType          = 2;
  optional string number               = 3;
  optional string gender               = 4;
  optional string animacy              = 5;
  optional string person               = 6;
  // Position information. Note that field number 8 is not used by this message.
  optional uint32 startIndex           = 7;
  optional uint32 endIndex             = 9;
  optional int32 headIndex             = 10;
  optional string headString           = 11;
  optional string nerString            = 12;
  optional int32 originalRef           = 13;
  optional int32 goldCorefClusterID    = 14;
  optional int32 corefClusterID        = 15;
  optional int32 mentionNum            = 16;
  optional int32 sentNum               = 17;
  optional int32 utter                 = 18;
  optional int32 paragraph             = 19;
  // Boolean flags.
  optional bool isSubject              = 20;
  optional bool isDirectObject         = 21;
  optional bool isIndirectObject       = 22;
  optional bool isPrepositionObject    = 23;
  optional bool hasTwin                = 24;
  optional bool generic                = 25;
  optional bool isSingleton            = 26;
  optional bool hasBasicDependency     = 27;
  // NOTE: "Depenedncy" is a misspelling of "Dependency". The spelling is part
  // of the field name and therefore of the generated accessors, so it is kept
  // as-is; renaming it would be source-breaking for existing callers.
  optional bool hasEnhancedDepenedncy  = 28;
  optional bool hasContextParseTree    = 29;
  // References to words by position (see IndexedWord) and speaker info.
  optional IndexedWord headIndexedWord = 30;
  optional IndexedWord   dependingVerb = 31;
  optional IndexedWord       headWord  = 32;
  optional SpeakerInfo    speakerInfo  = 33;

  // Repeated fields, numbered from 50.
  repeated IndexedWord         sentenceWords = 50;
  repeated IndexedWord         originalSpan = 51;
  repeated string dependents           = 52;
  repeated string preprocessedTerms    = 53;
  // NOTE(review): the int32 lists below presumably hold ids of other
  // Mentions - confirm against the serializer.
  repeated int32 appositions = 54;
  repeated int32 predicateNominatives = 55;
  repeated int32 relativePronouns = 56;
  repeated int32 listMembers = 57;
  repeated int32 belongToLists = 58;

}

//
// store the position (sentence, token index) of a CoreLabel
//

message IndexedWord {
  optional int32 sentenceNum = 1;
  optional int32 tokenIndex = 2;
  // Note: an int32 here, whereas Document.docID and Sentence.docID are strings.
  optional int32 docID = 3;
  optional uint32 copyCount = 4;
}

//
// speaker info, this is used for Mentions
//

message SpeakerInfo {
  optional string speakerName = 1;
  // NOTE(review): presumably ids of the Mentions tied to this speaker - confirm.
  repeated int32 mentions = 2;
}

//
// A Span of text
//
message Span {
  // Both bounds are required.
  // NOTE(review): whether `end` is inclusive or exclusive is not stated here.
  required uint32 begin = 1;
  required uint32 end = 2;
}

//
// A Timex object, representing a temporal expression (TIMe EXpression)
// These fields are not *really* optional. CoreNLP will crash without them.
//
message Timex {
  // Every field is declared optional in the schema.
  optional string value = 1;
  optional string altValue = 2;
  optional string text = 3;
  optional string type = 4;
  optional string tid = 5;
  optional uint32 beginPoint = 6;
  optional uint32 endPoint = 7;
}

//
// A representation of an entity in a relation.
// This corresponds to the EntityMention, and more broadly the
// ExtractionObject classes.
//
message Entity {
  // Fields specific to the entity mention (numbered 6 and up).
  optional uint32 headStart = 6;
  optional uint32 headEnd = 7;
  optional string mentionType = 8;
  optional string normalizedName = 9;
  optional uint32 headTokenIndex = 10;
  optional string corefID = 11;

  // Inherited from ExtractionObject (numbered 1 to 5).
  optional string objectID = 1;
  optional uint32 extentStart = 2;
  optional uint32 extentEnd = 3;
  optional string type = 4;
  optional string subtype = 5;

  // Implicit, hence not serialized here:
  //   uint32 sentence   @see implicit in sentence
}

//
// A representation of a relation, mirroring RelationMention
//
message Relation {
  // Fields specific to the relation mention (numbered 6 and up).
  // NOTE(review): argName and arg look like index-aligned lists - confirm.
  repeated string argName = 6;
  repeated Entity arg = 7;
  optional string signature = 8;

  // Inherited from ExtractionObject (numbered 1 to 5).
  optional string objectID = 1;
  optional uint32 extentStart = 2;
  optional uint32 extentEnd = 3;
  optional string type = 4;
  optional string subtype = 5;

  // Implicit, hence not serialized here:
  //   uint32 sentence   @see implicit in sentence
}

//
// A Natural Logic operator
//
message Operator {
  // All seven fields are required: the operator name, then begin/end pairs
  // for the quantifier, subject and object spans.
  required string name = 1;
  required int32 quantifierSpanBegin = 2;
  required int32 quantifierSpanEnd = 3;
  required int32 subjectSpanBegin = 4;
  required int32 subjectSpanEnd = 5;
  required int32 objectSpanBegin = 6;
  required int32 objectSpanEnd = 7;
}

//
// The seven informative Natural Logic relations
//
enum NaturalLogicRelation {
  // Listed first, so this is the proto2 default for an unset field of this type.
  EQUIVALENCE = 0;
  FORWARD_ENTAILMENT = 1;
  REVERSE_ENTAILMENT = 2;
  NEGATION = 3;
  ALTERNATION = 4;
  COVER = 5;
  INDEPENDENCE = 6;
}

//
// The polarity of a word, according to Natural Logic
//
message Polarity {
  // One required projection per NaturalLogicRelation value, in enum order.
  required NaturalLogicRelation projectEquivalence = 1;
  required NaturalLogicRelation projectForwardEntailment = 2;
  required NaturalLogicRelation projectReverseEntailment = 3;
  required NaturalLogicRelation projectNegation = 4;
  required NaturalLogicRelation projectAlternation = 5;
  required NaturalLogicRelation projectCover = 6;
  required NaturalLogicRelation projectIndependence = 7;
}

//
// An NER mention in the text
//
message NERMention {
  optional uint32 sentenceIndex = 1;
  // Token range within the sentence; start is inclusive and end is exclusive,
  // as the field names spell out. Both are required, as is the NER label.
  required uint32 tokenStartInSentenceInclusive = 2;
  required uint32 tokenEndInSentenceExclusive = 3;
  required string ner = 4;
  optional string normalizedNER = 5;
  optional string entityType = 6;
  optional Timex timex = 7;
  optional string wikipediaEntity = 8;
  optional string gender = 9;
  optional uint32 entityMentionIndex = 10;
  optional uint32 canonicalEntityMentionIndex = 11;
  optional string entityMentionText = 12;
}

//
// An entailed sentence fragment.
// Created by the openie annotator.
//
message SentenceFragment {
  // Indices of the tokens that make up the fragment.
  repeated uint32 tokenIndex = 1;
  optional uint32 root = 2;
  optional bool assumedTruth = 3;
  optional double score = 4;
}


//
// The index of a token in a document, including the sentence
// index and the offset.
//
message TokenLocation {
  // Which sentence of the document the token is in.
  optional uint32 sentenceIndex = 1;
  // The token's index within that sentence.
  optional uint32 tokenIndex = 2;
}


//
// An OpenIE relation triple.
// Created by the openie annotator.
//
message RelationTriple {
  // The surface form of the subject.
  optional string subject = 1;
  // The surface form of the relation. The original comment marks it
  // "(required)", although the field itself is declared optional.
  optional string relation = 2;
  // The surface form of the object.
  optional string object = 3;
  // The [optional] confidence of the extraction.
  optional double confidence = 4;
  // The tokens comprising the subject of the triple.
  repeated TokenLocation subjectTokens = 13;
  // The tokens comprising the relation of the triple.
  repeated TokenLocation relationTokens = 14;
  // The tokens comprising the object of the triple.
  repeated TokenLocation objectTokens = 15;
  // The dependency graph fragment for this triple.
  optional DependencyGraph tree = 8;
  // If true, this expresses an implicit tmod relation.
  optional bool istmod = 9;
  // If true, this relation string is missing a 'be' prefix.
  optional bool prefixBe = 10;
  // If true, this relation string is missing a 'be' suffix.
  optional bool suffixBe = 11;
  // If true, this relation string is missing an 'of' suffix.
  // NOTE(review): the original comment said "prefix", which looks like a
  // copy-paste slip given the field name - confirm.
  optional bool suffixOf = 12;
}


//
// A map from strings to strings.
// Used, minimally, in the CoNLLU featurizer
//
message MapStringString {
  // Two parallel lists.
  // NOTE(review): presumably key[i] pairs with value[i] - confirm with the
  // code that reads and writes this message.
  repeated string key = 1;
  repeated string value = 2;
}

//
// A map from integers to strings.
// Used, minimally, in the CoNLLU featurizer
//
message MapIntString {
  // Two parallel lists.
  // NOTE(review): presumably key[i] pairs with value[i] - confirm with the
  // code that reads and writes this message.
  repeated uint32 key = 1;
  repeated string value = 2;
}

//
// Store section info
//

message Section {
  // Required character bounds of the section.
  required uint32 charBegin = 1;
  required uint32 charEnd = 2;
  optional string author = 3;
  repeated uint32 sentenceIndexes = 4;
  optional string datetime = 5;
  repeated Quote quotes = 6;
  optional uint32 authorCharBegin = 7;
  optional uint32 authorCharEnd = 8;
  // Also required, so every serialized Section must carry its xml tag token.
  required Token xmlTag = 9;
}


// A message for requesting a semgrex
// Each sentence stores information about the tokens making up the
// corresponding graph
// An alternative would have been to use the existing Document or
// Sentence classes, but the problem with that is it would be
// ambiguous which dependency object to use.
message SemgrexRequest {
  // One graph to search, together with the tokens it is built over.
  message Dependencies {
    repeated Token token = 1;
    required DependencyGraph graph = 2;
  }

  // The semgrex expressions to run.
  repeated string semgrex = 1;
  // The graphs to run them against.
  repeated Dependencies query = 2;
}

// The response from running a semgrex
// If you pass in M semgrex expressions and N dependency graphs,
// this returns MxN nested results.  Each SemgrexResult can match
// multiple times in one graph
message SemgrexResponse {
  // A named node captured by a match.
  message NamedNode {
    required string name = 1;
    required int32 matchIndex = 2;
  }

  // A named relation captured by a match.
  message NamedRelation {
    required string name = 1;
    required string reln = 2;
  }

  // A single match, with the named nodes and relations it captured.
  message Match {
    required int32 matchIndex = 1;
    repeated NamedNode node = 2;
    repeated NamedRelation reln = 3;
  }

  // All matches of one semgrex expression in one graph.
  message SemgrexResult {
    repeated Match match = 1;
  }

  // The per-expression results for one graph.
  message GraphResult {
    repeated SemgrexResult result = 1;
  }

  repeated GraphResult result = 1;
}


// It's possible to send in a whole document, but we
// only care about the Sentences and Tokens
message TokensRegexRequest {
  // The document to search; required.
  required Document doc = 1;
  // The patterns to run over it.
  repeated string pattern = 2;
}

// The result will be a nested structure:
// repeated PatternMatch, one for each pattern
// each PatternMatch has a repeated Match,
// which tells you which sentence matched and where
message TokensRegexResponse {
  // A matched piece of text with its begin and end positions.
  message MatchLocation {
    optional string text = 1;
    optional int32 begin = 2;
    optional int32 end = 3;
  }

  // One match: the sentence it occurred in, the overall match location, and
  // the locations of its groups.
  message Match {
    required int32 sentence = 1;
    required MatchLocation match = 2;
    repeated MatchLocation group = 3;
  }

  // Every match produced by a single pattern.
  message PatternMatch {
    repeated Match match = 1;
  }

  repeated PatternMatch match = 1;
}

// A protobuf which allows passing in a document with basic
// dependencies to be converted to enhanced dependencies
message DependencyEnhancerRequest {
  required Document document = 1;

  // A oneof: at most one of the members below is set at any time.
  oneof ref {
    Language language = 2;
    // The expected value of this is a regex which matches relative pronouns
    string relativePronouns = 3;
  }
}

// A version of ParseTree with a flattened structure so that deep trees
// don't exceed the protobuf stack depth
message FlattenedParseTree {
  // One element of the flattened sequence.
  message Node {
    // A oneof: each Node carries at most one of an open marker, a close
    // marker, or a value.
    oneof contents {
      bool openNode = 1;
      bool closeNode = 2;
      string value = 3;
    }

    optional double score = 4;
  }

  // The flat sequence of nodes, replacing nested ParseTree messages.
  repeated Node nodes = 1;
}

// A protobuf for calling the java constituency parser evaluator from elsewhere
message EvaluateParserRequest {
  // A gold tree paired with the parser's prediction(s) for it.
  message ParseResult {
    required FlattenedParseTree gold = 1;
    // repeated so you can send in kbest parses, if your parser handles that
    // note that this already includes a score field
    repeated FlattenedParseTree predicted = 2;
  }

  repeated ParseResult treebank = 1;
}

message EvaluateParserResponse {
  // The f1 score; the only field of the response, and required.
  required double f1 = 1;
}