#!/usr/bin/env perl

use warnings;
use strict;

# Reads bracketed parse trees from STDIN, one tree per line, and writes the
# same trees to STDOUT as XML-style markup:
#
#   (TOP (NN dog))  ->  <tree label="TOP"> <tree label="NN"> dog </tree> </tree>
#
# A line starting with "(())" marks a parse failure and yields an empty line.

# Convert a single input line (trailing newline included) into its
# <tree label="..."> representation and return the converted line.
# The order of the substitutions below is significant; do not reorder them.
sub _convert_parse {
    my ($line) = @_;

    # parse failures
    return "\n" if $line =~ /^\(\(\)\)/;

    # prep: make sure the outermost bracket carries the label TOP
    $line =~ s/^\( \( (.+) \)$/\(TOP $1/;    # remove double wrapped parenthesis
    $line =~ s/^\( /\(TOP /;

    # escape words ('&' must go first, every later entity introduces one)
    $line =~ s/\&/\&amp;/g;                  # escape escape
    $line =~ s/\|/\&bar;/g;                  # factor separator
    $line =~ s/\</\&lt;/g;                   # xml
    $line =~ s/\>/\&gt;/g;                   # xml
    $line =~ s/\'/\&apos;/g;                 # xml
    $line =~ s/\"/\&quot;/g;                 # xml
    $line =~ s/\[/\&#91;/g;                  # syntax non-terminal
    $line =~ s/\]/\&#93;/g;                  # syntax non-terminal

    # escape parentheses that were part of the input text, so the bracket
    # rewriting below does not mistake them for tree structure
    $line =~ s/(\(\S+ )\(\)/$1\&openingparenthesis;\)/g;
    $line =~ s/(\(\S+ )\)\)/$1\&closingparenthesis;\)/g;

    # convert into tree
    $line =~ s/\((\S+) /<tree label=\"$1\"> /g;
    $line =~ s/\)/ <\/tree> /g;
    $line =~ s/\"\-LRB\-\"/\"LRB\"/g;        # labels
    $line =~ s/\"\-RRB\-\"/\"RRB\"/g;
    $line =~ s/\-LRB\-/\(/g;                 # tokens
    $line =~ s/\-RRB\-/\)/g;
    $line =~ s/ +/ /g;                       # collapse runs of spaces
    $line =~ s/ $//g;                        # drop the space left before end of line

    # de-escape parentheses that were part of the input text
    $line =~ s/\&openingparenthesis;/\(/g;
    $line =~ s/\&closingparenthesis;/\)/g;

    return $line;
}

# Filter STDIN to STDOUT, converting one parse per line.
sub main {
    while (my $line = <STDIN>) {
        print _convert_parse($line);
    }
    return;
}

# Run the filter only when executed as a script, so the file can also be
# loaded (e.g. by a test) without consuming STDIN.
main() unless caller;