Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/misc
diff options
context:
space:
mode:
author hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-11 08:05:20 +0400
committer hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-11 08:05:20 +0400
commit 0fbd7735cc39056918242a8dc7c4889f091c62ce (patch)
tree a7f6a6e62185834fa1d65c21afd919e0d8b20d77 /misc
parent f6f7ee3dbb6409030f73e4f3ff026a81513b7a16 (diff)
improved tagging
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@639 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'misc')
-rw-r--r-- misc/java-utils/TagHierarchy.java 31
1 files changed, 16 insertions, 15 deletions
diff --git a/misc/java-utils/TagHierarchy.java b/misc/java-utils/TagHierarchy.java
index 934820a1e..455712dc0 100644
--- a/misc/java-utils/TagHierarchy.java
+++ b/misc/java-utils/TagHierarchy.java
@@ -10,25 +10,33 @@ class TagHierarchy
System.err.println("Starting...");
InputStreamReader inStream = args.length > 0 ? new FileReader(args[0]) : new InputStreamReader(System.in);
- OutputStreamWriter outStream = args.length > 1 ? new FileWriter(args[1]) : new OutputStreamWriter(System.out);
+ PrintStream outStream = args.length > 1 ? new PrintStream(new File(args[1])) : System.out;
new TagHierarchy(inStream, outStream);
System.err.println("End...");
}
- public TagHierarchy(Reader inStream, Writer outStream) throws Exception
+ public TagHierarchy(Reader inStream, PrintStream outStream) throws Exception
{
BufferedReader inFile = new BufferedReader(inStream);
- BufferedWriter outFile = new BufferedWriter(outStream);
// tokenise
String inLine;
+ int nullLines = 0;
while ((inLine = inFile.readLine()) != null)
{
- if (inLine.compareTo("null") != 0)
- OutputHierarchy2(inLine, outFile);
+ if (inLine.equals("null"))
+ {
+ nullLines++;
+ outStream.println("null");
+ }
+ else
+ {
+ OutputHierarchy2(inLine, outStream);
+ }
}
+ System.err.println(nullLines + " null lines\n");
}
public void OutputHierarchy(String inLine, BufferedWriter outFile) throws Exception
@@ -61,7 +69,7 @@ class TagHierarchy
outFile.write('\n');
}
- public void OutputHierarchy2(String inLine, BufferedWriter outFile) throws Exception
+ public void OutputHierarchy2(String inLine, PrintStream outFile) throws Exception
{
int level = 0;
Stack prevTags = new Stack();
@@ -73,13 +81,7 @@ class TagHierarchy
String parsed = st.nextToken();
if (parsed.substring(0, 1).compareTo("(") == 0)
{ // start of new node
- outFile.write('\n');
- for (int currLevel = 0 ; currLevel < level ; currLevel++)
- {
- outFile.write(' ');
- }
String tag = parsed.substring(1, parsed.length());
- outFile.write(tag);
prevTags.push(tag);
level++;
}
@@ -89,16 +91,15 @@ class TagHierarchy
String parentTag = (String) prevTags.get(prevTags.size() - 2)
, currTag = (String) prevTags.get(prevTags.size() - 1);
if (currTag.equals("NN-NK") && parentTag.equals("NP-SB"))
- outFile.write("_" + parentTag);
+ currTag += "_" + parentTag;
int firstBracket = parsed.indexOf(')');
int noBracket = parsed.length() - firstBracket;
String word = parsed.substring(0, firstBracket);
- outFile.write(" == " + word);
+ outFile.print(currTag + " ");
level -= noBracket;
-
// pop the rest
for (int i = 0 ; i < noBracket ; ++i)
{