Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/misc
diff options
context:
space:
mode:
author: hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-12 05:31:16 +0400
committer: hieuhoang1972 <hieuhoang1972@1f5c12ca-751b-0410-a591-d2e778427230> 2006-08-12 05:31:16 +0400
commit: 4d0922afab799590cca1f9e5029470ac939c5a37 (patch)
tree: 3fb9dd2ddf99884c2b2ae3a7e7f8e7799b8f2e09 /misc
parent: 59730584035da8b2769971b160efb966be529c7d (diff)
*** empty log message ***
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@677 1f5c12ca-751b-0410-a591-d2e778427230
Diffstat (limited to 'misc')
-rw-r--r-- misc/java-utils/CombineTags.java 16
-rw-r--r-- misc/java-utils/ProcessShallowParse.java 14
-rw-r--r-- misc/java-utils/TagHierarchy.java 24
3 files changed, 36 insertions, 18 deletions
diff --git a/misc/java-utils/CombineTags.java b/misc/java-utils/CombineTags.java
index f858bd519..207791682 100644
--- a/misc/java-utils/CombineTags.java
+++ b/misc/java-utils/CombineTags.java
@@ -3,6 +3,7 @@
import java.io.*;
import java.util.*;
+// create sentences with all features combined from files with individual tags
class CombineTags
{
public static void main(String[] args) throws Exception
@@ -12,17 +13,18 @@ class CombineTags
Vector vecInstream = new Vector();
for (int i = 0 ; i < args.length ; i++)
{
- BufferedReader inStream = new BufferedReader(new FileReader(args[i]));
+ InputStreamReader temp = new InputStreamReader(new FileInputStream(args[i]), "Latin1");
+ BufferedReader inStream = new BufferedReader(temp);
vecInstream.add(inStream);
}
- PrintStream outStream = System.out;
+ OutputStreamWriter outStream = new OutputStreamWriter((OutputStream)System.out, "Latin1");
new CombineTags(vecInstream, outStream);
System.err.println("End...");
}
- public CombineTags(Vector vecInstream , PrintStream outStream) throws Exception
+ public CombineTags(Vector vecInstream , OutputStreamWriter outStream) throws Exception
{
BufferedReader inFile = (BufferedReader) vecInstream.get(0);
String inLine;
@@ -74,10 +76,14 @@ class CombineTags
outLine += otherTag + "|";
}
outLine = outLine.substring(0, outLine.length() - 1) + " ";
- outStream.print(outLine);
+ outStream.write(outLine);
}
- outStream.println();
+ outStream.write("\n");
}
+ // close stream
+ outStream.flush();
+ outStream.close();
+ outStream = null;
}
}
diff --git a/misc/java-utils/ProcessShallowParse.java b/misc/java-utils/ProcessShallowParse.java
index ba26c4e72..dd3b2430e 100644
--- a/misc/java-utils/ProcessShallowParse.java
+++ b/misc/java-utils/ProcessShallowParse.java
@@ -4,14 +4,18 @@
import java.io.*;
import java.util.*;
+//input is the sentences with all features combined
+//output is the shrunk sentences, containing only those words we are interested in
public class ProcessShallowParse
{
public static void main(String[] args) throws Exception
{
System.err.println("Starting...");
- InputStreamReader inStream = args.length > 0 ? new FileReader(args[0]) : new InputStreamReader(System.in);
- OutputStreamWriter outStream = args.length > 1 ? new FileWriter(args[1]) : new OutputStreamWriter(System.out);
+ InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
+ , "Latin1");
+ OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
+ , "Latin1");
new ProcessShallowParse2(inStream, outStream);
@@ -59,7 +63,7 @@ class ProcessShallowParse2
String factoredWord = st.nextToken();
ret += Output(factoredWord);
}
- outFile.write(i++ + " " + ret);
+ outFile.write(ret);
if (ret.length() > 0)
outFile.write("\n");
}
@@ -78,7 +82,7 @@ class ProcessShallowParse2
if (posImproved.indexOf("ART-SB") == 0
|| posImproved.indexOf("NN-NK_NP-SB") == 0)
{
- ret = posImproved + "|" + morph + " ";
+ ret = posImproved + "_" + morph + " ";
}
else if (posImproved.indexOf("VAFIN-HD") == 0
|| posImproved.indexOf("VVFIN-HD") == 0
@@ -90,7 +94,7 @@ class ProcessShallowParse2
|| posImproved.indexOf("PPER-EP") == 0
)
{
- ret = posImproved + "|" + surface + " ";
+ ret = surface + " ";
}
return ret;
diff --git a/misc/java-utils/TagHierarchy.java b/misc/java-utils/TagHierarchy.java
index 455712dc0..61f48871b 100644
--- a/misc/java-utils/TagHierarchy.java
+++ b/misc/java-utils/TagHierarchy.java
@@ -3,23 +3,28 @@
import java.io.*;
import java.util.*;
+// create pos-tag sentences from LISP-like input tree.
+// NN-NK tag augmented with NP-SB if parent is NP-SB
class TagHierarchy
{
public static void main(String[] args) throws Exception
{
System.err.println("Starting...");
- InputStreamReader inStream = args.length > 0 ? new FileReader(args[0]) : new InputStreamReader(System.in);
- PrintStream outStream = args.length > 1 ? new PrintStream(new File(args[1])) : System.out;
+ InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
+ , "Latin1");
+ OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
+ , "Latin1");
new TagHierarchy(inStream, outStream);
System.err.println("End...");
}
- public TagHierarchy(Reader inStream, PrintStream outStream) throws Exception
+ public TagHierarchy(Reader inStream, OutputStreamWriter outStream) throws Exception
{
- BufferedReader inFile = new BufferedReader(inStream);
+ BufferedReader inFile = new BufferedReader(inStream);
+ BufferedWriter outFile = new BufferedWriter(outStream);
// tokenise
String inLine;
@@ -29,13 +34,16 @@ class TagHierarchy
if (inLine.equals("null"))
{
nullLines++;
- outStream.println("null");
+ outFile.write("null\n");
}
else
{
- OutputHierarchy2(inLine, outStream);
+ OutputHierarchy2(inLine, outFile);
}
}
+ outFile.flush();
+ outFile.close();
+ outFile = null;
System.err.println(nullLines + " null lines\n");
}
@@ -69,7 +77,7 @@ class TagHierarchy
outFile.write('\n');
}
- public void OutputHierarchy2(String inLine, PrintStream outFile) throws Exception
+ public void OutputHierarchy2(String inLine, BufferedWriter outFile) throws Exception
{
int level = 0;
Stack prevTags = new Stack();
@@ -96,7 +104,7 @@ class TagHierarchy
int firstBracket = parsed.indexOf(')');
int noBracket = parsed.length() - firstBracket;
String word = parsed.substring(0, firstBracket);
- outFile.print(currTag + " ");
+ outFile.write(currTag + " ");
level -= noBracket;