Welcome to mirror list, hosted at ThFree Co, Russian Federation.

process_sst.sh « sentiment « scripts - github.com/stanfordnlp/stanza.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
blob: 7ee7fb677d70da9b54ab08925066089b6bf11fab (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Convert the five-class sentiment data into text files of the kind a
# sentence classifier can consume.  Binary and three-class versions are
# derived from the same five-class input files.

# The default directories are what I set up on my local machine.
# -i and -o change the input and output dirs.

# The latest version of the sentiment treebank can be cloned from
#   https://github.com/stanfordnlp/sentiment-treebank

# Default locations, relative to the current working directory.
#   -i DIR  overrides INPUT_DIR  (the directory holding fiveclass/{train,dev,test}.txt)
#   -o DIR  overrides OUTPUT_DIR (the directory the processed files are written under)
INPUT_DIR=extern_data/sentiment/sentiment-treebank
OUTPUT_DIR=extern_data/sentiment/sst-processed

while getopts "i:o:" OPTION; do
  case "$OPTION" in
    i) INPUT_DIR=$OPTARG ;;
    o) OUTPUT_DIR=$OPTARG ;;
    *)
      # getopts has already reported the unknown option or missing argument
      # on stderr.  Stop here rather than silently running against the
      # default directories, which is almost never what the caller wanted.
      printf 'Usage: %s [-i input_dir] [-o output_dir]\n' "$0" >&2
      exit 2
      ;;
  esac
done

  
# Report the directories in use, then create one output subdirectory per
# label scheme.  Expansions are quoted so that paths containing spaces or
# glob characters are passed through as a single, literal argument.
printf 'INPUT DIR: %s\n' "$INPUT_DIR"
printf 'OUTPUT DIR: %s\n' "$OUTPUT_DIR"

for scheme in binary fiveclass threeclass; do
  # Without the directory every later redirection would fail, so give up now.
  mkdir -p -- "$OUTPUT_DIR/$scheme" || exit 1
done

# Five-class outputs: run OutputSubtrees over each split of
# $INPUT_DIR/fiveclass twice -- once with no extra flag (<split>-phrases.txt)
# and once with -root_only (<split>-roots.txt).  Labels are not remapped.
# NOTE(review): assumes edu.stanford.nlp.trees.OutputSubtrees is reachable on
# the Java classpath -- confirm how CLASSPATH is expected to be set.
for target in train-phrases dev-phrases test-phrases \
              train-roots dev-roots test-roots; do
  split=${target%%-*}   # train | dev | test
  kind=${target#*-}     # phrases | roots

  root_flag=
  if [ "$kind" = roots ]; then
    root_flag=-root_only
  fi

  out_file="$OUTPUT_DIR/fiveclass/$target.txt"
  echo "$out_file"

  # ${root_flag:+...} expands to nothing at all (not an empty argument) when
  # the flag is unset, so the phrases runs get exactly the original arguments.
  if ! java edu.stanford.nlp.trees.OutputSubtrees \
      -input "$INPUT_DIR/fiveclass/$split.txt" \
      ${root_flag:+"$root_flag"} \
      > "$out_file"; then
    # A failed run leaves an empty or truncated file behind; do not carry on
    # as though it were valid output.
    printf 'Failed to generate %s\n' "$out_file" >&2
    exit 1
  fi
done


# Binary outputs, derived from the five-class input files.  The remap string
# sends label 1 to 0, labels 3 and 4 to 1, and label 2 to -1; label 2 is also
# handed to -ignore_labels.  Label 0 is not listed in the remap.
# NOTE(review): presumably this drops neutral items and leaves 0 as 0 --
# confirm against the OutputSubtrees documentation.
# NOTE(review): no train-binary-roots.txt is produced (phrases exist for all
# three splits, roots only for dev and test) -- confirm this is intentional.
binary_remap="1=0,2=-1,3=1,4=1"

for target in train-phrases dev-phrases test-phrases dev-roots test-roots; do
  split=${target%%-*}   # train | dev | test
  kind=${target#*-}     # phrases | roots

  root_flag=
  if [ "$kind" = roots ]; then
    root_flag=-root_only
  fi

  out_file="$OUTPUT_DIR/binary/$split-binary-$kind.txt"
  echo "$out_file"

  # ${root_flag:+...} expands to nothing at all (not an empty argument) when
  # the flag is unset, so the phrases runs get exactly the original arguments.
  if ! java edu.stanford.nlp.trees.OutputSubtrees \
      -input "$INPUT_DIR/fiveclass/$split.txt" \
      ${root_flag:+"$root_flag"} \
      -ignore_labels 2 \
      -remap_labels "$binary_remap" \
      > "$out_file"; then
    # A failed run leaves an empty or truncated file behind; do not carry on
    # as though it were valid output.
    printf 'Failed to generate %s\n' "$out_file" >&2
    exit 1
  fi
done



# Three-class outputs, derived from the five-class input files.  The remap
# string collapses labels 0 and 1 to 0, maps 2 to 1, and collapses 3 and 4
# to 2.
# NOTE(review): no train-threeclass-roots.txt is produced (phrases exist for
# all three splits, roots only for dev and test) -- confirm this is intentional.
threeclass_remap="0=0,1=0,2=1,3=2,4=2"

for target in train-phrases dev-phrases test-phrases dev-roots test-roots; do
  split=${target%%-*}   # train | dev | test
  kind=${target#*-}     # phrases | roots

  root_flag=
  if [ "$kind" = roots ]; then
    root_flag=-root_only
  fi

  out_file="$OUTPUT_DIR/threeclass/$split-threeclass-$kind.txt"
  echo "$out_file"

  # ${root_flag:+...} expands to nothing at all (not an empty argument) when
  # the flag is unset, so the phrases runs get exactly the original arguments.
  if ! java edu.stanford.nlp.trees.OutputSubtrees \
      -input "$INPUT_DIR/fiveclass/$split.txt" \
      ${root_flag:+"$root_flag"} \
      -remap_labels "$threeclass_remap" \
      > "$out_file"; then
    # A failed run leaves an empty or truncated file behind; do not carry on
    # as though it were valid output.
    printf 'Failed to generate %s\n' "$out_file" >&2
    exit 1
  fi
done