(loading...)
Precision of Input Words by Coverage"; print "The graphs display what ratio of words of a specific type are translated correctly (yellow), and what ratio is deleted (blue)."; print " The extend of the boxes is scaled on the x-axis by the number of tokens of the displayed type."; // load data $data = file(get_current_analysis_filename("precision","precision-by-corpus-coverage")); $total = 0; $log_info = array(); for($i=0;$i0) { $log_count = (int) (log($item[0])/log(2)); } if (!array_key_exists($log_count,$log_info)) { $log_info[$log_count]["precision"] = 0; $log_info[$log_count]["delete"] = 0; $log_info[$log_count]["length"] = 0; $log_info[$log_count]["total"] = 0; } $log_info[$log_count]["precision"] += $item[1]; $log_info[$log_count]["delete"] += $item[2]; $log_info[$log_count]["length"] += $item[3]; $log_info[$log_count]["total"] += $item[4]; } print "

By log2-count in the training corpus

"; precision_by_coverage_graph("byCoverage",$log_info,$total,$img_width,SORT_NUMERIC); # load factored data $d = dir("$dir/evaluation/$set.analysis.".get_precision_analysis_version($dir,$set,$id)); while (false !== ($file = $d->read())) { if (preg_match('/precision-by-corpus-coverage.(.+)$/',$file, $match)) { precision_by_coverage_factored($img_width,$total,$file,$match[1]); } } } function precision_by_coverage_factored($img_width,$total,$file,$factor_id) { global $dir,$set,$id; $data = file(get_current_analysis_filename("precision",$file)); for($i=0;$i0) { $log_count = (int) (log($count)/log(2)); } $log_info_factored[$factor][$log_count]["precision"] += $item[2]; $log_info_factored[$factor][$log_count]["delete"] += $item[3]; $log_info_factored[$factor][$log_count]["length"] += $item[4]; $log_info_factored[$factor][$log_count]["total"] += $item[5]; } print "

By factor ".factor_name("input",$factor_id)."

"; precision_by_coverage_graph("byFactor",$info_factored_sum,$total,$img_width,SORT_STRING); print "

For each factor, by log2-count in the corpus

"; foreach ($log_info_factored as $factor => $info) { if ($total_factored[$factor]/$total > 0.01) { print "
$factor
"; precision_by_coverage_graph("byCoverageFactor$factor",$info,$total_factored[$factor],10+2*$img_width*$total_factored[$factor]/$total,SORT_NUMERIC); print "
"; } } } function precision_by_word($type) { global $dir,$set,$id; $byCoverage = -2; $byFactor = "false"; if ($type == "byCoverage") { $byCoverage = (int) $_GET["type"]; } else if ($type == "byFactor") { $byFactor = $_GET["type"]; } else if (preg_match("/byCoverageFactor(.+)/",$type,$match)) { $byCoverage = (int) $_GET["type"]; $byFactor = $match[1]; } $data = file(get_current_analysis_filename("precision","precision-by-input-word")); for($i=0;$i0) { $log_count = (int) (log($count)/log(2)); } if ($byCoverage != -2 && $byCoverage != $log_count) { continue; } //# filter for factor $word = $item[5]; if ($byFactor != "false" && $byFactor != $item[6]) { continue; } $info[$word]["precision"] = $item[0]; $info[$word]["delete"] = $item[1]; $info[$word]["length"] = $item[2]; $info[$word]["total"] = $item[3]; $total += $item[3]; } print "\n"; foreach ($info as $word => $wordinfo) { print ""; printf("",$wordinfo["precision"]/$wordinfo["total"]*100,"%",$wordinfo["precision"],$wordinfo["total"]); printf("",$wordinfo["delete"]/$wordinfo["total"]*100,"%",$wordinfo["delete"],$wordinfo["total"]); printf("",$wordinfo["length"]/$wordinfo["total"]); print ""; } print "
CountPrecisionDeleteLength
$word%.1f%s%.1f/%d%.1f%s%d/%d%.3f
\n"; } function precision_by_coverage_latex($name,$log_info,$total,$img_width,$sort_type) { $keys = array_keys($log_info); sort($keys,$sort_type); $img_width /= 100; print "
(show LaTeX)
\n"; print "
\n"; print "\\begin{tikzpicture}
"; print "% co-ordinates for precision
"; for($line=0;$line<=9;$line++) { $height = 1.8-$line/10*1.8; print "\\draw[thin,lightgray] (0.2,-$height) "; print "node[anchor=east,black] {".$line."0\\%} -- "; print "($img_width,-$height) ;
\n"; } print "% co-ordinates for deletion
\n"; for($line=0;$line<=3;$line++) { $height = 2+$line/10*1.80; print "\\draw[thin,lightgray] (0.2,-$height) "; if ($line != 0) { print "node[anchor=east,black] {".$line."0\\%} "; } print "-- ($img_width,-$height) ;
\n"; } print "% boxes
\n"; $total_so_far = 0; foreach ($keys as $i) { $prec_ratio = $log_info[$i]["precision"]/$log_info[$i]["total"]; $x = .2+($img_width-.2) * $total_so_far/$total; $y = 1.80-($prec_ratio*1.80); $width = $img_width * $log_info[$i]["total"]/$total; $height = $prec_ratio*1.80; $width += $x; $height += $y; print "\\filldraw[very thin,gray] ($x,-$y) rectangle($width,-$height) ;
"; print "\\draw[very thin,black] ($x,-$y) rectangle($width,-$height);
"; if ($width-$x>.1) { print "\\draw (".(($x+$width)/2).",-1.8) node[anchor=north,black] {".$i."};
"; } $del_ratio = $log_info[$i]["delete"]/$log_info[$i]["total"]; $height = $del_ratio*1.80; $height += 2; print "\\filldraw[very thin,lightgray] ($x,-2) rectangle($width,-$height);
\n"; print "\\draw[very thin,black] ($x,-2) rectangle($width,-$height);
\n"; $total_so_far += $log_info[$i]["total"]; } print "\\end{tikzpicture}
"; print "
"; } function precision_by_coverage_graph($name,$log_info,$total,$img_width,$sort_type) { $keys = array_keys($log_info); sort($keys,$sort_type); print "
(hide table)
\n"; precision_by_coverage_latex($name,$log_info,$total,$img_width,$sort_type); print "
\n"; print "\n"; foreach ($keys as $i) { if (array_key_exists($i,$log_info)) { print ""; printf("",$log_info[$i]["precision"]/$log_info[$i]["total"]*100,"%",$log_info[$i]["precision"],$log_info[$i]["total"]); printf("",$log_info[$i]["delete"]/$log_info[$i]["total"]*100,"%",$log_info[$i]["delete"],$log_info[$i]["total"]); printf("",$log_info[$i]["length"]/$log_info[$i]["total"]); print ""; print ""; } } print "
CountPrecisionDeleteLength
$i%.1f%s%.1f/%d%.1f%s%d/%d%.3f
"; print "
"; print "
"; print ""; } //# stats on precision and recall function precision_recall_details() { ?>
(loading...)
(loading...)
(loading...)
(loading...)
(loading...)
(loading...)
(loading...)
(loading...)
"; //#foreach (array("precision","recall") as $type) { print "Precision of Output\n"; $type = "precision"; print "\n"; printf("\n", $info["$type-1-correct"], $info["$type-2-correct"], $info["$type-3-correct"], $info["$type-4-correct"]); printf("\n", $info["$type-1-correct"]/$info["$type-1-total"]*100,'%', $info["$type-2-correct"]/$info["$type-2-total"]*100,'%', $info["$type-3-correct"]/$info["$type-3-total"]*100,'%', $info["$type-4-correct"]/$info["$type-4-total"]*100,'%'); printf("\n", $info["$type-1-total"]-$info["$type-1-correct"], $info["$type-2-total"]-$info["$type-2-correct"], $info["$type-3-total"]-$info["$type-3-correct"], $info["$type-4-total"]-$info["$type-4-correct"]); print "
$type1-gram2-gram3-gram4-gram
correct%d%d%d%d
 %.1f%s%.1f%s%.1f%s%.1f%s
wrong%d%d%d%d
"; //} print "details "; if (file_exists(get_current_analysis_filename("precision","precision-by-corpus-coverage"))) { print "| precision of input by coverage "; } print ""; $each_score = explode(" ; ",$experiment[$id]->result[$set]); $header = ""; $score_line = ""; for($i=0;$iMetrics".$header."".$score_line."
"; printf("

length-diff: %d (%.1f%s)",$info["precision-1-total"]-$info["recall-1-total"],($info["precision-1-total"]-$info["recall-1-total"])/$info["recall-1-total"]*100,"%"); // coverage if (file_exists(get_current_analysis_filename("coverage","corpus-coverage-summary"))) { print ""; print "

"; coverage_summary(); print "
"; } // phrase segmentation if (file_exists(get_current_analysis_filename("basic","segmentation")) || file_exists(get_current_analysis_filename("basic","rule"))) { print ""; print "
"; segmentation_summary(); print "
"; } // rules if (file_exists(get_current_analysis_filename("basic","rule"))) { print ""; print "
"; rule_summary(); print "
"; } print ""; } // details on ngram precision/recall function ngram_show($type) { global $set,$id,$dir; // load data $order = $_GET['order']; $data = file(get_current_analysis_filename("basic","n-gram-$type.$order")); for($i=0;$i$order-gram $type
sorted by "; if ($sort == "ratio_worst") { print "ratio "; print "smooth-$smooth "; print "+ "; print "- "; } else { print "ratio "; } if ($sort == "abs_worst") { print "absolute "; } else { print "absolute "; } print "showing $count "; if ($count < 9999) { print "more "; print "all "; } else { print "top5 "; } print "
\n"; print "\n"; print "\n"; for($i=0;$i<$count && $i"; print ""; print ""; printf("",$line["correct"]/$line["total"]); } print "
$order-gramokxratio
".$line["ngram"]."".$line["correct"]."".($line["total"]-$line["correct"])."%.3f
\n"; } // details on ngram coverage function coverage_details() { global $dir,$set,$id; $count = array(); $token = array(); foreach (array("ttable","corpus") as $corpus) { foreach (array("token","type") as $b) { for($i=0;$i<=7;$i++) { foreach (array("6+","2-5","1","0") as $range) { $count[$corpus][$b][$i][$range] = 0; } $total[$corpus][$b][$i] = 0; } } $data = file(filename_fallback_to_factored(get_current_analysis_filename("coverage","$corpus-coverage-summary"))); for($i=0;$i5) { $count[$corpus]["type"][$item[0]]["6+"] += $item[2]; $count[$corpus]["token"][$item[0]]["6+"] += $item[3]; } else if ($item[1]>1) { $count[$corpus]["type"][$item[0]]["2-5"] += $item[2]; $count[$corpus]["token"][$item[0]]["2-5"] += $item[3]; } else if ($item[1]==1) { $count[$corpus]["type"][$item[0]]["1"] += $item[2]; $count[$corpus]["token"][$item[0]]["1"] += $item[3]; } else { $count[$corpus]["type"][$item[0]]["0"] += $item[2]; $count[$corpus]["token"][$item[0]]["0"] += $item[3]; } $total[$corpus]["type"][$item[0]] += $item[2]; $total[$corpus]["token"][$item[0]] += $item[3]; } } print "coverage
\n"; print ""; foreach (array("token","type") as $by) { for($i=1;$i<=4;$i++) { print "\n"; } print ""; } print "
$i-gram ($by)
\n"; print "\n"; foreach (array("0","1","2-5","6+") as $range) { print ""; foreach (array("ttable","corpus") as $corpus) { printf("",$count[$corpus][$by][$i][$range],100*$count[$corpus][$by][$i][$range]/($total[$corpus][$by][$i]+0.0001),"%"); } print "\n"; } print "
modelcorpus
$range%d (%.1f%s)
\n"; $data = file(filename_fallback_to_factored(get_current_analysis_filename("coverage","ttable-unknown"))); for($i=0;$i $b["count"]) { return -1; } else if ($a["count"] < $b["count"]) { return 1; } else { return strcmp($a["word"],$b["word"]); } } usort($unknown, 'cmp'); print "unknown words (to model)
\n"; print ""; } else { print $item["word"]; } } print "
"; $state = 5; foreach ($unknown as $item) { if ($item["count"] < $state) { if ($state == 5) { print "
"; } print "
".$item["count"].": "; $state = $item["count"]; if ($state == 1) { print ""; } } else if ($state<5) { print ", "; } if ($state == 5) { print "
".$item["count"]."".$item["word"]."
\n"; }

// Resolve an analysis file name, falling back to a factored variant.
//
// Returns $file unchanged when it exists. Otherwise scans the file's
// directory for the first entry whose name starts with the same basename
// and whose remaining suffix contains a "0", and returns that entry's path.
// When no such entry exists (or the directory cannot be read) the original
// $file is returned so callers see the same "missing file" as before.
function filename_fallback_to_factored($file) {
  if (file_exists($file)) {
    return $file;
  }

  $path = pathinfo($file);
  $basename = $path['basename'];
  $prefix_length = strlen($basename);

  // readdir() on a failed opendir() never yields false, so the scan loop
  // below would not terminate normally; bail out early instead
  if (!is_dir($path['dirname'])) {
    return $file;
  }
  $dh = opendir($path['dirname']);
  if ($dh === false) {
    return $file;
  }

  $resolved = $file; // found nothing... keep the requested name
  while (($factored_file = readdir($dh)) !== false) {
    if (strlen($factored_file) > $prefix_length
        && strncmp($factored_file,$basename,$prefix_length) === 0
        && preg_match("/0/",substr($factored_file,$prefix_length))) {
      $resolved = $path['dirname']."/".$factored_file;
      break;
    }
  }
  closedir($dh); // the handle used to leak on every call
  return $resolved;
}

// Human-readable label for a factor.
//
// $input_output selects the first ("input") or second (anything else) line
// of the "factor-names" analysis file; each line is a comma-separated list
// of names indexed by $factor_id. Returns "'name' (id)", or the bare
// $factor_id when the file, the line, or the name is not available.
function factor_name($input_output,$factor_id) {
  global $dir,$set,$id;
  $file = get_current_analysis_filename("coverage","factor-names");
  if (!file_exists($file)) {
    return $factor_id;
  }
  $in_out_names = file($file);
  $line_index = ($input_output == "input") ? 0 : 1;
  if ($in_out_names === false || !array_key_exists($line_index,$in_out_names)) {
    return $factor_id;
  }
  $names = explode(",",trim($in_out_names[$line_index]));
  if (!array_key_exists($factor_id,$names)) {
    return $factor_id;
  }
  return "'".$names[$factor_id]."' ($factor_id)";
}

// stats on ngram coverage
function coverage_summary() { global $dir,$set,$id,$corpus; if (array_key_exists("by",$_GET)) { $by = $_GET['by']; } else { $by = 'token'; } $total = array(); $count = array(); foreach (array("ttable","corpus") as $corpus) { foreach (array("token","type") as $b) { foreach (array("6+","2-5","1","0") as $c) { $count[$corpus][$b][$c] = 0; } $total[$corpus][$b] = 0; } $data = file(filename_fallback_to_factored(get_current_analysis_filename("coverage","$corpus-coverage-summary"))); for($i=0;$i5) { $count[$corpus]["type"]["6+"] += $item[2]; $count[$corpus]["token"]["6+"] += $item[3]; } else if ($item[1]>1) { $count[$corpus]["type"]["2-5"] += $item[2]; $count[$corpus]["token"]["2-5"] += $item[3]; } else if ($item[1]==1) { $count[$corpus]["type"]["1"] += $item[2]; $count[$corpus]["token"]["1"] += $item[3]; } else { $count[$corpus]["type"]["0"] += $item[2]; $count[$corpus]["token"]["0"] += $item[3]; } $total[$corpus]["type"] += $item[2]; $total[$corpus]["token"] += $item[3]; } } } print "Coverage\n"; print "\n"; foreach (array("0","1","2-5","6+") as $range) { print ""; foreach (array("ttable","corpus") as $corpus) { 
printf("",$count[$corpus][$by][$range],100*$count[$corpus][$by][$range]/($total[$corpus][$by]+0.0001),"%"); } print "\n"; } print "
modelcorpus
$range%d (%.1f%s)
\n"; if ($by == 'token') { print "by token"; } else { print "by token "; } print " / "; if ($by == 'type') { print "by type"; } else { print "by type "; } print " / "; print " "; } // stats on segmenation (phrase-based) function segmentation_summary() { global $dir,$set,$id; if (array_key_exists("by",$_GET)) { $by = $_GET['by']; } else { $by = 'word'; } $count = array(); for($i=0;$i<=4;$i++) { $count[$i] = array(); for($j=0;$j<=4;$j++) { $count[$i][$j] = 0; } } $total = 0; $file = get_current_analysis_filename("basic","segmentation"); if (file_exists($file)) { $data = file($file); for($i=0;$i4) { $in = 4; } if ($out>4) { $out = 4; } $total += $c; $count[$in][$out] += $c; } } else { $data = file(get_current_analysis_filename("basic","rule")); for($i=0;$i 2) { $c = $field[2]; } else { $c = 0; } if ($type == "rule") { list($rule_in,$in,$nt,$rule_out,$out) = split(":",$rule); if ($by == "word") { $c *= $in; } if ($in>4) { $in = 4; } if ($out>4) { $out = 4; } $total += $c; $count[$in][$out] += $c; } } } print "Phrase Segmentation
\n"; print ""; print ""; for($in=1;$in<=4;$in++) { print ""; for($out=1;$out<=4;$out++) { if (array_key_exists($in,$count) && array_key_exists($out,$count[$in])) { $c = $count[$in][$out]; } else { $c = 0; } printf("",$c,100*$c/$total,"%"); } print ""; } print "
1234+
$in".($in==4?"+":"")." to%d (%.1f%s)
\n"; if ($by == 'word') { print "by word"; } else { print "by word "; } print " / "; if ($by == 'phrase') { print "by phrase"; } else { print "by phrase "; } } // hierarchical rules used in translation function rule_summary() { global $dir,$set,$id; $data = file(get_current_analysis_filename("basic","rule")); $rule = array(); $count = array(); $count_nt = array(); $count_w = array(); $nt_count = 0; $total = 0; foreach ($data as $item) { $field = split("\t",$item); $type = $field[0]; $d = $field[1]; if (count($field) > 2) { $d2 = $field[2]; } else { $d2 = 0; } if ($type == "sentence-count") { $sentence_count = $d; } else if ($type == "glue-rule") { $glue_rule = $d / $sentence_count; } else if ($type == "depth") { $depth = $d / $sentence_count; } else { list($rule_in,$word_in,$nt,$rule_out,$word_out) = split(":",$d); $rule_in = preg_replace("/a/","x",$rule_in); $rule_in = preg_replace("/b/","y",$rule_in); $rule_in = preg_replace("/c/","z",$rule_in); $rule_out = preg_replace("/a/","x",$rule_out); $rule_out = preg_replace("/b/","y",$rule_out); $rule_out = preg_replace("/c/","z",$rule_out); $nt_count += $d2 * $nt; if (!array_key_exists($d,$rule)) { $rule[$d] = 0; } $rule[$d] += $d2; if (!array_key_exists($nt,$count)) { $count[$nt] = 0; } $count[$nt] += $d2; $just_nt = preg_replace("/\d/","",$rule_in)."-".preg_replace("/\d/","",$rule_out); $no_wc = preg_replace("/\d/","W",$rule_in)."-".preg_replace("/\d/","",$rule_out); if ($just_nt == "-") { $just_nt = "lexical"; } if (!array_key_exists($just_nt,$count_nt)) { $count_nt[$just_nt] = 0; } $count_nt[$just_nt] += $d2; if (!array_key_exists($no_wc,$count_w)) { $count_w[$no_wc] = 0; } $count_w[$no_wc] += $d2; $total += $d2; } } print "Rules
\n"; printf("glue rule: %.2f
\n",$glue_rule); printf("tree depth: %.2f
\n",$depth); printf("nt/rule: %.2f
\n",$nt_count/$total); print "\n"; foreach ($count_nt as $rule => $count) { printf("\n",$rule,$count,$count/$total*100,'%'); } print "
%s%d%.1f%s
\n"; } // annotated sentences, navigation function bleu_show() { $count = $_GET['count']; if ($count == 0) { $count = 5; } $filter = ""; if (array_key_exists("filter",$_GET)) { $filter = base64_decode($_GET['filter']); } print "annotated sentences
sorted by: "; if ($_GET['sort'] == "order" || $_GET['sort'] == "") { print "order "; } else { print "order "; } if ($_GET['sort'] == "best") { print "best "; } else { print "best "; } if ($_GET['sort'] == "25") { print "25% "; } else { print "25% "; } if ($_GET['sort'] == "avg") { print "avg "; } else { print "avg "; } if ($_GET['sort'] == "75") { print "75% "; } else { print "75% "; } if ($_GET['sort'] == "worst") { print "worst; "; } else { print "worst; "; } print "showing: $count "; print "more "; print "all"; if ($filter != "") { print "; filter: '$filter'"; } sentence_annotation($count,$filter); print "

5 more | "; print "10 more | "; print "20 more | "; print "50 more | "; print "100 more | "; print "all "; } // annotated sentences core: reads data, sorts sentences, displays them function sentence_annotation($count,$filter) { global $set,$id,$dir,$biconcor; # get input $filtered = array(); $file = get_current_analysis_filename("coverage","input-annotation"); if (file_exists($file)) { $input = file($file); # filter is so specified if ($filter != "") { for($i=0;$i3) { $line["reference"] .= "
"; }; $line["reference"] .= $item[$j]; } $bleu[] = $line; } } # sort and label additional sentences as filtered global $sort; function cmp($a, $b) { global $sort; if ($sort == "order") { $a_idx = $a["id"]; $b_idx = $b["id"]; } else if ($sort == "worst" || $sort == "75") { $a_idx = $a["bleu"]; $b_idx = $b["bleu"]; if ($a_idx == $b_idx) { $a_idx = $b["id"]; $b_idx = $a["id"]; } } else if ($sort == "best" || $sort == "avg" || $sort == "25") { $a_idx = -$a["bleu"]; $b_idx = -$b["bleu"]; if ($a_idx == $b_idx) { $a_idx = $a["id"]; $b_idx = $b["id"]; } } if ($a_idx == $b_idx) { return 0; } return ($a_idx < $b_idx) ? -1 : 1; } $sort = $_GET['sort']; if ($sort == '') { $sort = "order"; } usort($bleu, 'cmp'); $offset = 0; if ($sort == "25" || $sort == "75") { $offset = (int) (count($bleu)/4); } else if ($sort == "avg") { $offset = (int) (count($bleu)/2); } $retained = array(); for($i=$offset;$i<$count+$offset && $i $segmentation_span) { # foreach ($segmentation_span as $span => $type) { # $segmentation[$sentence][$span]["nt"]=""; # } # } # } } $file = get_current_analysis_filename("basic","output-tree"); if (file_exists($file)) { $data = file($file); $span = 0; $last_sentence = -1; $nt_count = array(); for($i=0;$i $segmentation_span) { foreach ($segmentation_span as $span => $type) { $segmentation_out[$sentence][$span]["nt"]=""; } } } } $file = get_current_analysis_filename("basic","node"); if (file_exists($file)) { $data = file($file); $n = 0; $last_sentence = -1; for($i=0;$i
\n"; $biconcor = get_biconcor_version($dir,$set,$id); //print "

$sort / $offset
"; for($i=$offset;$i<$count+$offset && $i0 occ. in corpus, 0 translations, entropy: 0.00\n"; if ($biconcor) { print "
(click on input phrase for bilingual concordancer)
"; } if ($hierarchical) { sentence_annotation_hierarchical("#".$line["id"],$line["id"],$input[$line["id"]],$segmentation[$line["id"]],"in"); } else { print "[#".$line["id"]."] "; input_annotation($line["id"],$input[$line["id"]],$segmentation[$line["id"]],$filter); } } //else { // print "[".$line["id"].":".$line["bleu"]."] "; //} if ($hierarchical) { sentence_annotation_hierarchical($line["bleu"],$line["id"],$line["system"],$segmentation_out[$line["id"]],"out"); } else { print "[".$line["bleu"]."] "; output_annotation($line["id"],$line["system"],$segmentation[$line["id"]]); } print "
[ref] ".$line["reference"]."
"; } }

// Parse the coverage annotation of one input line.
//
// $coverage_vector is a space-separated list of items of the form
// "from-to:corpus_count:ttable_count:ttable_entropy" (fields after "to" are
// optional); items without "-" or ":" are ignored. Returns a nested array
// $coverage[from][to] holding whichever of the keys "corpus_count",
// "ttable_count" and "ttable_entropy" were present in the item.
function coverage($coverage_vector) {
  $coverage = array();
  // explode() replaces split(), which was removed in PHP 7; the separator
  // is a literal space so the result is the same
  foreach (explode(" ",$coverage_vector) as $item) {
    if (!preg_match("/[\-:]/",$item)) {
      continue;
    }
    $field = preg_split("/[\-:]/",$item);
    $from = $field[0];
    $to = $field[1];
    if (count($field)>2) {
      $coverage[$from][$to]["corpus_count"] = $field[2];
    }
    if (count($field)>3) {
      $coverage[$from][$to]["ttable_count"] = $field[3];
    }
    if (count($field)>4) {
      // key was misspelled "ttabel_entropy"; coverage_color() and
      // input_annotation() both use "ttable_entropy"
      $coverage[$from][$to]["ttable_entropy"] = $field[4];
    }
  }
  return $coverage;
}

// annotate an input sentence
function input_annotation($sentence,$input,$segmentation,$filter) { global $biconcor; list($words,$coverage_vector) = split("\t",$input); # get information from line in input annotation file $coverage = array(); foreach (split(" ",$coverage_vector) as $item) { if (preg_match("/[\-:]/",$item)) { list($from,$to,$corpus_count,$ttable_count,$ttable_entropy) = preg_split("/[\-:]/",$item); $coverage[$from][$to]["corpus_count"] = $corpus_count; $coverage[$from][$to]["ttable_count"] = $ttable_count; $coverage[$from][$to]["ttable_entropy"] = $ttable_entropy; } } $word = split(" ",$words); # compute the display level for each input phrase for($j=0;$j"; for($level=$max_level;$level>=1;$level--) { # rows for phrase display print ""; for($from=$sep_start;$from<$sep_end;$from++) { if (array_key_exists($from,$box[$level])) { $to = $box[$level][$from]; $size = $to - $from + 1; if ($size == 1) { print "
".$word[$from]; } else { $color = coverage_color($coverage[$from][$to]); $phrase = ""; $highlightwords = ""; $lowlightwords = ""; for($j=$from;$j<=$to;$j++) { if ($j>$from) { $phrase .= " "; } $phrase .= $word[$j]; $highlightwords .= " document.getElementById('inputword-$i-$j').style.backgroundColor='#ffff80';"; $lowlightwords .= " document.getElementById('inputword-$i-$j').style.backgroundColor='".coverage_color($coverage[$j][$j])."';"; } print "
"; } print "
"; $from += $size-1; } else { print "
"; } } print "\n"; } # display input words print "
"; for($j=$sep_start;$j<$sep_end;$j++) { if ($segmentation && array_key_exists($j,$segmentation["input_start"])) { $id = $segmentation["input_start"][$j]; print ""; } if (array_key_exists($j,$coverage)) { $color = coverage_color($coverage[$j][$j]); $cc = $coverage[$j][$j]["corpus_count"]; $tc = $coverage[$j][$j]["ttable_count"]; $te = $coverage[$j][$j]["ttable_entropy"]; } else { # unknown words $color = '#ffffff'; $cc = 0; $tc = 0; $te = 0; } print ""; if ($word[$j] == $filter) { print "".$word[$j].""; } else { print $word[$j]; } print ""; if ($segmentation && array_key_exists($j,$segmentation["input_end"])) { print ""; } print " "; } print "
\n"; print "\n"; $sep_start = $sep_end; } } print "
"; }

// Convert one colour intensity into a two-digit hex channel.
// The value is truncated to an integer and clamped to 128..255 so the
// result is always exactly two hex digits.
function coverage_color_channel($intensity) {
  $level = (int) floor($intensity);
  if ($level < 128) {
    $level = 128;
  }
  if ($level > 255) {
    $level = 255;
  }
  return sprintf("%02x",$level);
}

// color-coded coverage stats (corpus count, ttable count, entropy)
//
// $phrase is an array that may hold "corpus_count", "ttable_count" and
// "ttable_entropy"; missing entries (or a null $phrase) count as 0.
// Returns a "#rrggbb" string: each statistic lowers one channel from ff
// towards 80 (counts logarithmically, entropy linearly), with the channels
// ordered entropy, corpus count, ttable count.
function coverage_color($phrase) {
  $corpus_count = isset($phrase["corpus_count"]) ? (float) $phrase["corpus_count"] : 0.0;
  $ttable_count = isset($phrase["ttable_count"]) ? (float) $phrase["ttable_count"] : 0.0;
  $ttable_entropy = isset($phrase["ttable_entropy"]) ? (float) $phrase["ttable_entropy"] : 0.0;

  // max() keeps the log() argument >= 1 even for a malformed negative count
  $cc_color = coverage_color_channel(255 - 10 * log(1 + max(0.0,$corpus_count)));
  $tc_color = coverage_color_channel(255 - 20 * log(1 + max(0.0,$ttable_count)));
  $te_color = coverage_color_channel(255 - 32 * $ttable_entropy);

  // alternative channel orders that were tried:
  // $color = "#". $cc_color . $te_color . $tc_color; # reddish browns with some green
  // $color = "#". $cc_color . $tc_color . $te_color; # reddish brown with some blueish purple
  // $color = "#". $te_color . $tc_color . $cc_color; # pale purple towards red
  // $color = "#". $tc_color . $te_color . $cc_color; # blue-grey towards green
  // $color = "#". $tc_color . $cc_color . $te_color; # green-grey towards blue
  $color = "#". $te_color . $cc_color . $tc_color; # pale green towards red
  return $color;
}

// annotate an output sentence
function output_annotation($sentence,$system,$segmentation) { #$color = array("#FFC0C0","#FFC0FF","#C0C0FF","#C0FFFF","#C0FFC0"); $color = array("#c0c0c0","#e0e0ff","#b0b0ff","#8080ff","#4040ff"); $word = split(" ",$system); for($j=0;$j"; } print "$surface"; if ($segmentation && array_key_exists($j,$segmentation["output_end"])) { print ""; } print " "; } } function annotation_hierarchical($sentence,$segmentation,$segmentation_out,$node) { print "\n"; } function sentence_annotation_hierarchical($info,$sentence,$sequence,$segmentation,$in_out) { $In_Out = $in_out == "out" ? 
"Out" : "In"; #list($words,$coverage_vector) = split("\t",$input); $coverage = coverage($sequence); $word = preg_split("/\s/",$sequence); $color = array("#ffe0e0","#f0e0ff","#e0e0ff","#c0c0ff","#a0a0ff"); #$color = array("#FFC0C0","#FFC0FF","#C0C0FF","#C0FFFF","#C0FFC0"); #$color = array("#c0c0c0","#e0e0ff","#b0b0ff","#8080ff","#4040ff"); print "
\n"; print "
[$info]
"; $word_count = 0; for($span=0;$span"; for($depth=0;$depth"; } $words = $segmentation[$span]["words"]; # non terminal if (array_key_exists("nt",$segmentation[$span]) && $segmentation[$span]["nt"] != "") { print $segmentation[$span]["nt"].": "; } # no nonterminal and no words => invisible bar else if($words == "") { print "|"; } $span_word = array(); if ($words != "") { $span_word = split(" ",$words); } for($w=0;$w 0) { print " "; } if ($in_out == "in") { #print ""; print $word[$word_count]; #print ""; } else { list($surface,$correct) = split("\|", $word[$word_count]); print "$surface"; } $word_count++; } for($depth=0;$depth"; } print ""; # enclosing } print "
\n"; } function biconcor($query) { global $set,$id,$dir; $sentence = $_GET['sentence']; $biconcor = get_biconcor_version($dir,$set,$id); print "
"; $cmd = "./biconcor -l $dir/model/biconcor.$biconcor -Q ".base64_encode($query)." 2>/dev/null"; #print $cmd."

"; system($cmd); # print "

done."; print "

"; }