Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/salm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'Src/SuffixArrayApplications/SuffixArraySearch/Applications/LocateNgramInCorpus.cpp~')
-rwxr-xr-xSrc/SuffixArrayApplications/SuffixArraySearch/Applications/LocateNgramInCorpus.cpp~66
1 files changed, 66 insertions, 0 deletions
diff --git a/Src/SuffixArrayApplications/SuffixArraySearch/Applications/LocateNgramInCorpus.cpp~ b/Src/SuffixArrayApplications/SuffixArraySearch/Applications/LocateNgramInCorpus.cpp~
new file mode 100755
index 0000000..71097f9
--- /dev/null
+++ b/Src/SuffixArrayApplications/SuffixArraySearch/Applications/LocateNgramInCorpus.cpp~
@@ -0,0 +1,66 @@
+#include "stdio.h"
+#include "stdlib.h"
+
+#include "_SuffixArraySearchApplicationBase.h"
+
+#include <vector>
+#include <iostream>
+
+using namespace std;
+
+/**
+* \ingroup search
+*
+* Locate an n-gram in the indexed corpus, return its locations as <sentId, offsetInSent> pairs
+* SentID and offset are all 1-based
+*
+* Note:
+* The offset of the n-gram in a sentence is represented as "char" in the returned structure S_SimplePhraseLocationElement
+* To output it as a number, one needs to cast it to integer type for proper display
+*
+*
+* Revision $Rev: 3794 $
+* Last Modified $LastChangedDate: 2007-06-29 02:17:32 -0400 (Fri, 29 Jun 2007) $
+**/
+int main(int argc, char * argv[]){
+ //-----------------------------------------------------------------------------
+ //check parameter
+ if(argc<2){
+ fprintf(stderr,"\nOutput all the locations of an n-gram in an indexed corpus\n");
+ fprintf(stderr,"\nUsage:\n");
+ fprintf(stderr,"\n%s corpusFileNameStem < list of n-grams\n\n",argv[0]);
+
+ exit(-1);
+ }
+
+ //-----------------------------------------------------------------------------
+
+ C_SuffixArraySearchApplicationBase saObj;
+
+ //load the indexed corpus with vocabulary(noVoc=false) and with offset(noOffset=false)
+ saObj.loadData_forSearch(argv[1], false, false);
+
+
+ cerr<<"Input N-grams:\n";
+ char tmpString[10000];
+ while(!cin.eof()){
+ cin.getline(tmpString,10000,'\n');
+ if(strlen(tmpString)>0){
+ vector<S_SimplePhraseLocationElement> locations;
+
+ locations = saObj.locateExactPhraseInCorpus(tmpString);
+
+ if(locations.size()==0){
+ cout<<"No occurrences found.\n";
+ }
+ else{
+ for(int i=0;i<locations.size(); i++){
+ cout<<"SentId="<<locations[i].sentIdInCorpus<<" Pos="<<(int)locations[i].posInSentInCorpus<<endl;
+ }
+ }
+ cout<<endl;
+ }
+ }
+
+ return 0;
+}