Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.hu103
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lv100
2 files changed, 203 insertions, 0 deletions
diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.hu b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.hu
new file mode 100644
index 000000000..c6b9af8ca
--- /dev/null
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.hu
@@ -0,0 +1,103 @@
+#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
+#Special cases are included for prefixes that ONLY appear before 0-9 numbers.
+
+#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in)
+#usually upper case letters are initials in a name
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+
+#List of titles and other common abbreviations. These are often followed by upper-case words, but do not indicate sentence breaks
+Dr
+dr
+kb
+Kb
+vö
+Vö
+pl
+Pl
+ca
+Ca
+min
+Min
+max
+Max
+ún
+Ún
+prof
+Prof
+de
+De
+du
+Du
+Szt
+St
+
+#Numbers only. These act as non-breaking prefixes only when followed by a numeric sequence
+# add #NUMERIC_ONLY# after the word for this function
+#This case is mostly for the English "No." which can either be a sentence of its own, or
+#if followed by a number, a non-breaking prefix
+
+# Month name abbreviations
+jan #NUMERIC_ONLY#
+Jan #NUMERIC_ONLY#
+Feb #NUMERIC_ONLY#
+feb #NUMERIC_ONLY#
+márc #NUMERIC_ONLY#
+Márc #NUMERIC_ONLY#
+ápr #NUMERIC_ONLY#
+Ápr #NUMERIC_ONLY#
+máj #NUMERIC_ONLY#
+Máj #NUMERIC_ONLY#
+jún #NUMERIC_ONLY#
+Jún #NUMERIC_ONLY#
+Júl #NUMERIC_ONLY#
+júl #NUMERIC_ONLY#
+aug #NUMERIC_ONLY#
+Aug #NUMERIC_ONLY#
+Szept #NUMERIC_ONLY#
+szept #NUMERIC_ONLY#
+okt #NUMERIC_ONLY#
+Okt #NUMERIC_ONLY#
+nov #NUMERIC_ONLY#
+Nov #NUMERIC_ONLY#
+dec #NUMERIC_ONLY#
+Dec #NUMERIC_ONLY#
+
+# Other abbreviations
+tel #NUMERIC_ONLY#
+Tel #NUMERIC_ONLY#
+Fax #NUMERIC_ONLY#
+fax #NUMERIC_ONLY#
diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lv b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lv
new file mode 100644
index 000000000..81754a17a
--- /dev/null
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.lv
@@ -0,0 +1,100 @@
+#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
+#Special cases are included for prefixes that ONLY appear before 0-9 numbers.
+
+#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in)
+#usually upper case letters are initials in a name
+A
+B
+C
+D
+E
+F
+G
+H
+I
+J
+K
+L
+M
+N
+O
+P
+Q
+R
+S
+T
+U
+V
+W
+X
+Y
+Z
+
+#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks
+dr
+Dr
+med
+prof
+Prof
+inž
+Inž
+ist.loc
+Ist.loc
+kor.loc
+Kor.loc
+v.i
+vietn
+Vietn
+
+#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence)
+a.l
+t.p
+pārb
+Pārb
+vec
+Vec
+inv
+Inv
+sk
+Sk
+spec
+Spec
+vienk
+Vienk
+virz
+Virz
+māksl
+Māksl
+mūz
+Mūz
+akad
+Akad
+soc
+Soc
+galv
+Galv
+vad
+Vad
+sertif
+Sertif
+folkl
+Folkl
+hum
+Hum
+
+#Numbers only. These act as non-breaking prefixes only when followed by a numeric sequence
+# add #NUMERIC_ONLY# after the word for this function
+#This case is mostly for the English "No." which can either be a sentence of its own, or
+#if followed by a number, a non-breaking prefix
+Nr #NUMERIC_ONLY#