# FOMA script for ku-latn and ku-arab conversions
# Generates .att files when run from this directory using:
#   foma -f ku.foma
# Then generate executable .pfst files with:
#   ../tools/build-langconv-fst.js -l ku-latn ku-arab
#   ../tools/build-langconv-fst.js -l ku-arab ku-latn

# Number of EXPECT[...] checks echoed by this script (12 appear below).
echo EXPECT 12

# NO'BRACKETS, TEST'UPPER'UNIVERSAL, LONGSAFE, LONGSAFE'NOT'OK and IDENTITY
# are not defined in this file; presumably they come from these two sources.
source brackets.foma
source safety.foma

# Arabic-script -> Latin-script table.
# Every mapping is a contextless `@->` rule; mappings are joined with `,,`.
define CONVERT'KU'LATN NO'BRACKETS .o. [
  {ب} @-> {b} ,,
  {ج} @-> {c} ,,
  {چ} @-> {ç} ,,
  {د} @-> {d} ,,
  {ف} @-> {f} ,,
  {گ} @-> {g} ,,
  {ھ} @-> {h} ,,
  {ہ} @-> {h} ,,
  {ه} @-> {h} ,,
  {ح} @-> {h} ,,
  {ژ} @-> {j} ,,
  {ك} @-> {k} ,,
  {ک} @-> {k} ,,
  {ل} @-> {l} ,,
  {م} @-> {m} ,,
  {ن} @-> {n} ,,
  {پ} @-> {p} ,,
  {ق} @-> {q} ,,
  {ر} @-> {r} ,,
  {س} @-> {s} ,,
  {ش} @-> {ş} ,,
  {ت} @-> {t} ,,
  {ڤ} @-> {v} ,,
  {خ} @-> {x} ,,
  {غ} @-> {x} ,,
  {ز} @-> {z} ,,

  # ک و => ku -- is correct
  # و ك=> ku -- is also correct

  # Double vowels and semivowels
  {ڵ} @-> {ll} ,, # ll
  {ڕ} @-> {rr} ,, # rr
  {ا} @-> {a} ,,
  # {ئێ} @-> {ê} ,, # initial e
  {ە} @-> {e} ,,
  {ه‌} @-> {e} ,, # with one non-joiner
  {ه‌‌} @-> {e} ,, # with two non-joiner
  {ة} @-> {e} ,,
  {ێ} @-> {ê} ,,
  {ي} @-> {î} ,,
  {ی} @-> {î} ,, # U+06CC db 8c ARABIC LETTER FARSI YEH
  {ى} @-> {î} ,, # U+0649 d9 89 ARABIC LETTER ALEF MAKSURA
  {ۆ} @-> {o} ,,
  {و} @-> {w} ,,
  {ئ} @-> 0 ,, # initial hemze should not be shown
  {،} @-> {,} ,,
  {ع} @-> {'} ,, # ayn '
  {؟} @-> {?} ,,

  # digits
  {٠} @-> {0} ,, # ٠
  {١} @-> {1} ,, # ١
  {٢} @-> {2} ,, # ٢
  {٣} @-> {3} ,, # ٣
  {٤} @-> {4} ,, # ٤
  {٥} @-> {5} ,, # ٥
  {٦} @-> {6} ,, # ٦
  {٧} @-> {7} ,, # ٧
  {٨} @-> {8} ,, # ٨
  {٩} @-> {9}    # ٩
];

# Latin-script -> Arabic-script table.
# XXX This table imported from PHP has duplicate entries for H and K :(
# The duplicates are kept below as commented-out lines so that each Latin
# letter has exactly one active mapping.
define CONVERT'KU'ARAB NO'BRACKETS .o. [
  {b} @-> {ب} ,,
  {c} @-> {ج} ,,
  {ç} @-> {چ} ,,
  {d} @-> {د} ,,
  {f} @-> {ف} ,,
  {g} @-> {گ} ,,
  {h} @-> {ه} ,,
  {j} @-> {ژ} ,,
  {k} @-> {ک} ,,
  {l} @-> {ل} ,,
  {m} @-> {م} ,,
  {n} @-> {ن} ,,
  {p} @-> {پ} ,,
  {q} @-> {ق} ,,
  {r} @-> {ر} ,,
  {s} @-> {س} ,,
  {ş} @-> {ش} ,,
  {t} @-> {ت} ,,
  {v} @-> {ڤ} ,,
  {x} @-> {خ} ,,
  {y} @-> {ی} ,,
  {z} @-> {ز} ,,

  {B} @-> {ب} ,,
  {C} @-> {ج} ,,
  {Ç} @-> {چ} ,,
  {D} @-> {د} ,,
  {F} @-> {ف} ,,
  {G} @-> {گ} ,,
  #{H} @-> {ھ} ,, # AMBIGUOUS IN PHP
  #{H} @-> {ہ} ,, # AMBIGUOUS IN PHP
  #{H} @-> {ه} ,, # AMBIGUOUS IN PHP
  {H} @-> {ح} ,,
  {J} @-> {ژ} ,,
  # {K} @-> {ك} ,, # AMBIGUOUS IN PHP
  {K} @-> {ک} ,,
  {L} @-> {ل} ,,
  {M} @-> {م} ,,
  {N} @-> {ن} ,,
  {P} @-> {پ} ,,
  {Q} @-> {ق} ,,
  {R} @-> {ر} ,,
  {S} @-> {س} ,,
  {Ş} @-> {ش} ,,
  {T} @-> {ت} ,,
  {V} @-> {ڤ} ,,
  {W} @-> {و} ,,
  {X} @-> {خ} ,,
  {Y} @-> {ی} ,,
  {Z} @-> {ز} ,,

  ## Double consonants
  # {ll} @-> {ڵ} ,, # if possible, treat double-l and l separately
  # {rr} @-> {ڕ} ,, # same for double-r

  ## Single capital letters
  # { C} @-> {ج} ,,

  ## Vowels
  {a} @-> {ا} ,,
  {e} @-> {ە} ,,
  {ê} @-> {ێ} ,,
  {i} @-> 0 ,,
  {î} @-> {ی} ,,
  {o} @-> {ۆ} ,,
  {u} @-> {و} ,,
  {û} @-> {وو} ,,
  {w} @-> {و} ,,
  {,} @-> {،} ,,
  {?} @-> {؟} ,,

  ## Try to replace the leading vowel
  # NOTE(review): the patterns have the space BEFORE the vowel while the
  # replacements have it AFTER the Arabic letters -- looks inherited from the
  # PHP table; confirm this is intended before changing it.
  { a} @-> {ئا } ,,
  { e} @-> {ئە } ,,
  { ê} @-> {ئێ } ,,
  { î} @-> {ئی } ,,
  { o} @-> {ئۆ } ,,
  { u} @-> {ئو } ,,
  { û} @-> {ئوو } ,,
  {A} @-> {ئا} ,,
  {E} @-> {ئە} ,,
  {Ê} @-> {ئێ} ,,
  {Î} @-> {ئی} ,,
  {O} @-> {ئۆ} ,,
  {U} @-> {ئو} ,,
  {Û} @-> {ئوو} ,,
  { A} @-> {ئا } ,,
  { E} @-> {ئە } ,,
  { Ê} @-> {ئێ } ,,
  { Î} @-> {ئی } ,,
  { O} @-> {ئۆ } ,,
  { U} @-> {ئو } ,,
  { Û} @-> {ئوو }

  # Disable ayn for now; single quotes are simply too common to
  # interpret them as ayn.
  # {'} @-> {ع} ,, # '

  ### deactivated for now, breaks links i.e. in header of Special:Recentchanges :-(
  ## digits
  #{0} @-> {٠} ,, # ٠
  #{1} @-> {١} ,, # ١
  #{2} @-> {٢} ,, # ٢
  #{3} @-> {٣} ,, # ٣
  #{4} @-> {٤} ,, # ٤
  #{5} @-> {٥} ,, # ٥
  #{6} @-> {٦} ,, # ٦
  #{7} @-> {٧} ,, # ٧
  #{8} @-> {٨} ,, # ٨
  #{9} @-> {٩} ,, # ٩
];

# Generate and save the transducers.
# Each stanza pushes one network, echoes the expected test result, runs the
# test, optionally writes an .att file, and pops the network again.

# --- ku-latn ---
regex TEST'UPPER'UNIVERSAL(CONVERT'KU'LATN);
echo EXPECT[trans-ku-latn universal]: 1 (1 = TRUE, 0 = FALSE)
test null
pop stack

regex CONVERT'KU'LATN;
minimize net
echo EXPECT[trans-ku-latn functional]: 1 (1 = TRUE, 0 = FALSE)
test functional
byte machine
write att trans-ku-latn.att
pop stack

regex LONGSAFE'NOT'OK(CONVERT'KU'LATN .o. CONVERT'KU'ARAB);
echo EXPECT[brack-ku-latn-ku-arab safe]: 1 (1 = TRUE, 0 = FALSE)
test null
print shortest-string
pop stack

regex LONGSAFE(CONVERT'KU'LATN .o. CONVERT'KU'ARAB);
minimize net
echo EXPECT[brack-ku-latn-ku-arab functional]: 1 (1 = TRUE, 0 = FALSE)
test functional
byte machine
write att brack-ku-latn-ku-arab.att
pop stack

regex LONGSAFE'NOT'OK(CONVERT'KU'LATN .o. IDENTITY);
echo EXPECT[brack-ku-latn-noop safe]: 1 (1 = TRUE, 0 = FALSE)
test null
print shortest-string
pop stack

regex LONGSAFE(CONVERT'KU'LATN .o. IDENTITY);
minimize net
echo EXPECT[brack-ku-latn-noop functional]: 1 (1 = TRUE, 0 = FALSE)
test functional
byte machine
write att brack-ku-latn-noop.att
pop stack

# --- ku-arab ---
regex TEST'UPPER'UNIVERSAL(CONVERT'KU'ARAB);
echo EXPECT[trans-ku-arab universal]: 1 (1 = TRUE, 0 = FALSE)
test null
pop stack

regex CONVERT'KU'ARAB;
minimize net
echo EXPECT[trans-ku-arab functional]: 1 (1 = TRUE, 0 = FALSE)
test functional
byte machine
write att trans-ku-arab.att
pop stack

regex LONGSAFE'NOT'OK(CONVERT'KU'ARAB .o. CONVERT'KU'LATN);
echo EXPECT[brack-ku-arab-ku-latn safe]: 1 (1 = TRUE, 0 = FALSE)
test null
print shortest-string
pop stack

regex LONGSAFE(CONVERT'KU'ARAB .o. CONVERT'KU'LATN);
minimize net
echo EXPECT[brack-ku-arab-ku-latn functional]: 1 (1 = TRUE, 0 = FALSE)
test functional
byte machine
write att brack-ku-arab-ku-latn.att
pop stack

regex LONGSAFE'NOT'OK(CONVERT'KU'ARAB .o. IDENTITY);
echo EXPECT[brack-ku-arab-noop safe]: 1 (1 = TRUE, 0 = FALSE)
test null
print shortest-string
pop stack

regex LONGSAFE(CONVERT'KU'ARAB .o. IDENTITY);
minimize net
echo EXPECT[brack-ku-arab-noop functional]: 1 (1 = TRUE, 0 = FALSE)
test functional
byte machine
write att brack-ku-arab-noop.att
pop stack