Page MenuHomeWickedGov Phorge

BanConverter.php
No OneTemporary

Size
24 KB
Referenced Files
None
Subscribers
None

BanConverter.php

<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
/**
* Balinese specific code.
*
* @ingroup Languages
*/
class BanConverter extends LanguageConverterIcu {
public function getMainCode(): string {
return 'ban';
}
public function getLanguageVariants(): array {
return [ 'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku' ];
}
public function getVariantsFallbacks(): array {
return [
'ban-bali' => 'ban',
'ban-x-dharma' => 'ban',
'ban-x-palmleaf' => 'ban',
'ban-x-pku' => 'ban',
];
}
public function getVariantNames(): array {
$names = [
'ban' => 'Basa Bali',
'ban-bali' => 'ᬩᬲᬩᬮᬶ',
'ban-x-dharma' => 'Basa Bali (alih aksara DHARMA)',
'ban-x-palmleaf' => 'Basa Bali (alih aksara Palmleaf.org)',
'ban-x-pku' => 'Basa Bali (alih aksara Puri Kauhan Ubud)',
];
return array_merge( parent::getVariantNames(), $names );
}
protected function getIcuRules() {
$rules = [];
# transliteration rules developed for Palmleaf.org
$rules['ban-x-palmleaf'] = <<<'EOF'
::NFC;
ᬒᬁ \uE050; # OM
\uE001; # SIGN ULU CANDRA
\uE002; # SIGN CECEK
\uE003; # SIGN BISAH
\uE005; # LETTER AKARA
\uE006; # LETTER AKARA TEDUNG
\uE007; # LETTER IKARA
\uE008; # LETTER IKARA TEDUNG
\uE009; # LETTER UKARA
\uE00A; # LETTER UKARA TEDUNG
\uE00B; # LETTER RA REPA
\uE060; # LETTER RA REPA TEDUNG
\uE00C; # LETTER LA LENGA
\uE061; # LETTER LA LENGA TEDUNG
\uE00F; # LETTER EKARA
\uE010; # LETTER AIKARA
\uE013; # LETTER OKARA
\uE014; # LETTER OKARA TEDUNG
\uE015; # LETTER KA
\uE016; # LETTER KA MAHAPRANA
\uE017; # LETTER GA
\uE018; # LETTER GA GORA
\uE019; # LETTER NGA
\uE01A; # LETTER CA
\uE01B; # LETTER CA LACA
\uE01C; # LETTER JA
\uE01D; # LETTER JA JERA
\uE01E; # LETTER NYA
\uE01F; # LETTER TA LATIK
\uE020; # LETTER TA MURDA MAHAPRANA
\uE021; # LETTER DA MURDA ALPAPRANA
\uE022; # LETTER DA MURDA MAHAPRANA
\uE023; # LETTER NA RAMBAT
\uE024; # LETTER TA
\uE025; # LETTER TA TAWA
\uE026; # LETTER DA
\uE027; # LETTER DA MADU
\uE028; # LETTER NA
\uE02A; # LETTER PA
\uE02B; # LETTER PA KAPAL
\uE02C; # LETTER BA
\uE02D; # LETTER BA KEMBANG
\uE02E; # LETTER MA
\uE02F; # LETTER YA
\uE030; # LETTER RA
\uE032; # LETTER LA
\uE035; # LETTER WA
\uE036; # LETTER SA SAGA
\uE037; # LETTER SA SAPA
\uE038; # LETTER SA
\uE039; # LETTER HA
\uE03C; # SIGN REREKAN
\uE03E; # VOWEL SIGN TEDUNG
\uE03F; # VOWEL SIGN ULU
\uE040; # VOWEL SIGN ULU SARI
\uE041; # VOWEL SIGN SUKU
\uE042; # VOWEL SIGN SUKU ILUT
\uE043; # VOWEL SIGN RA REPA
\uE044; # VOWEL SIGN RA REPA TEDUNG
ᬼ→ \uE062; # VOWEL SIGN LA LENGA
\uE063; # VOWEL SIGN LA LENGA TEDUNG
\uE047; # VOWEL SIGN TALING
ᬿ \uE048; # VOWEL SIGN TALING REPA
\uE04B; # VOWEL SIGN TALING TEDUNG
\uE04C; # VOWEL SIGN TALING REPA TEDUNG
\uE045; # VOWEL SIGN PEPET
\uE049; # VOWEL SIGN PEPET TEDUNG
\uE04D; # ADEG ADEG
\uE058; # LETTER KAF SASAK
\uE059; # LETTER KHOT SASAK
\uE024\uE03C; # LETTER TZIR SASAK
\uE05E; # LETTER EF SASAK
\uE081; # LETTER VE SASAK
\uE05B; # LETTER ZAL SASAK
\uE038\uE03C; # LETTER ASYURA SASAK
\uE066; # DIGIT ZERO
\uE067; # DIGIT ONE
\uE068; # DIGIT TWO
\uE069; # DIGIT THREE
\uE06A; # DIGIT FO
\uE06B; # DIGIT FIVE
\uE06C; # DIGIT SIX
\uE06D; # DIGIT SEVEN
\uE06E; # DIGIT EIGHT
\uE06F; # DIGIT NINE
'//'; # PANTI
'///'; # PAMADA
; # WINDU
᭟᭜᭟ '\\\\';
' ' ' ' '\\\\';
\:; # CARIK PAMUNGKAH
\uE064; # CARIK SIKI
\uE065; # CARIK PAREREN
; # PAMENENG
#consonants
$chandrabindu=\uE001;
$ardhachandra=\u1B00;
$anusvara=\uE002;
$visarga=\uE003;
# w←vowel→ represents the stand-alone form
$wa=\uE005;
$waa=\uE006;
$wi=\uE007;
$wii=\uE008;
$wu=\uE009;
$wuu=\uE00A;
$wr=\uE00B;
$wl=\uE00C;
$wce=\uE00D; # LETTER CANDRA E
$wse=\uE00E; # LETTER SHORT E
$we=\uE00F; # ए LETTER E
$wai=\uE010;
$wco=\uE011; # LETTER CANDRA O
$wso=\uE012; # LETTER SHORT O
$wo=\uE013; # ओ LETTER O
$wau=\uE014;
$ka=\uE015;
$kha=\uE016;
$ga=\uE017;
$gha=\uE018;
$nga=\uE019;
$ca=\uE01A;
$cha=\uE01B;
$ja=\uE01C;
$jha=\uE01D;
$nya=\uE01E;
$tta=\uE01F;
$ttha=\uE020;
$dda=\uE021;
$ddha=\uE022;
$nna=\uE023;
$ta=\uE024;
$tha=\uE025;
$da=\uE026;
$dha=\uE027;
$na=\uE028;
$ena=\uE029; #compatibility
$pa=\uE02A;
$pha=\uE02B;
$ba=\uE02C;
$bha=\uE02D;
$ma=\uE02E;
$ya=\uE02F;
$ra=\uE030;
$vva=\uE081;
$rra=\uE031;
$la=\uE032;
$lla=\uE033;
$ela=\uE034; #compatibility
$va=\uE035;
$sha=\uE036;
$ssa=\uE037;
$sa=\uE038;
$ha=\uE039;
$nukta=\uE03C;
$avagraha=\uE03D; # SIGN AVAGRAHA
# ←vowel→ represents the dependent form
$aa=\uE03E;
$i=\uE03F;
$ii=\uE040;
$u=\uE041;
$uu=\uE042;
$rh=\uE043;
$rrh=\uE044;
$ce=\uE045; #VOWEL SIGN CANDRA E
$se=\uE046; #VOWEL SIGN SHORT E
$e=\uE047;
$ai=\uE048;
$co=\uE049; # VOWEL SIGN CANDRA O
$so=\uE04A; # VOWEL SIGN SHORT O
$o=\uE04B; # ो
$au=\uE04C;
$virama=\uE04D;
$om=\uE050; # OM
\uE051→; # UNMAPPED STRESS SIGN UDATTA
\uE052→; # UNMAPPED STRESS SIGN ANUDATTA
\uE053→; # UNMAPPED GRAVE ACCENT
\uE054→; # UNMAPPED ACUTE ACCENT
$lm = \uE055;# Telugu Length Mark
$ailm=\uE056;# AI Length Mark
$aulm=\uE057;# AU Length Mark
#urdu compatibity forms
$uka=\uE058;
$ukha=\uE059;
$ugha=\uE05A;
$ujha=\uE05B;
$uddha=\uE05C;
$udha=\uE05D;
$ufa=\uE05E;
$uya=\uE05F;
$wrr=\uE060;
$wll=\uE061;
$lh=\uE062;
$llh=\uE063;
$danda=\uE064;
$doubleDanda=\uE065;
$zero=\uE066; # DIGIT ZERO
$one=\uE067; # DIGIT ONE
$two=\uE068; # DIGIT TWO
$three=\uE069; # DIGIT THREE
$four=\uE06A; # DIGIT FOUR
$five=\uE06B; # DIGIT FIVE
$six=\uE06C; # DIGIT SIX
$seven=\uE06D; # DIGIT SEVEN
$eight=\uE06E; # DIGIT EIGHT
$nine=\uE06F; # DIGIT NINE
# Glottal stop
$dgs=\uE082;
#Khanda-ta
$kta=\uE083;
$depVowelAbove=[\uE03E-\uE040\uE045-\uE04C];
$depVowelBelow=[\uE041-\uE044];
# $x was originally called '§'; $z was '%'
$x=[$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co];
$z=[bcdfghjklmnpqrstvwxyz];
$vowels=[aeiour̥̄̆];
$forceIndependentMatra = [^[[:L:][̀-͌]]];
$strike=\u0336;
######################################################################
# normalize input
######################################################################
# delete zwnj
\u200C→;
# reprocess from beginning
::Null;
######################################################################
# convert from Native letters to Latin letters
######################################################################
#glottal stop
$wa$virama k'';
#anusvara
$anusvara ng;
#surang
;
# Urdu compatibility
$ya$nukta}$x y;
$ya$nukta$virama y;
$ya$nukta ya;
$la$nukta }$x l;
$la$nukta$virama l;
$la$nukta la;
$na$nukta }$x n;
$na$nukta$virama n;
$na$nukta na;
$ena }$x n;
$ena$virama n;
$ena na;
$uka qa;
$ka$nukta }$x q;
$ka$nukta$virama q;
$ka$nukta qa;
$kha$nukta }$x kh;
$kha$nukta$virama kh;
$kha$nukta kha;
$ukha$virama kh;
$ukha kha;
$ugha gha;
$ga$nukta }$x gh;
$ga$nukta$virama gh;
$ga$nukta gha;
$ujha za;
$ja$nukta }$x z;
$ja$nukta$virama z;
$ja$nukta za;
$ddha$nukta}$x r;
$ddha$nukta$virama r;
$ddha$nukta ra;
$uddha}$x r;
$uddha$virama r;
$uddha ra;
$udha ra;
$dda$nukta}$x r;
$dda$nukta$virama r;
$dda$nukta ra;
$pha$nukta }$x f;
$pha$nukta$virama f;
$pha$nukta fa;
$ufa }$x f;
$ufa$virama f;
$ufa fa;
$ra$nukta}$x r;
$ra$nukta$virama r;
$ra$nukta ra;
$lla$nukta}$x l;
$lla$nukta$virama l;
$lla$nukta la;
$ela}$x l;
$ela$virama l;
$ela la;
$uya}$x y;
$uya$virama y;
$uya ya;
# normal consonants
$ka$virama}$ha→k'';
$ka}$x→k;
$ka$virama→k;
$ka→ka;
$kha$i$u→k $strike h $strike;
$kha}$x→kh;
$kha$virama→kh;
$kha→kha;
$ga$virama}$ha→g'';
$ga}$x→g;
$ga$virama→g;
$ga→ga;
$gha$i$u→g $strike h $strike;
$gha}$x→gh;
$gha$virama→gh;
$gha→gha;
$nga$i$u→n $strike g $strike;
$nga}$x→ng;
$nga$virama→ng;
$nga→nga;
$ca$virama}$ha→c'';
$ca}$x→c;
$ca$virama→c;
$ca→ca;
$cha$i$u→c $strike h $strike;
$cha}$x→ch;
$cha$virama→ch;
$cha→cha;
$ja$virama}$ha→j'';
$ja}$x→j;
$ja$virama→j;
$ja→ja;
$jha$i$u→j $strike h $strike;
$jha}$x→jh;
$jha$virama→jh;
$jha→jha;
$nya }$x→ñ;
$nya$virama→ñ;
$nya ña;
$tta$virama}$ha→ṭ'';
$tta}$x→ṭ;
$tta$virama→ṭ;
$tta→ṭa;
$ttha$i$u→ṭ $strike h $strike;
$ttha}$x→ṭh;
$ttha$virama→ṭh;
$ttha→ṭha;
$dda}$x$ha→ḍ'';
$dda}$x→ḍ;
$dda$virama→ḍ;
$dda→ḍa;
$ddha$i$u→ḍ $strike h $strike;
$ddha}$x→ḍh;
$ddha$virama→ḍh;
$ddha→ḍha;
$nna}$x→ṇ;
$nna$virama→ṇ;
$nna→ṇa;
$ta$virama}$ha→t'';
$ta}$x→t;
$ta$virama→t;
$ta→ta;
$tha$i$u→t $strike h $strike;
$tha}$x→th;
$tha$virama→th;
$tha→tha;
$da$virama}$ha→d'';
$da}$x→d;
$da$virama→d;
$da→da;
$dha$i$u→d $strike h $strike;
$dha}$x→dh;
$dha$virama→dh;
$dha→dha;
$na$virama}$ga→n'';
$na}$x→n;
$na$virama→n;
$na→na;
$pa$virama}$ha→p'';
$pa}$x→p;
$pa$virama→p;
$pa→pa;
$pha$i$u→p $strike h $strike;
$pha}$x→ph;
$pha$virama→ph;
$pha→pha;
$ba$virama}$ha→b'';
$ba}$x→b;
$ba$virama→b;
$ba→ba;
$bha$i$u→b $strike h $strike;
$bha}$x→bh;
$bha$virama→bh;
$bha→bha;
$ma}$x→m;
$ma$virama→m;
$ma→ma;
$ya}$x→y;
$ya$virama→y;
$ya→ya;
$ra}$x→r;
$ra$virama→r;
$ra→ra;
$vva}$x→v;
$vva$virama→v;
$vva→va;
$rra}$x→r;
$rra$virama→r;
$rra→ra;
$la}$x→l;
$la$virama→l;
$la→la;
$lla}$x→l;
$lla$virama→l;
$lla→la;
$va}$x→w;
$va$virama→w;
$va→wa;
$sa}$x→s;
$sa$virama→s;
#for gurmukhi
$sa$nukta}$x→sy;
$sa$nukta$virama→sy;
$sa$nukta→sya;
$sa→sa;
$sha}$x→ś;
$sha$virama→ś;
$sha→śa;
$ssa}$x→sy;
$ssa$virama→ṣ;
$ssa→ṣa;
$ha}$x→h;
$ha$virama→h;
$ha→ha;
# dependent vowels (should never occur except following consonants)
$forceIndependentMatra{$aa ̔ā;
$forceIndependentMatra{$ai ̔ai;
$forceIndependentMatra{$au ̔au;
$forceIndependentMatra{$ii ̔ī;
$forceIndependentMatra{$i ̔i;
$forceIndependentMatra{$uu ̔ū;
$forceIndependentMatra{$u ̔u;
$forceIndependentMatra{$rrh ̔r̥ö;
$forceIndependentMatra{$rh ̔r̥ĕ;
$forceIndependentMatra{$llh ̔l̥ö;
$forceIndependentMatra{$lh ̔l̥ĕ;
$forceIndependentMatra{$e ̔e;
$forceIndependentMatra{$o ̔o;
#extra vowels
$forceIndependentMatra{$ce ̔ĕ;
$forceIndependentMatra{$co ̔ö;
$forceIndependentMatra{$se ̔ĕ;
$forceIndependentMatra{$so ̔o;
$forceIndependentMatra{$nukta ; # Nukta cannot appear independently or as first character
$forceIndependentMatra{$virama ; # Virama cannot appear independently or as first character
$i$u $strike;
$aa ;
$ai ai;
$au au;
$ii ;
$i i;
$uu ;
$u u;
$rrh r̥ö;
$rh r̥ĕ;
$llh l̥ö;
$lh l̥ĕ;
$e e;
$o o;
#extra vowels
$ce ĕ;
$co ö;
$se ĕ;
$so o;
#dependent vowels when following independent vowels. Generally Illegal only for roundtripping
$waa} $x ;
$wai} $x ai;
$wau} $x au;
$wii} $x ;
$wi } $x i;
$wuu} $x ;
$wu } $x u;
$wrr} $x r̥ö;
$wr } $x r̥ĕ;
$wll} $x l̥ö;
$wl } $x l̥ĕ;
$we } $x e;
$wo } $x o;
$wa } $x a;
#extra vowels
$wce} $x ĕ;
$wco} $x ö;
$wse} $x ĕ;
$wso} $x o;
$om} $x oṁ;
# independent vowels when preceeded by vowels
$vowels{$waa '';
$vowels{$wai ''ai;
$vowels{$wau ''au;
$vowels{$wii '';
$vowels{$wi ''i;
$vowels{$wuu '';
$vowels{$wu ''u;
$vowels{$we ''e;
$vowels{$wo ''o;
$vowels{$wa ''a;
#extra vowels
$vowels{$wce ''ĕ;
$vowels{$wco ''ö;
$vowels{$wse ''ĕ;
$vowels{$wso ''o;
$vowels{$om ''oṁ;
# independent vowels (otherwise)
$waa ;
$wai ai;
$wau au;
$wii ;
$wi i;
$wuu ;
$wu u;
$wrr r̥ö;
$wr r̥ĕ;
$wll l̥ö;
$wl l̥ĕ;
$we e;
$wo o;
$wa a;
#extra vowels
$wce ĕ;
$wco ö;
$wse ĕ;
$wso o;
$om oṁ;
# stress marks
$avagraha ;
$chandrabindu ṅġ;
$ardhachandra ;
$visarga ;
# numbers
$zero 0;
$one 1;
$two 2;
$three 3;
$four 4;
$five 5;
$six 6;
$seven 7;
$eight 8;
$nine 9;
$lm ;
$ailm ;
$aulm ;
$dgs→'';
$kta→t;
# Balinese numbers are surrounded by dandas which can be removed
$danda } [$zero$one$two$three$four$five$six$seven$eight$nine] ' ';
[0123456789] { $danda ' ';
$danda→', ';
$doubleDanda→'. ';
\uE070→; # ABBREVIATION SIGN
# LETTER RA WITH MIDDLE DIAGONAL
\uE071}$x→ra;
\uE071$virama→r;
\uE071→ra;
# LETTER RA WITH LOWER DIAGONAL
\uE072}$x→ra;
\uE072$virama→r;
\uE072→ra;
\uE073→; # RUPEE MARK
\uE074→; # RUPEE SIGN
\uE075→; # CURRENCY NUMERATOR ONE
\uE076→; # CURRENCY NUMERATOR TWO
\uE077→; # CURRENCY NUMERATOR THREE
\uE078→; # CURRENCY NUMERATOR FOUR
\uE079→; # CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\uE07A→; # CURRENCY DENOMINATOR SIXTEEN
\uE07B→; # ISSHAR
\uE07C→; # TIPPI
\uE07D→; # ADDAK
\uE07E→; # IRI
\uE07F→; # URA
\uE080→; # EK ONKAR
\uE004→; # DEVANAGARI VOWEL SIGN SHORT A
::NFC;
EOF;
# transliteration rules following DHARMA project "strict transliteration"
# mostly follows ISO-15919, with modifications for precision and broader coverage
# https://hal.inria.fr/halshs-02272407/
$rules['ban-x-dharma'] = <<<'EOF'
::NFC;
$dv_no_rerekan = [\u1B35-\u1B44];
$dv = [\u1B34$dv_no_rerekan];
$c = [\u1B13-\u1B33 \u1B45-\u1B4C];
# disambiguation from aspirates
[kgcjṭḍtdpb] { ᭄ } ᬳ → \:;
# various signs
ᬀ → ṁ\*; # ulu ricem / ardhacandra
ᬁ → m̐; # ulu candra / candrabindu
ᬂ → ṁ; # cecek / anusvara
ᬃ → r\=; # surang / repha (note, "Indonesian mode" not "Indian mode")
ᬄ → ḥ; # bisah / visarga
# akara used as glottal
ᬅ } $dv_no_rerekan → q;
# independent vowels
ᬅ → A; # LETTER AKARA
ᬆ → A\:; # LETTER AKARA TEDUNG
ᬇ → I; # LETTER IKARA
ᬈ → I\:; # LETTER IKARA TEDUNG
ᬉ → U; # LETTER UKARA
ᬊ → U\:; # LETTER UKARA TEDUNG
ᬋ → R̥; # LETTER RA REPA
ᬌ → R̥\:; # LETTER RA REPA TEDUNG
ᬍ → L̥; # LETTER LA LENGA
ᬎ → L̥̄; # LETTER LA LENGA TEDUNG
ᬏ → E; # LETTER EKARA
ᬐ → Ai; # LETTER AIKARA
ᬑ → O; # LETTER OKARA
ᬒ → O\:; # LETTER OKARA TEDUNG
# consonants
ᬓ } $dv → k;
ᬓ → ka; # LETTER KA
ᬔ } $dv → kh;
ᬔ → kha; # LETTER KA MAHAPRANA
ᬕ } $dv → g;
ᬕ → ga; # LETTER GA
ᬖ } $dv → gh;
ᬖ → gha; # LETTER GA GORA
ᬗ } $dv → ṅ;
ᬗ → ṅa; # LETTER NGA
ᬘ } $dv → c;
ᬘ → ca; # LETTER CA
ᬙ } $dv → ch;
ᬙ → cha; # LETTER CA LACA
ᬚ } $dv → j;
ᬚ → ja; # LETTER JA
ᬛ } $dv → jh;
ᬛ → jha; # LETTER JA JERA
ᬜ } $dv → ñ;
ᬜ → ña; # LETTER NYA
ᬝ } $dv → ṭ;
ᬝ → ṭa; # LETTER TA LATIK
ᬞ } $dv → ṭh;
ᬞ → ṭha; # LETTER TA MURDA MAHAPRANA
ᬟ } $dv → ḍ;
ᬟ → ḍa; # LETTER DA MURDA ALPAPRANA
ᬠ } $dv → ḍh;
ᬠ → ḍha; # LETTER DA MURDA MAHAPRANA
ᬡ } $dv → ṇ;
ᬡ → ṇa; # LETTER NA RAMBAT
ᬢ } $dv → t;
ᬢ → ta; # LETTER TA
ᬣ } $dv → th;
ᬣ → tha; # LETTER TA TAWA
ᬤ } $dv → d;
ᬤ → da; # LETTER DA
ᬥ } $dv → dh;
ᬥ → dha; # LETTER DA MADU
ᬦ } $dv → n;
ᬦ → na; # LETTER NA
ᬧ } $dv → p;
ᬧ → pa; # LETTER PA
ᬨ } $dv → ph;
ᬨ → pha; # LETTER PA KAPAL
ᬩ } $dv → b;
ᬩ → ba; # LETTER BA
ᬪ } $dv → bh;
ᬪ → bha; # LETTER BA KEMBANG
ᬫ } $dv → m;
ᬫ → ma; # LETTER MA
ᬬ } $dv → y;
ᬬ → ya; # LETTER YA
ᬭ } $dv → r;
ᬭ → ra; # LETTER RA
ᬮ } $dv → l;
ᬮ → la; # LETTER LA
ᬯ } $dv → v;
ᬯ → va; # LETTER WA
ᬰ } $dv → ś;
ᬰ → śa; # LETTER SA SAGA
ᬱ } $dv → ṣ;
ᬱ → ṣa; # LETTER SA SAPA
ᬲ } $dv → s;
ᬲ → sa; # LETTER SA
ᬳ } $dv → h;
ᬳ → ha; # LETTER HA
\u1B4C } $dv → j\=ñ;
\u1B4C → j\=ña; # LETTER ARCHAIC JNYA
# rerekan (not present in DHARMA, "*" used as impromptu mark)
᬴ } $dv_no_rerekan → \*;
᬴ → \* a; # SIGN REREKAN
# dependent vowels
ᬵ → ā; # VOWEL SIGN TEDUNG
ᬶ → i; # VOWEL SIGN ULU
ᬷ → ī; # VOWEL SIGN ULU SARI
ᬸ → u; # VOWEL SIGN SUKU
ᬹ → ū; # VOWEL SIGN SUKU ILUT
ᬺ → r̥; # VOWEL SIGN RA REPA
ᬻ → r̥\:; # VOWEL SIGN RA REPA TEDUNG
ᬼ→ l̥; # VOWEL SIGN LA LENGA
ᬽ → l̥\:; # VOWEL SIGN LA LENGA TEDUNG
ᬾ → e; # VOWEL SIGN TALING
ᬿ → ai; # VOWEL SIGN TALING REPA
ᭀ → o; # VOWEL SIGN TALING TEDUNG
ᭁ → au; # VOWEL SIGN TALING REPA TEDUNG
ᭂ → ə; # VOWEL SIGN PEPET
ᭃ → ə\:; # VOWEL SIGN PEPET TEDUNG
# adeg-adeg
᭄\u200C → ·; # explicit ADEG ADEG
᭄ } $c → ; # ADEG ADEG
᭄ → ·; # ADEG ADEG
# Sasak consonants (not present in DHARMA, "'" used as impromptu mark)
ᭅ } $dv → k\';
ᭅ → k\'a; # LETTER KAF SASAK
ᭆ } $dv → kh\';
ᭆ → kh\'a; # LETTER KHOT SASAK
ᭇ } $dv → t\';
ᭇ → t\'a; # LETTER TZIR SASAK
ᭈ } $dv → p\';
ᭈ → p\'a; # LETTER EF SASAK
ᭉ } $dv → v\';
ᭉ → v\'a; # LETTER VE SASAK
ᭊ } $dv → j\';
ᭊ → j\'a; # LETTER ZAL SASAK
ᭋ } $dv → s\';
ᭋ → s\'a; # LETTER ASYURA SASAK
# digits
᭐ → 0; # DIGIT ZERO
᭑ → 1; # DIGIT ONE
᭒ → 2; # DIGIT TWO
᭓ → 3; # DIGIT THREE
᭔ → 4; # DIGIT FOUR
᭕ → 5; # DIGIT FIVE
᭖ → 6; # DIGIT SIX
᭗ → 7; # DIGIT SEVEN
᭘ → 8; # DIGIT EIGHT
᭙ → 9; # DIGIT NINE
# punctuation
᭚ → '<g type="panti"/>'; # PANTI
᭛ → '<g type="pamada"/>'; # PAMADA
᭜ → \@; # WINDU
᭝ → '<g type="pamungkah"/>'; # CARIK PAMUNGKAH
᭞ → \,; # CARIK SIKI
᭟ → \,\,; # CARIK PAREREN
᭠ → '<g type="pameneng"/>'; # PAMENENG
\u1B7D → '<g type="pantiLantang"/>';
\u1B7E → '<g type="pamadaLantang"/>';
EOF;
# transliteration rules developed at Puri Kauhan Ubud and widely used in Bali
# default Balinese to Latin transliteration variant
$rules['ban-x-pku'] = <<<'EOF'
::NFC;
$dv_no_rerekan = [\u1B35-\u1B44];
$dv = [\u1B34$dv_no_rerekan];
$c = [\u1B13-\u1B33 \u1B45-\u1B4C];
$base = [\u1B05-\u1B33 \u1B45-\u1B60];
# ulu suku deletion mark
$base ᬶᬸ → ∅;
# disambiguation from aspirates
[kgcjṭḍtdpb] { ᭄ } ᬳ → \:;
# various signs
ᬀ → ṃ; # ulu ricem / ardhacandra
ᬁ → m̐; # ulu candra / candrabindu
ᬂ → ŋ; # cecek / anusvara
ᬃ → ŕ; # surang / repha (note, "Indonesian mode" not "Indian mode")
ᬄ → ḥ; # bisah / visarga
# akara used as glottal
ᬅ } $dv_no_rerekan → \*;
# independent vowels
ᬅ → ᵒa; # LETTER AKARA
ᬆ → ᵒā; # LETTER AKARA TEDUNG
ᬇ → ᵒi; # LETTER IKARA
ᬈ → ᵒī; # LETTER IKARA TEDUNG
ᬉ → ᵒu; # LETTER UKARA
ᬊ → ᵒū; # LETTER UKARA TEDUNG
ᬋ → r̥; # LETTER RA REPA
ᬌ → r̥̄; # LETTER RA REPA TEDUNG
ᬍ → l̥; # LETTER LA LENGA
ᬎ → l̥̄; # LETTER LA LENGA TEDUNG
ᬏ → ᵒe; # LETTER EKARA
ᬐ → ᵒai; # LETTER AIKARA
ᬑ → ᵒo; # LETTER OKARA
ᬒ → ᵒau; # LETTER OKARA TEDUNG
# consonants
ᬓ } $dv → k;
ᬓ → ka; # LETTER KA
ᬔ } $dv → kh;
ᬔ → kha; # LETTER KA MAHAPRANA
ᬕ } $dv → g;
ᬕ → ga; # LETTER GA
ᬖ } $dv → gh;
ᬖ → gha; # LETTER GA GORA
ᬗ } $dv → ṅ;
ᬗ → ṅa; # LETTER NGA
ᬘ } $dv → c;
ᬘ → ca; # LETTER CA
ᬙ } $dv → ch;
ᬙ → cha; # LETTER CA LACA
ᬚ } $dv → j;
ᬚ → ja; # LETTER JA
ᬛ } $dv → jh;
ᬛ → jha; # LETTER JA JERA
ᬜ } $dv → ñ;
ᬜ → ña; # LETTER NYA
ᬝ } $dv → ṭ;
ᬝ → ṭa; # LETTER TA LATIK
ᬞ } $dv → ṭh;
ᬞ → ṭha; # LETTER TA MURDA MAHAPRANA
ᬟ } $dv → ḍ;
ᬟ → ḍa; # LETTER DA MURDA ALPAPRANA
ᬠ } $dv → ḍh;
ᬠ → ḍha; # LETTER DA MURDA MAHAPRANA
ᬡ } $dv → ṇ;
ᬡ → ṇa; # LETTER NA RAMBAT
ᬢ } $dv → t;
ᬢ → ta; # LETTER TA
ᬣ } $dv → th;
ᬣ → tha; # LETTER TA TAWA
ᬤ } $dv → d;
ᬤ → da; # LETTER DA
ᬥ } $dv → dh;
ᬥ → dha; # LETTER DA MADU
ᬦ } $dv → n;
ᬦ → na; # LETTER NA
ᬧ } $dv → p;
ᬧ → pa; # LETTER PA
ᬨ } $dv → ph;
ᬨ → pha; # LETTER PA KAPAL
ᬩ } $dv → b;
ᬩ → ba; # LETTER BA
ᬪ } $dv → bh;
ᬪ → bha; # LETTER BA KEMBANG
ᬫ } $dv → m;
ᬫ → ma; # LETTER MA
ᬬ } $dv → y;
ᬬ → ya; # LETTER YA
ᬭ } $dv → r;
ᬭ → ra; # LETTER RA
ᬮ } $dv → l;
ᬮ → la; # LETTER LA
ᬯ } $dv → w;
ᬯ → wa; # LETTER WA
ᬰ } $dv → ś;
ᬰ → śa; # LETTER SA SAGA
ᬱ } $dv → ṣ;
ᬱ → ṣa; # LETTER SA SAPA
ᬲ } $dv → s;
ᬲ → sa; # LETTER SA
ᬳ } $dv → h;
ᬳ → ha; # LETTER HA
\u1B4C } $dv → j\=ñ;
\u1B4C → j\=ña; # LETTER ARCHAIC JNYA
# rerekan (not present in DHARMA, "*" used as impromptu mark)
᬴ } $dv_no_rerekan → \*;
᬴ → \* a; # SIGN REREKAN
# dependent vowels
ᬵ → ā; # VOWEL SIGN TEDUNG
ᬶ → i; # VOWEL SIGN ULU
ᬷ → ī; # VOWEL SIGN ULU SARI
ᬸ → u; # VOWEL SIGN SUKU
ᬹ → ū; # VOWEL SIGN SUKU ILUT
ᬺᭂ → r̥ĕ;
ᬺ → r̥ĕ; # VOWEL SIGN RA REPA
ᬻ → r̥ö; # VOWEL SIGN RA REPA TEDUNG
ᬼ→ lĕ; # VOWEL SIGN LA LENGA
ᬽ → lö; # VOWEL SIGN LA LENGA TEDUNG
ᬾ → e; # VOWEL SIGN TALING
ᬿ → ai; # VOWEL SIGN TALING REPA
ᭀ → o; # VOWEL SIGN TALING TEDUNG
ᭁ → au; # VOWEL SIGN TALING REPA TEDUNG
ᭂ → ĕ; # VOWEL SIGN PEPET
ᭃ → ö; # VOWEL SIGN PEPET TEDUNG
# adeg-adeg
᭄\u200C → ·; # explicit ADEG ADEG
᭄ } $c → ; # ADEG ADEG
᭄ → ·; # ADEG ADEG
# Sasak consonants (not present in DHARMA, "'" used as impromptu mark)
ᭅ } $dv → k\';
ᭅ → k\'a; # LETTER KAF SASAK
ᭆ } $dv → kh\';
ᭆ → kh\'a; # LETTER KHOT SASAK
ᭇ } $dv → t\';
ᭇ → t\'a; # LETTER TZIR SASAK
ᭈ } $dv → p\';
ᭈ → p\'a; # LETTER EF SASAK
ᭉ } $dv → w\';
ᭉ → w\'a; # LETTER VE SASAK
ᭊ } $dv → j\';
ᭊ → j\'a; # LETTER ZAL SASAK
ᭋ } $dv → s\';
ᭋ → s\'a; # LETTER ASYURA SASAK
# digits
᭐ → 0; # DIGIT ZERO
᭑ → 1; # DIGIT ONE
᭒ → 2; # DIGIT TWO
᭓ → 3; # DIGIT THREE
᭔ → 4; # DIGIT FOUR
᭕ → 5; # DIGIT FIVE
᭖ → 6; # DIGIT SIX
᭗ → 7; # DIGIT SEVEN
᭘ → 8; # DIGIT EIGHT
᭙ → 9; # DIGIT NINE
# punctuation
᭚ → '||'; # PANTI
᭛ → '//'; # PAMADA
᭜ → 0; # WINDU
᭝ → \=; # CARIK PAMUNGKAH
᭞ → \,; # CARIK SIKI
᭟ → \.; # CARIK PAREREN
᭠ → \-; # PAMENENG
\u1B7D → '|||';
\u1B7E → '///';
EOF;
return $rules;
}
protected function getTransliteratorAliases() {
return [
'ban' => 'ban-x-pku',
'ban-bali' => 'ban-x-pku',
];
}
/**
* Guess if a text is written in Balinese or Latin.
* Overrides LanguageConverter::guessVariant()
*
* @param string $text The text to be checked
* @param string $variant Language code of the variant to be checked for
* @return bool True if $text appears to be written in $variant
*/
public function guessVariant( $text, $variant ) {
$hasBalinese = preg_match( "/[\x{1B00}-\x{1B7F}]/u", $text );
return ( $variant == 'ban-bali' ) == $hasBalinese;
}
}

File Metadata

Mime Type
text/x-php
Expires
Sat, May 16, 21:33 (1 d, 9 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
89/f5/0041a388885617329ac74be6283d
Default Alt Text
BanConverter.php (24 KB)

Event Timeline