Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1432232
BanConverter.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
24 KB
Referenced Files
None
Subscribers
None
BanConverter.php
View Options
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
/**
* Balinese specific code.
*
* @ingroup Languages
*/
class
BanConverter
extends
LanguageConverterIcu
{
public
function
getMainCode
():
string
{
return
'ban'
;
}
public
function
getLanguageVariants
():
array
{
return
[
'ban'
,
'ban-bali'
,
'ban-x-dharma'
,
'ban-x-palmleaf'
,
'ban-x-pku'
];
}
public
function
getVariantsFallbacks
():
array
{
return
[
'ban-bali'
=>
'ban'
,
'ban-x-dharma'
=>
'ban'
,
'ban-x-palmleaf'
=>
'ban'
,
'ban-x-pku'
=>
'ban'
,
];
}
public
function
getVariantNames
():
array
{
$names
=
[
'ban'
=>
'Basa Bali'
,
'ban-bali'
=>
'ᬩᬲᬩᬮᬶ'
,
'ban-x-dharma'
=>
'Basa Bali (alih aksara DHARMA)'
,
'ban-x-palmleaf'
=>
'Basa Bali (alih aksara Palmleaf.org)'
,
'ban-x-pku'
=>
'Basa Bali (alih aksara Puri Kauhan Ubud)'
,
];
return
array_merge
(
parent
::
getVariantNames
(),
$names
);
}
protected
function
getIcuRules
()
{
$rules
=
[];
# transliteration rules developed for Palmleaf.org
$rules
[
'ban-x-palmleaf'
]
=
<<<
'EOF'
::
NFC
;
ᬒᬁ
→
\uE050
;
# OM
ᬁ
→
\uE001
;
# SIGN ULU CANDRA
ᬂ
→
\uE002
;
# SIGN CECEK
ᬄ
→
\uE003
;
# SIGN BISAH
ᬅ
→
\uE005
;
# LETTER AKARA
ᬆ
→
\uE006
;
# LETTER AKARA TEDUNG
ᬇ
→
\uE007
;
# LETTER IKARA
ᬈ
→
\uE008
;
# LETTER IKARA TEDUNG
ᬉ
→
\uE009
;
# LETTER UKARA
ᬊ
→
\uE00A
;
# LETTER UKARA TEDUNG
ᬋ
→
\uE00B
;
# LETTER RA REPA
ᬌ
→
\uE060
;
# LETTER RA REPA TEDUNG
ᬍ
→
\uE00C
;
# LETTER LA LENGA
ᬎ
→
\uE061
;
# LETTER LA LENGA TEDUNG
ᬏ
→
\uE00F
;
# LETTER EKARA
ᬐ
→
\uE010
;
# LETTER AIKARA
ᬑ
→
\uE013
;
# LETTER OKARA
ᬒ
→
\uE014
;
# LETTER OKARA TEDUNG
ᬓ
→
\uE015
;
# LETTER KA
ᬔ
→
\uE016
;
# LETTER KA MAHAPRANA
ᬕ
→
\uE017
;
# LETTER GA
ᬖ
→
\uE018
;
# LETTER GA GORA
ᬗ
→
\uE019
;
# LETTER NGA
ᬘ
→
\uE01A
;
# LETTER CA
ᬙ
→
\uE01B
;
# LETTER CA LACA
ᬚ
→
\uE01C
;
# LETTER JA
ᬛ
→
\uE01D
;
# LETTER JA JERA
ᬜ
→
\uE01E
;
# LETTER NYA
ᬝ
→
\uE01F
;
# LETTER TA LATIK
ᬞ
→
\uE020
;
# LETTER TA MURDA MAHAPRANA
ᬟ
→
\uE021
;
# LETTER DA MURDA ALPAPRANA
ᬠ
→
\uE022
;
# LETTER DA MURDA MAHAPRANA
ᬡ
→
\uE023
;
# LETTER NA RAMBAT
ᬢ
→
\uE024
;
# LETTER TA
ᬣ
→
\uE025
;
# LETTER TA TAWA
ᬤ
→
\uE026
;
# LETTER DA
ᬥ
→
\uE027
;
# LETTER DA MADU
ᬦ
→
\uE028
;
# LETTER NA
ᬧ
→
\uE02A
;
# LETTER PA
ᬨ
→
\uE02B
;
# LETTER PA KAPAL
ᬩ
→
\uE02C
;
# LETTER BA
ᬪ
→
\uE02D
;
# LETTER BA KEMBANG
ᬫ
→
\uE02E
;
# LETTER MA
ᬬ
→
\uE02F
;
# LETTER YA
ᬭ
→
\uE030
;
# LETTER RA
ᬮ
→
\uE032
;
# LETTER LA
ᬯ
→
\uE035
;
# LETTER WA
ᬰ
→
\uE036
;
# LETTER SA SAGA
ᬱ
→
\uE037
;
# LETTER SA SAPA
ᬲ
→
\uE038
;
# LETTER SA
ᬳ
→
\uE039
;
# LETTER HA
᬴
→
\uE03C
;
# SIGN REREKAN
ᬵ
→
\uE03E
;
# VOWEL SIGN TEDUNG
ᬶ
→
\uE03F
;
# VOWEL SIGN ULU
ᬷ
→
\uE040
;
# VOWEL SIGN ULU SARI
ᬸ
→
\uE041
;
# VOWEL SIGN SUKU
ᬹ
→
\uE042
;
# VOWEL SIGN SUKU ILUT
ᬺ
→
\uE043
;
# VOWEL SIGN RA REPA
ᬻ
→
\uE044
;
# VOWEL SIGN RA REPA TEDUNG
ᬼ→
\uE062
;
# VOWEL SIGN LA LENGA
ᬽ
→
\uE063
;
# VOWEL SIGN LA LENGA TEDUNG
ᬾ
→
\uE047
;
# VOWEL SIGN TALING
ᬿ
→
\uE048
;
# VOWEL SIGN TALING REPA
ᭀ
→
\uE04B
;
# VOWEL SIGN TALING TEDUNG
ᭁ
→
\uE04C
;
# VOWEL SIGN TALING REPA TEDUNG
ᭂ
→
\uE045
;
# VOWEL SIGN PEPET
ᭃ
→
\uE049
;
# VOWEL SIGN PEPET TEDUNG
᭄
→
\uE04D
;
# ADEG ADEG
ᭅ
→
\uE058
;
# LETTER KAF SASAK
ᭆ
→
\uE059
;
# LETTER KHOT SASAK
ᭇ
→
\uE024\uE03C
;
# LETTER TZIR SASAK
ᭈ
→
\uE05E
;
# LETTER EF SASAK
ᭉ
→
\uE081
;
# LETTER VE SASAK
ᭊ
→
\uE05B
;
# LETTER ZAL SASAK
ᭋ
→
\uE038\uE03C
;
# LETTER ASYURA SASAK
᭐
→
\uE066
;
# DIGIT ZERO
᭑
→
\uE067
;
# DIGIT ONE
᭒
→
\uE068
;
# DIGIT TWO
᭓
→
\uE069
;
# DIGIT THREE
᭔
→
\uE06A
;
# DIGIT FO
᭕
→
\uE06B
;
# DIGIT FIVE
᭖
→
\uE06C
;
# DIGIT SIX
᭗
→
\uE06D
;
# DIGIT SEVEN
᭘
→
\uE06E
;
# DIGIT EIGHT
᭙
→
\uE06F
;
# DIGIT NINE
᭚
→
'//'
;
# PANTI
᭛
→
'///'
;
# PAMADA
᭜
→
•
;
# WINDU
᭟᭜᭟
→
'
\\
•
\\
'
;
᭟
' '
᭜
' '
᭟
→
'
\\
•
\\
'
;
᭝
→
\
:;
# CARIK PAMUNGKAH
᭞
→
\uE064
;
# CARIK SIKI
᭟
→
\uE065
;
# CARIK PAREREN
᭠
→
‐
;
# PAMENENG
#consonants
$chandrabindu
=
\uE001
;
$ardhachandra
=
\u1B00
;
$anusvara
=
\uE002
;
$visarga
=
\uE003
;
# w←vowel→ represents the stand-alone form
$wa
=
\uE005
;
$waa
=
\uE006
;
$wi
=
\uE007
;
$wii
=
\uE008
;
$wu
=
\uE009
;
$wuu
=
\uE00A
;
$wr
=
\uE00B
;
$wl
=
\uE00C
;
$wce
=
\uE00D
;
# LETTER CANDRA E
$wse
=
\uE00E
;
# LETTER SHORT E
$we
=
\uE00F
;
# ए LETTER E
$wai
=
\uE010
;
$wco
=
\uE011
;
# LETTER CANDRA O
$wso
=
\uE012
;
# LETTER SHORT O
$wo
=
\uE013
;
# ओ LETTER O
$wau
=
\uE014
;
$ka
=
\uE015
;
$kha
=
\uE016
;
$ga
=
\uE017
;
$gha
=
\uE018
;
$nga
=
\uE019
;
$ca
=
\uE01A
;
$cha
=
\uE01B
;
$ja
=
\uE01C
;
$jha
=
\uE01D
;
$nya
=
\uE01E
;
$tta
=
\uE01F
;
$ttha
=
\uE020
;
$dda
=
\uE021
;
$ddha
=
\uE022
;
$nna
=
\uE023
;
$ta
=
\uE024
;
$tha
=
\uE025
;
$da
=
\uE026
;
$dha
=
\uE027
;
$na
=
\uE028
;
$ena
=
\uE029
;
#compatibility
$pa
=
\uE02A
;
$pha
=
\uE02B
;
$ba
=
\uE02C
;
$bha
=
\uE02D
;
$ma
=
\uE02E
;
$ya
=
\uE02F
;
$ra
=
\uE030
;
$vva
=
\uE081
;
$rra
=
\uE031
;
$la
=
\uE032
;
$lla
=
\uE033
;
$ela
=
\uE034
;
#compatibility
$va
=
\uE035
;
$sha
=
\uE036
;
$ssa
=
\uE037
;
$sa
=
\uE038
;
$ha
=
\uE039
;
$nukta
=
\uE03C
;
$avagraha
=
\uE03D
;
# SIGN AVAGRAHA
# ←vowel→ represents the dependent form
$aa
=
\uE03E
;
$i
=
\uE03F
;
$ii
=
\uE040
;
$u
=
\uE041
;
$uu
=
\uE042
;
$rh
=
\uE043
;
$rrh
=
\uE044
;
$ce
=
\uE045
;
#VOWEL SIGN CANDRA E
$se
=
\uE046
;
#VOWEL SIGN SHORT E
$e
=
\uE047
;
$ai
=
\uE048
;
$co
=
\uE049
;
# VOWEL SIGN CANDRA O
$so
=
\uE04A
;
# VOWEL SIGN SHORT O
$o
=
\uE04B
;
# ो
$au
=
\uE04C
;
$virama
=
\uE04D
;
$om
=
\uE050
;
# OM
\uE051→
;
# UNMAPPED STRESS SIGN UDATTA
\uE052→
;
# UNMAPPED STRESS SIGN ANUDATTA
\uE053→
;
# UNMAPPED GRAVE ACCENT
\uE054→
;
# UNMAPPED ACUTE ACCENT
$lm
=
\uE055
;
# Telugu Length Mark
$ailm
=
\uE056
;
# AI Length Mark
$aulm
=
\uE057
;
# AU Length Mark
#urdu compatibity forms
$uka
=
\uE058
;
$ukha
=
\uE059
;
$ugha
=
\uE05A
;
$ujha
=
\uE05B
;
$uddha
=
\uE05C
;
$udha
=
\uE05D
;
$ufa
=
\uE05E
;
$uya
=
\uE05F
;
$wrr
=
\uE060
;
$wll
=
\uE061
;
$lh
=
\uE062
;
$llh
=
\uE063
;
$danda
=
\uE064
;
$doubleDanda
=
\uE065
;
$zero
=
\uE066
;
# DIGIT ZERO
$one
=
\uE067
;
# DIGIT ONE
$two
=
\uE068
;
# DIGIT TWO
$three
=
\uE069
;
# DIGIT THREE
$four
=
\uE06A
;
# DIGIT FOUR
$five
=
\uE06B
;
# DIGIT FIVE
$six
=
\uE06C
;
# DIGIT SIX
$seven
=
\uE06D
;
# DIGIT SEVEN
$eight
=
\uE06E
;
# DIGIT EIGHT
$nine
=
\uE06F
;
# DIGIT NINE
# Glottal stop
$dgs
=
\uE082
;
#Khanda-ta
$kta
=
\uE083
;
$depVowelAbove
=[
\uE03E
-
\uE040\uE045
-
\uE04C
];
$depVowelBelow
=[
\uE041
-
\uE044
];
# $x was originally called '§'; $z was '%'
$x
=[
$aa$ai$au$ii$i$uu$u$rrh$rh$lh$llh$e$o$se$ce$so$co
];
$z
=[
bcdfghjklmnpqrstvwxyz
];
$vowels
=[
aeiour̥̄̆
];
$forceIndependentMatra
=
[^[[:
L
:][
̀
-
͌
]]];
$strike
=
\u0336
;
######################################################################
# normalize input
######################################################################
# delete zwnj
\u200C→
;
# reprocess from beginning
::
Null
;
######################################################################
# convert from Native letters to Latin letters
######################################################################
#glottal stop
$wa$virama
→
k
''
;
#anusvara
$anusvara
→
ng
;
#surang
ᬃ
→
r̀
;
# Urdu compatibility
$ya$nukta
}
$x
→
y
;
$ya$nukta$virama
→
y
;
$ya$nukta
→
ya
;
$la$nukta
}
$x
→
l
;
$la$nukta$virama
→
l
;
$la$nukta
→
la
;
$na$nukta
}
$x
→
n
;
$na$nukta$virama
→
n
;
$na$nukta
→
na
;
$ena
}
$x
→
n
;
$ena$virama
→
n
;
$ena
→
na
;
$uka
→
qa
;
$ka$nukta
}
$x
→
q
;
$ka$nukta$virama
→
q
;
$ka$nukta
→
qa
;
$kha$nukta
}
$x
→
kh
;
$kha$nukta$virama
→
kh
;
$kha$nukta
→
kha
;
$ukha$virama
→
kh
;
$ukha
→
kha
;
$ugha
→
gha
;
$ga$nukta
}
$x
→
gh
;
$ga$nukta$virama
→
gh
;
$ga$nukta
→
gha
;
$ujha
→
za
;
$ja$nukta
}
$x
→
z
;
$ja$nukta$virama
→
z
;
$ja$nukta
→
za
;
$ddha$nukta
}
$x
→
r
;
$ddha$nukta$virama
→
r
;
$ddha$nukta
→
ra
;
$uddha
}
$x
→
r
;
$uddha$virama
→
r
;
$uddha
→
ra
;
$udha
→
ra
;
$dda$nukta
}
$x
→
r
;
$dda$nukta$virama
→
r
;
$dda$nukta
→
ra
;
$pha$nukta
}
$x
→
f
;
$pha$nukta$virama
→
f
;
$pha$nukta
→
fa
;
$ufa
}
$x
→
f
;
$ufa$virama
→
f
;
$ufa
→
fa
;
$ra$nukta
}
$x
→
r
;
$ra$nukta$virama
→
r
;
$ra$nukta
→
ra
;
$lla$nukta
}
$x
→
l
;
$lla$nukta$virama
→
l
;
$lla$nukta
→
la
;
$ela
}
$x
→
l
;
$ela$virama
→
l
;
$ela
→
la
;
$uya
}
$x
→
y
;
$uya$virama
→
y
;
$uya
→
ya
;
# normal consonants
$ka$virama
}
$ha→k
''
;
$ka
}
$x→k
;
$ka$virama→k
;
$ka→ka
;
$kha$i$u→k
$strike
h
$strike
;
$kha
}
$x→kh
;
$kha$virama→kh
;
$kha→kha
;
$ga$virama
}
$ha→g
''
;
$ga
}
$x→g
;
$ga$virama→g
;
$ga→ga
;
$gha$i$u→g
$strike
h
$strike
;
$gha
}
$x→gh
;
$gha$virama→gh
;
$gha→gha
;
$nga$i$u→n
$strike
g
$strike
;
$nga
}
$x→ng
;
$nga$virama→ng
;
$nga→nga
;
$ca$virama
}
$ha→c
''
;
$ca
}
$x→c
;
$ca$virama→c
;
$ca→ca
;
$cha$i$u→c
$strike
h
$strike
;
$cha
}
$x→ch
;
$cha$virama→ch
;
$cha→cha
;
$ja$virama
}
$ha→j
''
;
$ja
}
$x→j
;
$ja$virama→j
;
$ja→ja
;
$jha$i$u→j
$strike
h
$strike
;
$jha
}
$x→jh
;
$jha$virama→jh
;
$jha→jha
;
$nya
}
$x→ñ
;
$nya$virama→ñ
;
$nya
→
ña
;
$tta$virama
}
$ha→ṭ
''
;
$tta
}
$x→ṭ
;
$tta$virama→ṭ
;
$tta→ṭa
;
$ttha$i$u→ṭ
$strike
h
$strike
;
$ttha
}
$x→ṭh
;
$ttha$virama→ṭh
;
$ttha→ṭha
;
$dda
}
$x$ha→ḍ
''
;
$dda
}
$x→ḍ
;
$dda$virama→ḍ
;
$dda→ḍa
;
$ddha$i$u→ḍ
$strike
h
$strike
;
$ddha
}
$x→ḍh
;
$ddha$virama→ḍh
;
$ddha→ḍha
;
$nna
}
$x→ṇ
;
$nna$virama→ṇ
;
$nna→ṇa
;
$ta$virama
}
$ha→t
''
;
$ta
}
$x→t
;
$ta$virama→t
;
$ta→ta
;
$tha$i$u→t
$strike
h
$strike
;
$tha
}
$x→th
;
$tha$virama→th
;
$tha→tha
;
$da$virama
}
$ha→d
''
;
$da
}
$x→d
;
$da$virama→d
;
$da→da
;
$dha$i$u→d
$strike
h
$strike
;
$dha
}
$x→dh
;
$dha$virama→dh
;
$dha→dha
;
$na$virama
}
$ga→n
''
;
$na
}
$x→n
;
$na$virama→n
;
$na→na
;
$pa$virama
}
$ha→p
''
;
$pa
}
$x→p
;
$pa$virama→p
;
$pa→pa
;
$pha$i$u→p
$strike
h
$strike
;
$pha
}
$x→ph
;
$pha$virama→ph
;
$pha→pha
;
$ba$virama
}
$ha→b
''
;
$ba
}
$x→b
;
$ba$virama→b
;
$ba→ba
;
$bha$i$u→b
$strike
h
$strike
;
$bha
}
$x→bh
;
$bha$virama→bh
;
$bha→bha
;
$ma
}
$x→m
;
$ma$virama→m
;
$ma→ma
;
$ya
}
$x→y
;
$ya$virama→y
;
$ya→ya
;
$ra
}
$x→r
;
$ra$virama→r
;
$ra→ra
;
$vva
}
$x→v
;
$vva$virama→v
;
$vva→va
;
$rra
}
$x→r
;
$rra$virama→r
;
$rra→ra
;
$la
}
$x→l
;
$la$virama→l
;
$la→la
;
$lla
}
$x→l
;
$lla$virama→l
;
$lla→la
;
$va
}
$x→w
;
$va$virama→w
;
$va→wa
;
$sa
}
$x→s
;
$sa$virama→s
;
#for gurmukhi
$sa$nukta
}
$x→sy
;
$sa$nukta$virama→sy
;
$sa$nukta→sya
;
$sa→sa
;
$sha
}
$x→ś
;
$sha$virama→ś
;
$sha→śa
;
$ssa
}
$x→sy
;
$ssa$virama→ṣ
;
$ssa→ṣa
;
$ha
}
$x→h
;
$ha$virama→h
;
$ha→ha
;
# dependent vowels (should never occur except following consonants)
$forceIndependentMatra
{
$aa
→
̔ā
;
$forceIndependentMatra
{
$ai
→
̔ai
;
$forceIndependentMatra
{
$au
→
̔au
;
$forceIndependentMatra
{
$ii
→
̔ī
;
$forceIndependentMatra
{
$i
→
̔i
;
$forceIndependentMatra
{
$uu
→
̔ū
;
$forceIndependentMatra
{
$u
→
̔u
;
$forceIndependentMatra
{
$rrh
→
̔r̥ö
;
$forceIndependentMatra
{
$rh
→
̔r̥ĕ
;
$forceIndependentMatra
{
$llh
→
̔l̥ö
;
$forceIndependentMatra
{
$lh
→
̔l̥ĕ
;
$forceIndependentMatra
{
$e
→
̔e
;
$forceIndependentMatra
{
$o
→
̔o
;
#extra vowels
$forceIndependentMatra
{
$ce
→
̔ĕ
;
$forceIndependentMatra
{
$co
→
̔ö
;
$forceIndependentMatra
{
$se
→
̔ĕ
;
$forceIndependentMatra
{
$so
→
̔o
;
$forceIndependentMatra
{
$nukta
→
;
# Nukta cannot appear independently or as first character
$forceIndependentMatra
{
$virama
→
;
# Virama cannot appear independently or as first character
$i$u
→
$strike
;
$aa
→
ā
;
$ai
→
ai
;
$au
→
au
;
$ii
→
ī
;
$i
→
i
;
$uu
→
ū
;
$u
→
u
;
$rrh
→
r̥ö
;
$rh
→
r̥ĕ
;
$llh
→
l̥ö
;
$lh
→
l̥ĕ
;
$e
→
e
;
$o
→
o
;
#extra vowels
$ce
→
ĕ
;
$co
→
ö
;
$se
→
ĕ
;
$so
→
o
;
#dependent vowels when following independent vowels. Generally Illegal only for roundtripping
$waa
}
$x
→
ā
;
$wai
}
$x
→
ai
;
$wau
}
$x
→
au
;
$wii
}
$x
→
ī
;
$wi
}
$x
→
i
;
$wuu
}
$x
→
ū
;
$wu
}
$x
→
u
;
$wrr
}
$x
→
r̥ö
;
$wr
}
$x
→
r̥ĕ
;
$wll
}
$x
→
l̥ö
;
$wl
}
$x
→
l̥ĕ
;
$we
}
$x
→
e
;
$wo
}
$x
→
o
;
$wa
}
$x
→
a
;
#extra vowels
$wce
}
$x
→
ĕ
;
$wco
}
$x
→
ö
;
$wse
}
$x
→
ĕ
;
$wso
}
$x
→
o
;
$om
}
$x
→
oṁ
;
# independent vowels when preceeded by vowels
$vowels
{
$waa
→
''
ā
;
$vowels
{
$wai
→
''
ai
;
$vowels
{
$wau
→
''
au
;
$vowels
{
$wii
→
''
ī
;
$vowels
{
$wi
→
''
i
;
$vowels
{
$wuu
→
''
ū
;
$vowels
{
$wu
→
''
u
;
$vowels
{
$we
→
''
e
;
$vowels
{
$wo
→
''
o
;
$vowels
{
$wa
→
''
a
;
#extra vowels
$vowels
{
$wce
→
''
ĕ
;
$vowels
{
$wco
→
''
ö
;
$vowels
{
$wse
→
''
ĕ
;
$vowels
{
$wso
→
''
o
;
$vowels
{
$om
→
''
oṁ
;
# independent vowels (otherwise)
$waa
→
ā
;
$wai
→
ai
;
$wau
→
au
;
$wii
→
ī
;
$wi
→
i
;
$wuu
→
ū
;
$wu
→
u
;
$wrr
→
r̥ö
;
$wr
→
r̥ĕ
;
$wll
→
l̥ö
;
$wl
→
l̥ĕ
;
$we
→
e
;
$wo
→
o
;
$wa
→
a
;
#extra vowels
$wce
→
ĕ
;
$wco
→
ö
;
$wse
→
ĕ
;
$wso
→
o
;
$om
→
oṁ
;
# stress marks
$avagraha
→
;
$chandrabindu
→
ṅġ
;
$ardhachandra
→
ṃ
;
$visarga
→
ḥ
;
# numbers
$zero
→
0
;
$one
→
1
;
$two
→
2
;
$three
→
3
;
$four
→
4
;
$five
→
5
;
$six
→
6
;
$seven
→
7
;
$eight
→
8
;
$nine
→
9
;
$lm
→
;
$ailm
→
;
$aulm
→
;
$dgs→
''
;
$kta→t
;
# Balinese numbers are surrounded by dandas which can be removed
$danda
}
[
$zero$one$two$three$four$five$six$seven$eight$nine
]
→
' '
;
[
01234567
89
]
{
$danda
→
' '
;
$danda→
', '
;
$doubleDanda→
'. '
;
\uE070→
;
# ABBREVIATION SIGN
# LETTER RA WITH MIDDLE DIAGONAL
\uE071
}
$x→ra
;
\uE071
$virama→r
;
\uE071→ra
;
# LETTER RA WITH LOWER DIAGONAL
\uE072
}
$x→ra
;
\uE072
$virama→r
;
\uE072→ra
;
\uE073→
;
# RUPEE MARK
\uE074→
;
# RUPEE SIGN
\uE075→
;
# CURRENCY NUMERATOR ONE
\uE076→
;
# CURRENCY NUMERATOR TWO
\uE077→
;
# CURRENCY NUMERATOR THREE
\uE078→
;
# CURRENCY NUMERATOR FOUR
\uE079→
;
# CURRENCY NUMERATOR ONE LESS THAN THE DENOMINATOR
\uE07A→
;
# CURRENCY DENOMINATOR SIXTEEN
\uE07B→
;
# ISSHAR
\uE07C→
;
# TIPPI
\uE07D→
;
# ADDAK
\uE07E→
;
# IRI
\uE07F→
;
# URA
\uE080→
;
# EK ONKAR
\uE004→
;
# DEVANAGARI VOWEL SIGN SHORT A
::
NFC
;
EOF
;
# transliteration rules following DHARMA project "strict transliteration"
# mostly follows ISO-15919, with modifications for precision and broader coverage
# https://hal.inria.fr/halshs-02272407/
$rules
[
'ban-x-dharma'
]
=
<<<'EOF'
::NFC;
$dv_no_rerekan = [\u1B35-\u1B44];
$dv = [\u1B34$dv_no_rerekan];
$c = [\u1B13-\u1B33 \u1B45-\u1B4C];
# disambiguation from aspirates
[kgcjṭḍtdpb] { ᭄ } ᬳ → \:;
# various signs
ᬀ → ṁ\*; # ulu ricem / ardhacandra
ᬁ → m̐; # ulu candra / candrabindu
ᬂ → ṁ; # cecek / anusvara
ᬃ → r\=; # surang / repha (note, "Indonesian mode" not "Indian mode")
ᬄ → ḥ; # bisah / visarga
# akara used as glottal
ᬅ } $dv_no_rerekan → q;
# independent vowels
ᬅ → A; # LETTER AKARA
ᬆ → A\:; # LETTER AKARA TEDUNG
ᬇ → I; # LETTER IKARA
ᬈ → I\:; # LETTER IKARA TEDUNG
ᬉ → U; # LETTER UKARA
ᬊ → U\:; # LETTER UKARA TEDUNG
ᬋ → R̥; # LETTER RA REPA
ᬌ → R̥\:; # LETTER RA REPA TEDUNG
ᬍ → L̥; # LETTER LA LENGA
ᬎ → L̥̄; # LETTER LA LENGA TEDUNG
ᬏ → E; # LETTER EKARA
ᬐ → Ai; # LETTER AIKARA
ᬑ → O; # LETTER OKARA
ᬒ → O\:; # LETTER OKARA TEDUNG
# consonants
ᬓ } $dv → k;
ᬓ → ka; # LETTER KA
ᬔ } $dv → kh;
ᬔ → kha; # LETTER KA MAHAPRANA
ᬕ } $dv → g;
ᬕ → ga; # LETTER GA
ᬖ } $dv → gh;
ᬖ → gha; # LETTER GA GORA
ᬗ } $dv → ṅ;
ᬗ → ṅa; # LETTER NGA
ᬘ } $dv → c;
ᬘ → ca; # LETTER CA
ᬙ } $dv → ch;
ᬙ → cha; # LETTER CA LACA
ᬚ } $dv → j;
ᬚ → ja; # LETTER JA
ᬛ } $dv → jh;
ᬛ → jha; # LETTER JA JERA
ᬜ } $dv → ñ;
ᬜ → ña; # LETTER NYA
ᬝ } $dv → ṭ;
ᬝ → ṭa; # LETTER TA LATIK
ᬞ } $dv → ṭh;
ᬞ → ṭha; # LETTER TA MURDA MAHAPRANA
ᬟ } $dv → ḍ;
ᬟ → ḍa; # LETTER DA MURDA ALPAPRANA
ᬠ } $dv → ḍh;
ᬠ → ḍha; # LETTER DA MURDA MAHAPRANA
ᬡ } $dv → ṇ;
ᬡ → ṇa; # LETTER NA RAMBAT
ᬢ } $dv → t;
ᬢ → ta; # LETTER TA
ᬣ } $dv → th;
ᬣ → tha; # LETTER TA TAWA
ᬤ } $dv → d;
ᬤ → da; # LETTER DA
ᬥ } $dv → dh;
ᬥ → dha; # LETTER DA MADU
ᬦ } $dv → n;
ᬦ → na; # LETTER NA
ᬧ } $dv → p;
ᬧ → pa; # LETTER PA
ᬨ } $dv → ph;
ᬨ → pha; # LETTER PA KAPAL
ᬩ } $dv → b;
ᬩ → ba; # LETTER BA
ᬪ } $dv → bh;
ᬪ → bha; # LETTER BA KEMBANG
ᬫ } $dv → m;
ᬫ → ma; # LETTER MA
ᬬ } $dv → y;
ᬬ → ya; # LETTER YA
ᬭ } $dv → r;
ᬭ → ra; # LETTER RA
ᬮ } $dv → l;
ᬮ → la; # LETTER LA
ᬯ } $dv → v;
ᬯ → va; # LETTER WA
ᬰ } $dv → ś;
ᬰ → śa; # LETTER SA SAGA
ᬱ } $dv → ṣ;
ᬱ → ṣa; # LETTER SA SAPA
ᬲ } $dv → s;
ᬲ → sa; # LETTER SA
ᬳ } $dv → h;
ᬳ → ha; # LETTER HA
\u1B4C } $dv → j\=ñ;
\u1B4C → j\=ña; # LETTER ARCHAIC JNYA
# rerekan (not present in DHARMA, "*" used as impromptu mark)
᬴ } $dv_no_rerekan → \*;
᬴ → \* a; # SIGN REREKAN
# dependent vowels
ᬵ → ā; # VOWEL SIGN TEDUNG
ᬶ → i; # VOWEL SIGN ULU
ᬷ → ī; # VOWEL SIGN ULU SARI
ᬸ → u; # VOWEL SIGN SUKU
ᬹ → ū; # VOWEL SIGN SUKU ILUT
ᬺ → r̥; # VOWEL SIGN RA REPA
ᬻ → r̥\:; # VOWEL SIGN RA REPA TEDUNG
ᬼ→ l̥; # VOWEL SIGN LA LENGA
ᬽ → l̥\:; # VOWEL SIGN LA LENGA TEDUNG
ᬾ → e; # VOWEL SIGN TALING
ᬿ → ai; # VOWEL SIGN TALING REPA
ᭀ → o; # VOWEL SIGN TALING TEDUNG
ᭁ → au; # VOWEL SIGN TALING REPA TEDUNG
ᭂ → ə; # VOWEL SIGN PEPET
ᭃ → ə\:; # VOWEL SIGN PEPET TEDUNG
# adeg-adeg
᭄\u200C → ·; # explicit ADEG ADEG
᭄ } $c → ; # ADEG ADEG
᭄ → ·; # ADEG ADEG
# Sasak consonants (not present in DHARMA, "'" used as impromptu mark)
ᭅ } $dv → k\';
ᭅ → k\'a; # LETTER KAF SASAK
ᭆ } $dv → kh\';
ᭆ → kh\'a; # LETTER KHOT SASAK
ᭇ } $dv → t\';
ᭇ → t\'a; # LETTER TZIR SASAK
ᭈ } $dv → p\';
ᭈ → p\'a; # LETTER EF SASAK
ᭉ } $dv → v\';
ᭉ → v\'a; # LETTER VE SASAK
ᭊ } $dv → j\';
ᭊ → j\'a; # LETTER ZAL SASAK
ᭋ } $dv → s\';
ᭋ → s\'a; # LETTER ASYURA SASAK
# digits
᭐ → 0; # DIGIT ZERO
᭑ → 1; # DIGIT ONE
᭒ → 2; # DIGIT TWO
᭓ → 3; # DIGIT THREE
᭔ → 4; # DIGIT FOUR
᭕ → 5; # DIGIT FIVE
᭖ → 6; # DIGIT SIX
᭗ → 7; # DIGIT SEVEN
᭘ → 8; # DIGIT EIGHT
᭙ → 9; # DIGIT NINE
# punctuation
᭚ → '<g type="panti"/>'; # PANTI
᭛ → '<g type="pamada"/>'; # PAMADA
᭜ → \@; # WINDU
᭝ → '<g type="pamungkah"/>'; # CARIK PAMUNGKAH
᭞ → \,; # CARIK SIKI
᭟ → \,\,; # CARIK PAREREN
᭠ → '<g type="pameneng"/>'; # PAMENENG
\u1B7D → '<g type="pantiLantang"/>';
\u1B7E → '<g type="pamadaLantang"/>';
EOF;
# transliteration rules developed at Puri Kauhan Ubud and widely used in Bali
# default Balinese to Latin transliteration variant
$rules
[
'ban-x-pku'
]
=
<<<'EOF'
::NFC;
$dv_no_rerekan = [\u1B35-\u1B44];
$dv = [\u1B34$dv_no_rerekan];
$c = [\u1B13-\u1B33 \u1B45-\u1B4C];
$base = [\u1B05-\u1B33 \u1B45-\u1B60];
# ulu suku deletion mark
$base ᬶᬸ → ∅;
# disambiguation from aspirates
[kgcjṭḍtdpb] { ᭄ } ᬳ → \:;
# various signs
ᬀ → ṃ; # ulu ricem / ardhacandra
ᬁ → m̐; # ulu candra / candrabindu
ᬂ → ŋ; # cecek / anusvara
ᬃ → ŕ; # surang / repha (note, "Indonesian mode" not "Indian mode")
ᬄ → ḥ; # bisah / visarga
# akara used as glottal
ᬅ } $dv_no_rerekan → \*;
# independent vowels
ᬅ → ᵒa; # LETTER AKARA
ᬆ → ᵒā; # LETTER AKARA TEDUNG
ᬇ → ᵒi; # LETTER IKARA
ᬈ → ᵒī; # LETTER IKARA TEDUNG
ᬉ → ᵒu; # LETTER UKARA
ᬊ → ᵒū; # LETTER UKARA TEDUNG
ᬋ → r̥; # LETTER RA REPA
ᬌ → r̥̄; # LETTER RA REPA TEDUNG
ᬍ → l̥; # LETTER LA LENGA
ᬎ → l̥̄; # LETTER LA LENGA TEDUNG
ᬏ → ᵒe; # LETTER EKARA
ᬐ → ᵒai; # LETTER AIKARA
ᬑ → ᵒo; # LETTER OKARA
ᬒ → ᵒau; # LETTER OKARA TEDUNG
# consonants
ᬓ } $dv → k;
ᬓ → ka; # LETTER KA
ᬔ } $dv → kh;
ᬔ → kha; # LETTER KA MAHAPRANA
ᬕ } $dv → g;
ᬕ → ga; # LETTER GA
ᬖ } $dv → gh;
ᬖ → gha; # LETTER GA GORA
ᬗ } $dv → ṅ;
ᬗ → ṅa; # LETTER NGA
ᬘ } $dv → c;
ᬘ → ca; # LETTER CA
ᬙ } $dv → ch;
ᬙ → cha; # LETTER CA LACA
ᬚ } $dv → j;
ᬚ → ja; # LETTER JA
ᬛ } $dv → jh;
ᬛ → jha; # LETTER JA JERA
ᬜ } $dv → ñ;
ᬜ → ña; # LETTER NYA
ᬝ } $dv → ṭ;
ᬝ → ṭa; # LETTER TA LATIK
ᬞ } $dv → ṭh;
ᬞ → ṭha; # LETTER TA MURDA MAHAPRANA
ᬟ } $dv → ḍ;
ᬟ → ḍa; # LETTER DA MURDA ALPAPRANA
ᬠ } $dv → ḍh;
ᬠ → ḍha; # LETTER DA MURDA MAHAPRANA
ᬡ } $dv → ṇ;
ᬡ → ṇa; # LETTER NA RAMBAT
ᬢ } $dv → t;
ᬢ → ta; # LETTER TA
ᬣ } $dv → th;
ᬣ → tha; # LETTER TA TAWA
ᬤ } $dv → d;
ᬤ → da; # LETTER DA
ᬥ } $dv → dh;
ᬥ → dha; # LETTER DA MADU
ᬦ } $dv → n;
ᬦ → na; # LETTER NA
ᬧ } $dv → p;
ᬧ → pa; # LETTER PA
ᬨ } $dv → ph;
ᬨ → pha; # LETTER PA KAPAL
ᬩ } $dv → b;
ᬩ → ba; # LETTER BA
ᬪ } $dv → bh;
ᬪ → bha; # LETTER BA KEMBANG
ᬫ } $dv → m;
ᬫ → ma; # LETTER MA
ᬬ } $dv → y;
ᬬ → ya; # LETTER YA
ᬭ } $dv → r;
ᬭ → ra; # LETTER RA
ᬮ } $dv → l;
ᬮ → la; # LETTER LA
ᬯ } $dv → w;
ᬯ → wa; # LETTER WA
ᬰ } $dv → ś;
ᬰ → śa; # LETTER SA SAGA
ᬱ } $dv → ṣ;
ᬱ → ṣa; # LETTER SA SAPA
ᬲ } $dv → s;
ᬲ → sa; # LETTER SA
ᬳ } $dv → h;
ᬳ → ha; # LETTER HA
\u1B4C } $dv → j\=ñ;
\u1B4C → j\=ña; # LETTER ARCHAIC JNYA
# rerekan (not present in DHARMA, "*" used as impromptu mark)
᬴ } $dv_no_rerekan → \*;
᬴ → \* a; # SIGN REREKAN
# dependent vowels
ᬵ → ā; # VOWEL SIGN TEDUNG
ᬶ → i; # VOWEL SIGN ULU
ᬷ → ī; # VOWEL SIGN ULU SARI
ᬸ → u; # VOWEL SIGN SUKU
ᬹ → ū; # VOWEL SIGN SUKU ILUT
ᬺᭂ → r̥ĕ;
ᬺ → r̥ĕ; # VOWEL SIGN RA REPA
ᬻ → r̥ö; # VOWEL SIGN RA REPA TEDUNG
ᬼ→ lĕ; # VOWEL SIGN LA LENGA
ᬽ → lö; # VOWEL SIGN LA LENGA TEDUNG
ᬾ → e; # VOWEL SIGN TALING
ᬿ → ai; # VOWEL SIGN TALING REPA
ᭀ → o; # VOWEL SIGN TALING TEDUNG
ᭁ → au; # VOWEL SIGN TALING REPA TEDUNG
ᭂ → ĕ; # VOWEL SIGN PEPET
ᭃ → ö; # VOWEL SIGN PEPET TEDUNG
# adeg-adeg
᭄\u200C → ·; # explicit ADEG ADEG
᭄ } $c → ; # ADEG ADEG
᭄ → ·; # ADEG ADEG
# Sasak consonants (not present in DHARMA, "'" used as impromptu mark)
ᭅ } $dv → k\';
ᭅ → k\'a; # LETTER KAF SASAK
ᭆ } $dv → kh\';
ᭆ → kh\'a; # LETTER KHOT SASAK
ᭇ } $dv → t\';
ᭇ → t\'a; # LETTER TZIR SASAK
ᭈ } $dv → p\';
ᭈ → p\'a; # LETTER EF SASAK
ᭉ } $dv → w\';
ᭉ → w\'a; # LETTER VE SASAK
ᭊ } $dv → j\';
ᭊ → j\'a; # LETTER ZAL SASAK
ᭋ } $dv → s\';
ᭋ → s\'a; # LETTER ASYURA SASAK
# digits
᭐ → 0; # DIGIT ZERO
᭑ → 1; # DIGIT ONE
᭒ → 2; # DIGIT TWO
᭓ → 3; # DIGIT THREE
᭔ → 4; # DIGIT FOUR
᭕ → 5; # DIGIT FIVE
᭖ → 6; # DIGIT SIX
᭗ → 7; # DIGIT SEVEN
᭘ → 8; # DIGIT EIGHT
᭙ → 9; # DIGIT NINE
# punctuation
᭚ → '||'; # PANTI
᭛ → '//'; # PAMADA
᭜ → 0; # WINDU
᭝ → \=; # CARIK PAMUNGKAH
᭞ → \,; # CARIK SIKI
᭟ → \.; # CARIK PAREREN
᭠ → \-; # PAMENENG
\u1B7D → '|||';
\u1B7E → '///';
EOF;
return
$rules
;
}
protected
function
getTransliteratorAliases
()
{
return
[
'ban'
=>
'ban-x-pku'
,
'ban-bali'
=>
'ban-x-pku'
,
];
}
/**
* Guess if a text is written in Balinese or Latin.
* Overrides LanguageConverter::guessVariant()
*
* @param string $text The text to be checked
* @param string $variant Language code of the variant to be checked for
* @return bool True if $text appears to be written in $variant
*/
public
function
guessVariant
(
$text
,
$variant
)
{
$hasBalinese
=
preg_match
(
"/[
\x
{1B00}-
\x
{1B7F}]/u"
,
$text
);
return
(
$variant
==
'ban-bali'
)
==
$hasBalinese
;
}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 21:33 (1 d, 9 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
89/f5/0041a388885617329ac74be6283d
Default Alt Text
BanConverter.php (24 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment