Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1426730
MniConverter.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
7 KB
Referenced Files
None
Subscribers
None
MniConverter.php
View Options
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file MniConverter.php
* @author Nokib Sarkar
* @author Haoreima
*/
/**
 * Meitei specific converter routines.
 *
 * Provides the variants 'mni' (text left as written) and 'mni-beng', which is
 * produced by transliterating Meetei Mayek characters to Bengali script one
 * character at a time, using the lookup tables below plus a handful of
 * context-dependent rules (see mteiToBengali()).
 *
 * @ingroup Languages
 */
class MniConverter extends LanguageConverterSpecific {
	// Single Meetei Mayek characters (and two-character sequences) the rules refer to.
	private const O = 'ꯑ';
	private const OO = 'ꯑꯣ';
	private const U = 'ꯎ';
	private const EE = 'ꯑꯤ';
	private const YA = 'ꯌ';
	/** Bengali output used for YA when it directly follows a halanta. */
	private const Y_ = 'য';
	private const WA = 'ꯋ';
	private const BA = 'ꯕ';
	private const NA_ = 'ꯟ';
	private const NA = 'ꯅ';
	private const DIACRITIC_AA = 'ꯥ';
	private const HALANTA = '꯭';
	/** Emitted when an input character must produce no output. */
	private const SKIP = '';
	private const PERIOD = '꯫';
	private const PA_ = 'ꯞ';

	/** Diacritics that may follow ꯑ, mapped to the Bengali dependent sign. */
	private const DIACRITICS_WITH_O = [
		'ꯣ' => 'ো',
		'ꯤ' => 'ী',
		'ꯥ' => 'া',
		'ꯦ' => 'ে',
		'ꯧ' => 'ৌ',
		'ꯩ' => 'ৈ',
		'ꯪ' => 'ং',
	];

	/** ꯑ + diacritic sequences, mapped to the Bengali independent form. */
	private const CONJUGATE_WITH_O = [
		'ꯑꯣ' => 'ও',
		'ꯑꯤ' => 'ঈ',
		'ꯑꯥ' => 'আ',
		'ꯑꯦ' => 'এ',
		'ꯑꯧ' => 'ঔ',
		'ꯑꯩ' => 'ঐ',
		'ꯑꯪ' => 'অং',
	];

	/** Consonants before which ꯟ keeps its halanta form (ন্). */
	private const NOT_WEIRD_AFTER_NA_ = [ 'ꯇ', 'ꯊ', 'ꯗ', 'ꯙ', 'ꯟ', 'ꯕ', 'ꯌ', 'ꯁ' ];

	private const NUMERALS = [
		'꯰' => '০',
		'꯱' => '১',
		'꯲' => '২',
		'꯳' => '৩',
		'꯴' => '৪',
		'꯵' => '৫',
		'꯶' => '৬',
		'꯷' => '৭',
		'꯸' => '৮',
		'꯹' => '৯',
	];

	/** Consonant letters whose default Bengali output already carries a halanta. */
	private const HALANTA_CONSONANTS = [
		'ꯟ' => 'ন্',
		'ꯛ' => 'ক্',
		'ꯝ' => 'ম্',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল্',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প্',
	];

	/** Same keys as HALANTA_CONSONANTS, mapped to output without the halanta. */
	private const HALANTA_CONSONANTS_TO_NORMAL = [
		'ꯟ' => 'ন',
		'ꯛ' => 'ক',
		'ꯝ' => 'ম',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প',
	];

	/** Matches a single character that separates words (whitespace, punctuation, some symbols). */
	private const NON_WORD_CHARACTER_PATTERN = "/[\s꯫\p{P}<>=\-\|$+^~]+?/u";

	private const CONSONANTS = self::HALANTA_CONSONANTS + [
		'ꯀ' => 'ক',
		'ꯈ' => 'খ',
		'ꯒ' => 'গ',
		'ꯘ' => 'ঘ',
		'ꯉ' => 'ঙ',
		'ꯆ' => 'চ',
		'ꯖ' => 'জ',
		'ꯓ' => 'ঝ',
		'ꯇ' => 'ত',
		'ꯊ' => 'থ',
		'ꯗ' => 'দ',
		'ꯙ' => 'ধ',
		'ꯅ' => 'ন',
		'ꯄ' => 'প',
		'ꯐ' => 'ফ',
		'ꯕ' => 'ব',
		'ꯚ' => 'ভ',
		'ꯃ' => 'ম',
		'ꯌ' => 'য়',
		'ꯔ' => 'র',
		'ꯂ' => 'ল',
		'ꯋ' => 'ৱ',
		'ꫩ' => 'শ',
		'ꫪ' => 'ষ',
		'ꯁ' => 'স',
		'ꯍ' => 'হ',
	];

	private const VOWELS = [
		'ꯑ' => 'অ',
		'ꯏ' => 'ই',
		'ꯎ' => 'উ',
		'ꯢ' => 'ই',
		'ꯨ' => 'ু',
	];

	private const MTEI_TO_BENG_MAP_EXTRA = [
		'꯫' => '।',
		'꯭' => '্',
	];

	/**
	 * Default character-to-character mapping. The tables are combined with the
	 * array union operator, so for a key present in several tables the
	 * left-most table wins.
	 */
	private const MTEI_TO_BENG_MAP = self::VOWELS +
		self::DIACRITICS_WITH_O +
		self::CONJUGATE_WITH_O +
		self::CONSONANTS +
		self::NUMERALS +
		self::MTEI_TO_BENG_MAP_EXTRA;

	/**
	 * Whether the character at $position starts a word, i.e. it is the very first
	 * character or is directly preceded by a non-word character.
	 *
	 * The previous character is looked up in the already-split character list.
	 * Indexing the raw string with a character position would read a single byte
	 * of a multi-byte sequence and never match the separator pattern.
	 *
	 * @param int $position Zero-based character index into $chars
	 * @param string[] $chars The text split into characters
	 * @return bool
	 */
	private function isBeginning( int $position, array $chars ): bool {
		if ( $position === 0 ) {
			return true;
		}
		return preg_match( self::NON_WORD_CHARACTER_PATTERN, $chars[$position - 1] ) === 1;
	}

	/**
	 * Whether $char terminates a word: the Meetei period or anything matched by
	 * NON_WORD_CHARACTER_PATTERN.
	 *
	 * @param string $char A single character
	 * @return bool
	 */
	private function isEndOfWord( string $char ): bool {
		return $char === self::PERIOD
			|| preg_match( self::NON_WORD_CHARACTER_PATTERN, $char ) === 1;
	}

	/**
	 * Whether the character at $position is the last one of its word, i.e. it is
	 * the final character of the text or is followed by a word terminator.
	 *
	 * @param int $position Zero-based character index into $chars
	 * @param string[] $chars The text split into characters
	 * @return bool
	 */
	private function isLastInWord( int $position, array $chars ): bool {
		$next = $chars[$position + 1] ?? null;
		return $next === null || $this->isEndOfWord( $next );
	}

	/**
	 * Lazily transliterate Meetei Mayek text to Bengali script.
	 *
	 * Walks the text character by character. The first matching rule below decides
	 * the output for the current character; characters with no rule and no entry in
	 * MTEI_TO_BENG_MAP are passed through unchanged.
	 *
	 * @param string $text
	 * @return Generator Yields the output pieces in order; a piece may be empty or
	 *  longer than one character
	 */
	private function mteiToBengali( $text ): Generator {
		$chars = mb_str_split( $text, 1, 'UTF-8' );
		$length = count( $chars );

		for ( $i = 0; $i < $length; $i++ ) {
			$char = $chars[$i];
			$prev = $chars[$i - 1] ?? null;
			$next = $chars[$i + 1] ?? null;

			if (
				$char === self::O && $next !== null &&
				array_key_exists( $next, self::DIACRITICS_WITH_O )
			) {
				// There are only 3 true vowels: ꯑ (a), ꯏ (i), ꯎ (u). The others are
				// written as ꯑ plus a diacritic, which maps to one independent
				// Bengali vowel.
				yield self::CONJUGATE_WITH_O[$char . $next];
				// The diacritic was consumed together with ꯑ; skip it.
				$i++;
			} elseif (
				$char === self::HALANTA && $prev !== null &&
				array_key_exists( $prev, self::HALANTA_CONSONANTS )
			) {
				// The preceding consonant's output already carries a halanta.
				yield self::SKIP;
			} elseif (
				array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				$this->isLastInWord( $i, $chars )
			) {
				// No halanta on the last character of a word.
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif ( $char === self::YA && $prev === self::HALANTA ) {
				// Directly after a halanta, ꯌ is emitted as য instead of its
				// default য়.
				yield self::Y_;
			} elseif (
				$char === self::WA && $prev === self::HALANTA && $i >= 2 &&
				array_key_exists( $chars[$i - 2], self::CONSONANTS )
			) {
				// consonant + halanta + ꯋ: emit ব instead of the default ৱ.
				yield self::CONSONANTS[self::BA];
			} elseif ( $char === self::PA_ && $next === 'ꯀ' ) {
				// Do not join with a halanta when followed by "ক".
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif (
				$char === self::NA_ && $next !== null &&
				!in_array( $next, self::NOT_WEIRD_AFTER_NA_, true ) &&
				array_key_exists( $next, self::CONSONANTS )
			) {
				// ন্ joined to a consonant outside NOT_WEIRD_AFTER_NA_ looks odd,
				// so emit plain ন (no halanta) in front of it.
				yield self::MTEI_TO_BENG_MAP[self::NA];
			} elseif ( $char === self::U && !$this->isBeginning( $i, $chars ) ) {
				// উ/ঊ in the middle of words are often replaced by ও.
				yield self::MTEI_TO_BENG_MAP[self::OO];
			} elseif (
				$char === self::O && $i + 2 < $length &&
				$chars[$i + 1] === self::EE[0] &&
				$chars[$i + 2] === self::EE[1]
			) {
				// Intended rule: instead of হাঈবা people prefer হায়বা, but only when
				// the ee / ya is in the middle of a word, never at the beginning.
				//
				// NOTE(review): self::EE[0] and self::EE[1] are single *bytes* of a
				// multi-byte string, while $chars holds whole characters, so for
				// valid UTF-8 input this condition can never be true. Kept as-is
				// because the intended character sequence is not clear from the
				// code; needs input from someone who knows the orthography.
				yield self::MTEI_TO_BENG_MAP[self::YA];
			} elseif (
				!array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				array_key_exists( $char, self::CONSONANTS ) &&
				$this->isLastInWord( $i, $chars )
			) {
				// A word-final consonant without halanta always gets the "aa"
				// diacritic appended.
				yield self::MTEI_TO_BENG_MAP[$char] .
					self::MTEI_TO_BENG_MAP[self::DIACRITIC_AA];
			} else {
				// Plain table lookup; unknown characters pass through untouched.
				yield self::MTEI_TO_BENG_MAP[$char] ?? $char;
			}
		}
	}

	/**
	 * Transliterate Meetei Mayek text to Bengali script.
	 *
	 * @param string $text
	 * @return string
	 */
	public function transliterate( $text ) {
		return implode( '', iterator_to_array( $this->mteiToBengali( $text ), false ) );
	}

	public function getMainCode(): string {
		return 'mni';
	}

	public function getLanguageVariants(): array {
		return [ 'mni', 'mni-beng' ];
	}

	public function getVariantsFallbacks(): array {
		return [
			'mni-beng' => 'mni'
		];
	}

	protected function loadDefaultTables(): array {
		// Both variants start with empty replacement tables; the mni-beng
		// conversion is done algorithmically in translate().
		return [
			'mni' => new ReplacementArray(),
			'mni-beng' => new ReplacementArray(),
		];
	}

	/**
	 * Convert text to the given variant.
	 *
	 * For 'mni-beng' the text is transliterated to Bengali script with
	 * transliterate(); for every other variant it is returned unchanged.
	 * This override neither consults the replacement tables nor calls the
	 * parent implementation.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant === 'mni-beng' ) {
			return $this->transliterate( $text );
		}
		return $text;
	}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 13:37 (1 d, 19 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
33/06/f217c46712d42cd8257c7ac2d111
Default Alt Text
MniConverter.php (7 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment