<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file MniConverter.php
 * @author Nokib Sarkar
 * @author Haoreima
 */
/**
 * Meitei specific converter routines.
 *
 * Transliterates Meitei text written in the Meetei Mayek script into the
 * Bengali script for the 'mni-beng' variant. The conversion is a single
 * left-to-right pass over the characters of the input, with a handful of
 * context-sensitive rules layered on top of a plain character map.
 *
 * @ingroup Languages
 */
class MniConverter extends LanguageConverterSpecific {
	// Individual Meetei Mayek characters (and two-character sequences) that the
	// context-sensitive rules in mteiToBengali() refer to by name.
	private const O = 'ꯑ';
	private const OO = 'ꯑꯣ';
	private const U = 'ꯎ';
	private const EE = 'ꯑꯤ';
	private const YA = 'ꯌ';
	// Bengali letter emitted for YA when it directly follows a halanta.
	private const Y_ = 'য';
	private const WA = 'ꯋ';
	private const BA = 'ꯕ';
	// NA_ is the form mapped to ন্ (see HALANTA_CONSONANTS); NA is the plain ন.
	private const NA_ = 'ꯟ';
	private const NA = 'ꯅ';
	private const DIACRITIC_AA = 'ꯥ';
	private const HALANTA = '꯭';
	// Output for an input character that must produce nothing.
	private const SKIP = '';
	private const PERIOD = '꯫';
	private const PA_ = 'ꯞ';

	// Vowel signs that may follow O; also used as the plain sign mapping.
	private const DIACRITICS_WITH_O = [
		'ꯣ' => 'ো',
		'ꯤ' => 'ী',
		'ꯥ' => 'া',
		'ꯦ' => 'ে',
		'ꯧ' => 'ৌ',
		'ꯩ' => 'ৈ',
		'ꯪ' => 'ং',
	];

	// O followed by one of the signs above maps to a single independent vowel.
	// Every key of DIACRITICS_WITH_O must have a matching "O + sign" key here.
	private const CONJUGATE_WITH_O = [
		'ꯑꯣ' => 'ও',
		'ꯑꯤ' => 'ঈ',
		'ꯑꯥ' => 'আ',
		'ꯑꯦ' => 'এ',
		'ꯑꯧ' => 'ঔ',
		'ꯑꯩ' => 'ঐ',
		'ꯑꯪ' => 'অং',
	];

	// Consonants before which NA_ keeps its halanta form (ন্).
	private const NOT_WEIRD_AFTER_NA_ = [ 'ꯇ', 'ꯊ', 'ꯗ', 'ꯙ', 'ꯟ', 'ꯕ', 'ꯌ', 'ꯁ' ];

	private const NUMERALS = [
		'꯰' => '০',
		'꯱' => '১',
		'꯲' => '২',
		'꯳' => '৩',
		'꯴' => '৪',
		'꯵' => '৫',
		'꯶' => '৬',
		'꯷' => '৭',
		'꯸' => '৮',
		'꯹' => '৯',
	];

	// Letters whose default Bengali rendering already carries a halanta
	// (or is inherently vowel-less, like ং and ৎ).
	private const HALANTA_CONSONANTS = [
		'ꯟ' => 'ন্',
		'ꯛ' => 'ক্',
		'ꯝ' => 'ম্',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল্',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প্',
	];

	// Same letters as HALANTA_CONSONANTS, rendered without the halanta.
	private const HALANTA_CONSONANTS_TO_NORMAL = [
		'ꯟ' => 'ন',
		'ꯛ' => 'ক',
		'ꯝ' => 'ম',
		'ꯡ' => 'ং',
		'ꯜ' => 'ল',
		'ꯠ' => 'ৎ',
		'ꯞ' => 'প',
	];

	// Matches a single character that separates words: whitespace, the
	// Meetei Mayek period, any Unicode punctuation, and a few ASCII symbols.
	private const NON_WORD_CHARACTER_PATTERN = "/[\s꯫\p{P}<>=\-\|$+^~]+?/u";

	private const CONSONANTS = self::HALANTA_CONSONANTS + [
		'ꯀ' => 'ক',
		'ꯈ' => 'খ',
		'ꯒ' => 'গ',
		'ꯘ' => 'ঘ',
		'ꯉ' => 'ঙ',
		'ꯆ' => 'চ',
		'ꯖ' => 'জ',
		'ꯓ' => 'ঝ',
		'ꯇ' => 'ত',
		'ꯊ' => 'থ',
		'ꯗ' => 'দ',
		'ꯙ' => 'ধ',
		'ꯅ' => 'ন',
		'ꯄ' => 'প',
		'ꯐ' => 'ফ',
		'ꯕ' => 'ব',
		'ꯚ' => 'ভ',
		'ꯃ' => 'ম',
		'ꯌ' => 'য়',
		'ꯔ' => 'র',
		'ꯂ' => 'ল',
		'ꯋ' => 'ৱ',
		'ꫩ' => 'শ',
		'ꫪ' => 'ষ',
		'ꯁ' => 'স',
		'ꯍ' => 'হ',
	];

	private const VOWELS = [
		'ꯑ' => 'অ',
		'ꯏ' => 'ই',
		'ꯎ' => 'উ',
		'ꯢ' => 'ই',
		'ꯨ' => 'ু',
	];

	private const MTEI_TO_BENG_MAP_EXTRA = [
		'꯫' => '।',
		'꯭' => '্',
	];

	// Context-free fallback map: the union of all tables above.
	private const MTEI_TO_BENG_MAP =
		self::VOWELS +
		self::DIACRITICS_WITH_O +
		self::CONJUGATE_WITH_O +
		self::CONSONANTS +
		self::NUMERALS +
		self::MTEI_TO_BENG_MAP_EXTRA;

	/**
	 * Whether the character at $position starts a word, i.e. it is the first
	 * character of the text or is preceded by a non-word character.
	 *
	 * The lookup is done on the array of characters, not on the raw string:
	 * indexing a UTF-8 string with [] addresses bytes, which would not line up
	 * with a character index once the text contains multi-byte characters.
	 *
	 * @param int $position Character index into $chars
	 * @param string[] $chars The text split into characters
	 * @return bool
	 */
	private function isBeginning( int $position, array $chars ): bool {
		if ( $position === 0 ) {
			return true;
		}
		return preg_match( self::NON_WORD_CHARACTER_PATTERN, $chars[$position - 1] ) === 1;
	}

	/**
	 * Whether $char terminates a word (Meetei Mayek period or any character
	 * matched by NON_WORD_CHARACTER_PATTERN).
	 *
	 * @param string $char A single character
	 * @return bool
	 */
	private function isEndOfWord( string $char ): bool {
		if ( $char === self::PERIOD ) {
			return true;
		}
		return preg_match( self::NON_WORD_CHARACTER_PATTERN, $char ) === 1;
	}

	/**
	 * Whether the character at $index is the last one of its word: either the
	 * last character of the text or directly followed by a word terminator.
	 *
	 * @param string[] $chars The text split into characters
	 * @param int $index Character index into $chars
	 * @return bool
	 */
	private function isWordFinal( array $chars, int $index ): bool {
		$next = $chars[$index + 1] ?? null;
		return $next === null || $this->isEndOfWord( $next );
	}

	/**
	 * Lazily transliterates $text, yielding one Bengali fragment per step.
	 *
	 * The rules are tried in order and the first one that applies wins, so
	 * their relative order is significant. Characters without a mapping are
	 * yielded unchanged.
	 *
	 * @param string $text Text in Meetei Mayek script
	 * @return Generator Yields strings; concatenating them gives the result
	 */
	private function mteiToBengali( $text ): Generator {
		$chars = mb_str_split( $text );
		$count = count( $chars );
		$i = 0;
		while ( $i < $count ) {
			$char = $chars[$i];
			$prev = $chars[$i - 1] ?? null;
			$next = $chars[$i + 1] ?? null;
			// Number of input characters consumed by the rule that fires.
			$consumed = 1;

			if (
				$char === self::O &&
				$next !== null &&
				array_key_exists( $next, self::DIACRITICS_WITH_O )
			) {
				/**
				 * We have only 3 true vowels,
				 * ꯑ(a), ꯏ(i), ꯎ (u)
				 * Others are just extension from "a" by mixing with diacritics,
				 * so "a" + diacritic becomes one independent Bengali vowel.
				 */
				yield self::CONJUGATE_WITH_O[$char . $next];
				$consumed = 2;
			} elseif (
				$char === self::HALANTA &&
				$prev !== null &&
				array_key_exists( $prev, self::HALANTA_CONSONANTS )
			) {
				// The previous consonant was already emitted with its halanta;
				// emitting another one would double it.
				yield self::SKIP;
			} elseif (
				array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				$this->isWordFinal( $chars, $i )
			) {
				// Drop the halanta when the consonant is the last character of the word
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif ( $char === self::YA && $prev === self::HALANTA ) {
				// Directly after a halanta, ꯌ is written য instead of the usual য়
				yield self::Y_;
			} elseif (
				$char === self::WA &&
				$i >= 2 &&
				$prev === self::HALANTA &&
				array_key_exists( $chars[$i - 2], self::CONSONANTS )
			) {
				// After consonant + halanta, ꯋ is written ব instead of the usual ৱ
				yield self::CONSONANTS[self::BA];
			} elseif ( $char === self::PA_ && $next === 'ꯀ' ) {
				// do not conjugate with halanta if it's followed by "ক"
				yield self::HALANTA_CONSONANTS_TO_NORMAL[$char];
			} elseif (
				$char === self::NA_ &&
				$next !== null &&
				!in_array( $next, self::NOT_WEIRD_AFTER_NA_, true ) &&
				array_key_exists( $next, self::CONSONANTS )
			) {
				/**
				 * ꯟ would normally become ন্, but that form followed by a
				 * consonant outside NOT_WEIRD_AFTER_NA_ looks weird, so the
				 * plain ন is emitted instead.
				 */
				yield self::MTEI_TO_BENG_MAP[self::NA];
			} elseif ( $char === self::U && !$this->isBeginning( $i, $chars ) ) {
				// উ/ঊ in the middle of words are often replaced by ও
				yield self::MTEI_TO_BENG_MAP[self::OO];
			} elseif (
				$char === self::O &&
				$i + 2 < $count &&
				$chars[$i + 1] === self::EE[0] &&
				$chars[$i + 2] === self::EE[1]
			) {
				/**
				 * Instead of হাঈবা, people love to use হায়বা.
				 * But this is only in the case when ee or ya is
				 * in the middle of the words,
				 * never to do it if it's in the beginning.
				 *
				 * NOTE(review): self::EE[0] and self::EE[1] are single bytes of
				 * a multi-byte string while $chars holds whole characters, so
				 * this comparison cannot match well-formed input. Kept as is
				 * because the intended rule is not clear enough to repair;
				 * confirm the intent before enabling it.
				 */
				yield self::MTEI_TO_BENG_MAP[self::YA];
			} elseif (
				!array_key_exists( $char, self::HALANTA_CONSONANTS ) &&
				array_key_exists( $char, self::CONSONANTS ) &&
				$this->isWordFinal( $chars, $i )
			) {
				// Consonants without halantas should end with diacritics of aa sound everytime.
				yield self::MTEI_TO_BENG_MAP[$char] . self::MTEI_TO_BENG_MAP[self::DIACRITIC_AA];
			} else {
				// Plain lookup; unmapped characters pass through untouched.
				yield self::MTEI_TO_BENG_MAP[$char] ?? $char;
			}

			$i += $consumed;
		}
	}

	/**
	 * Transliterates Meetei Mayek text into Bengali script.
	 *
	 * @param string $text
	 * @return string
	 */
	public function transliterate( $text ) {
		return implode( '', iterator_to_array( $this->mteiToBengali( $text ), false ) );
	}

	/**
	 * @return string Language code of the main variant
	 */
	public function getMainCode(): string {
		return 'mni';
	}

	/**
	 * @return string[] Codes of all variants handled by this converter
	 */
	public function getLanguageVariants(): array {
		return [ 'mni', 'mni-beng' ];
	}

	/**
	 * @return array Map of variant code to the variant it falls back to
	 */
	public function getVariantsFallbacks(): array {
		return [
			'mni-beng' => 'mni'
		];
	}

	/**
	 * Provides one ReplacementArray per variant, each constructed without
	 * arguments; the actual conversion is done in translate().
	 *
	 * @return array
	 */
	protected function loadDefaultTables(): array {
		return [
			'mni' => new ReplacementArray(),
			'mni-beng' => new ReplacementArray(),
		];
	}

	/**
	 * Converts text to the requested variant.
	 *
	 * Text is transliterated into Bengali script when the target variant is
	 * 'mni-beng'; for any other variant it is returned unchanged.
	 *
	 * @param string $text
	 * @param string $toVariant
	 * @return string
	 */
	public function translate( $text, $toVariant ) {
		if ( $toVariant === 'mni-beng' ) {
			return $this->transliterate( $text );
		}
		return $text;
	}
}
