<?php
/**
 * @file
 * @license GPL-2.0-or-later
 */

namespace MediaWiki\Extension\Translate\Utilities;

use InvalidArgumentException;
use MediaWiki\Extension\Translate\FileFormatSupport\GettextPluralException;

/** @since 2019.09 */
class GettextPlural {
	private const PRE = '{{PLURAL:GETTEXT|';
	private const POST = '}}';

	/**
	 * Returns Gettext plural rule for given language.
	 *
	 * @param string $code Language tag in MediaWiki internal format.
	 * @return string Empty string if no plural rule found
	 */
	public static function getPluralRule( $code ) {
		global $wgTranslateDocumentationLanguageCode;

		if ( $code === $wgTranslateDocumentationLanguageCode ) {
			return 'nplurals=1; plural=0;';
		}

		$rulefile = __DIR__ . '/../../data/plural-gettext.txt';
		$rules = file_get_contents( $rulefile );
		foreach ( explode( "\n", $rules ) as $line ) {
			if ( trim( $line ) === '' ) {
				continue;
			}
			[ $rulecode, $rule ] = explode( "\t", $line );
			if ( $rulecode === $code ) {
				return $rule;
			}
		}

		return '';
	}

	/**
	 * Returns how many plural forms are expected by a given plural rule.
	 *
	 * @param string $rule Gettext style plural rule.
	 * @return int
	 * @throws InvalidArgumentException
	 */
	public static function getPluralCount( $rule ) {
		$m = [];
		$ok = preg_match( '/nplurals=([0-9]+).*;/', $rule, $m );
		if ( !$ok ) {
			throw new InvalidArgumentException( "Rule $rule is malformed" );
		}
		return (int)$m[ 1 ];
	}

	/**
	 * Quick way to check if the text contains plural syntax.
	 *
	 * @param string $text
	 * @return bool
	 */
	public static function hasPlural( $text ) {
		return str_contains( $text, self::PRE );
	}

	/**
	 * Format plural forms as single string suitable for translation.
	 *
	 * @param string[] $forms
	 * @return string
	 */
	public static function flatten( array $forms ) {
		return self::PRE . implode( '|', $forms ) . self::POST;
	}

	/**
	 * Format translation with plural forms as array of forms.
	 *
	 * Reverse of flatten. Do note that A may be != flatten( unflatten( A ) ) because
	 * translators can place part of the text outside the plural markup or use multiple
	 * instances of the markup.
	 *
	 * @param string $text
	 * @param int $expectedPluralCount
	 * @return string[]
	 */
	public static function unflatten( $text, $expectedPluralCount ) {
		[ $template, $instanceMap ] = self::parsePluralForms( $text );
		return self::expandTemplate( $template, $instanceMap, $expectedPluralCount );
	}

	/**
	 * Replaces problematic markup which can confuse our plural syntax markup with placeholders
	 *
	 * @param string $text
	 * @return array [ string $text, array $map ]
	 */
	private static function armour( $text ) {
		// |/| is commonly used in KDE to support inflections. It needs to be escaped
		// to avoid it messing up the plural markup.
		$replacements = [
			'|/|' => Utilities::getPlaceholder(),
		];
		// {0} is a common variable format
		preg_match_all( '/\{\d+\}/', $text, $matches );
		foreach ( $matches[0] as $m ) {
			$replacements[$m] = Utilities::getPlaceholder();
		}

		$text = strtr( $text, $replacements );
		$map = array_flip( $replacements );

		return [ $text, $map ];
	}

	/**
	 * Reverse of armour.
	 *
	 * @param string $text
	 * @param array $map Map returned by armour.
	 * @return string
	 */
	private static function unarmour( $text, array $map ) {
		return strtr( $text, $map );
	}

	/**
	 * Parses plural markup into a structure form.
	 *
	 * @param string $text
	 * @return array [ string $template, array $instanceMap ]
	 */
	public static function parsePluralForms( $text ) {
		$m = [];
		$pre = preg_quote( self::PRE, '/' );
		$post = preg_quote( self::POST, '/' );

		[ $armouredText, $armourMap ] = self::armour( $text );

		$ok = preg_match_all( "/$pre(.*)$post/Us", $armouredText, $m );
		if ( $ok === false ) {
			throw new GettextPluralException( "Plural regular expression failed for text: $text" );
		}

		$template = $armouredText;
		$instanceMap = [];

		foreach ( $m[0] as $instanceIndex => $instanceText ) {
			$ph = Utilities::getPlaceholder();

			// Using preg_replace instead of str_replace because of the limit parameter
			$pattern = '/' . preg_quote( $instanceText, '/' ) . '/';
			$template = preg_replace( $pattern, $ph, $template, 1 );

			$instanceForms = explode( '|', $m[ 1 ][ $instanceIndex ] );
			foreach ( $instanceForms as $i => $v ) {
				$instanceForms[ $i ] = self::unarmour( $v, $armourMap );
			}

			$instanceMap[$ph] = $instanceForms;
		}

		$template = self::unarmour( $template, $armourMap );
		return [ $template, $instanceMap ];
	}

	/**
	 * Gives fully expanded forms given a template and parsed plural markup instances.
	 *
	 * @param string $template
	 * @param array $instanceMap
	 * @param int $expectedPluralCount
	 * @return string[]
	 */
	public static function expandTemplate( $template, array $instanceMap, $expectedPluralCount ) {
		$formArray = [];
		for ( $formIndex = 0; $formIndex < $expectedPluralCount; $formIndex++ ) {
			// Start with the whole string
			$form = $template;

			// Loop over each plural markup instance and replace it with the plural form belonging
			// to the current index
			foreach ( $instanceMap as $ph => $instanceForms ) {
				// For missing forms, fall back to empty text.
				// Extra forms are excluded because $formIndex < $expectedPluralCount
				$replacement = $instanceForms[ $formIndex ] ?? '';
				$form = str_replace( $ph, $replacement, $form );
			}

			$formArray[ $formIndex ] = $form;
		}

		return $formArray;
	}
}
