<?php

namespace CommonsMetadata;

use DOMDocument;
use DOMElement;
use DOMNode;
use DOMNodeList;
use DOMXPath;

/**
 * A very simple wrapper to DOMDocument to make it easy to traverse nodes which match
 * simple CSS selectors.
 */
class DomNavigator {
	/**
	 * The document to search through.
	 * @var DOMXPath
	 */
	protected $domx;

	/**
	 * @param string $html
	 */
	public function __construct( $html ) {
		$oldLoaderState = false;
		if ( LIBXML_VERSION < 20900 ) {
			$oldLoaderState = libxml_disable_entity_loader( true );
		}
		$oldHandlerState = libxml_use_internal_errors( true );
		$dom = new DOMDocument();
		$dom->loadHTML( '<!doctype html><html><head><meta charset="UTF-8"/></head><body>' . $html . '</body></html>' );
		$this->domx = new DOMXPath( $dom );
		if ( LIBXML_VERSION < 20900 ) {
			libxml_disable_entity_loader( $oldLoaderState );
		}
		libxml_use_internal_errors( $oldHandlerState );
	}

	/**
	 * Returns a list of elements of the given type which have the given class.
	 * (In other words, this is equivalent to the CSS selector 'element.class'.)
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $class
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithClass( $element, $class, ?DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$xpath = "./descendant-or-self::{$element}" .
			"[contains(concat(' ', normalize-space(@class), ' '), ' $class ')]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have a class starting with the given
	 * string.
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $classPrefix
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithClassPrefix( $element, $classPrefix, ?DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$xpath = "./descendant-or-self::{$element}" .
			"[contains(concat(' ', normalize-space(@class)), ' $classPrefix')]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have the given class and any lang
	 * attribute. (In other words, this is equivalent to the CSS selector 'element.class[lang]'.)
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $class
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithClassAndLang( $element, $class, ?DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$xpath = "./descendant-or-self::{$element}" .
			"[@lang and contains(concat(' ', normalize-space(@class), ' '), ' $class ')]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have the given id.
	 * (In other words, this is equivalent to the CSS selector 'element#id'.)
	 * When there are multiple elements with this ID, all are returned.
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $id
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithId( $element, $id, ?DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$xpath = "./descendant-or-self::{$element}[@id='$id']";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have an id starting with the given prefix.
	 * (In other words, this is equivalent to the CSS selector 'element[id^=prefix]'.)
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $idPrefix
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithIdPrefix( $element, $idPrefix, ?DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$xpath = "./descendant-or-self::{$element}[starts-with(@id, '$idPrefix')]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have the given attribute with any value.
	 * (In other words, this is equivalent to the CSS selector 'element[attribute]'.)
	 * When there are multiple elements with this attribute, all are returned.
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $attribute
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithAttribute( $element, $attribute, ?DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$xpath = "./descendant-or-self::{$element}[@{$attribute}]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns true if the node has all the specified classes.
	 * @param DOMNode $node
	 * @param string $classes one or more class names (separated with space)
	 * @return bool
	 */
	public function hasClass( DOMNode $node, $classes ) {
		if ( !$node instanceof \DOMElement ) {
			return false;
		}
		$nodeClasses = explode( ' ', $node->getAttribute( 'class' ) );
		$testClasses = explode( ' ', $classes );
		return !array_diff( $testClasses, $nodeClasses );
	}

	/**
	 * Returns the first class matching a prefix.
	 * @param DOMNode $node
	 * @param string $classPrefix
	 * @return string|null
	 */
	public function getFirstClassWithPrefix( DOMNode $node, $classPrefix ) {
		if ( !$node instanceof \DOMElement ) {
			return null;
		}
		$classes = explode( ' ', $node->getAttribute( 'class' ) );
		foreach ( $classes as $class ) {
			$length = strlen( $classPrefix );
			if ( substr( $class, 0, $length ) === $classPrefix ) {
				return $class;
			}
		}
		return null;
	}

	/**
	 * Returns the closest ancestor of the given node, which is of the given type
	 * (like jQuery.closest())
	 * @param DOMNode $node
	 * @param string $element HTML tag name
	 * @return DOMElement|null
	 */
	public function closest( DOMNode $node, $element ) {
		while ( !$node instanceof DOMElement || $node->nodeName !== $element ) {
			if ( $node->parentNode instanceof DOMNode ) {
				$node = $node->parentNode;
			} else {
				return null;
			}
		}
		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return $node;
	}

	/**
	 * Returns the nodes matching an XPath expression.
	 * @param string $xpath
	 * @param DOMNode|null $context
	 * @return DOMNodeList|DOMNode[]
	 */
	public function findByXpath( $xpath, ?DOMNode $context = null ) {
		$results = $this->domx->query( $xpath, $context );
		if ( $results === false ) {
			$error = libxml_get_last_error();
			$logMessage = sprintf( 'HTML parsing error: %s (%s) at line %s, columnt %s',
				$error->message, $error->code, $error->line, $error->column );
			wfDebugLog( 'CommonsMetadata', $logMessage );
			return new DOMNodeList();
		}
		return $results;
	}

	/**
	 * Returns the first node matching an XPath expression, or null.
	 * @param string $xpath
	 * @param DOMNode|null $context
	 * @return DOMNode|null
	 */
	public function getByXpath( $xpath, ?DOMNode $context = null ) {
		$results = $this->findByXpath( $xpath, $context );
		foreach ( $results as $result ) {
			return $result;
		}
		return null;
	}

	/**
	 * Return next sibling element (or null)
	 * @param DOMElement $node
	 * @return DOMElement|null
	 */
	public function nextElementSibling( DOMElement $node ) {
		$nextSibling = $node->nextSibling;
		while ( $nextSibling && !$nextSibling instanceof DOMElement ) {
			$nextSibling = $nextSibling->nextSibling;
		}
		return $nextSibling;
	}

	/**
	 * Takes an element name or array of element names and returns an XPath expression which can
	 * be used as an element name, but matches all of the provided elements.
	 * @param string|array $elmementOrList
	 * @return string
	 */
	protected function handleElementOrList( $elmementOrList ) {
		if ( is_array( $elmementOrList ) ) {
			return '*[' . implode( ' or ', array_map(
				static function ( $el ) {
					return 'self::' . $el;
				},
				$elmementOrList
			) ) . ']';
		} else {
			return $elmementOrList;
		}
	}

}
