Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1431862
DOMUtils.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
26 KB
Referenced Files
None
Subscribers
None
DOMUtils.php
View Options
<?php
declare
(
strict_types
=
1
);
namespace
Wikimedia\Parsoid\Utils
;
use
Wikimedia\Assert\Assert
;
use
Wikimedia\Parsoid\Core\ClientError
;
use
Wikimedia\Parsoid\DOM\Comment
;
use
Wikimedia\Parsoid\DOM\Document
;
use
Wikimedia\Parsoid\DOM\DocumentFragment
;
use
Wikimedia\Parsoid\DOM\Element
;
use
Wikimedia\Parsoid\DOM\Node
;
use
Wikimedia\Parsoid\DOM\Text
;
use
Wikimedia\Parsoid\Wikitext\Consts
;
use
Wikimedia\Parsoid\Wt2Html\XMLSerializer
;
use
Wikimedia\RemexHtml\DOM\DOMBuilder
;
use
Wikimedia\RemexHtml\Tokenizer\Tokenizer
;
use
Wikimedia\RemexHtml\TreeBuilder\Dispatcher
;
use
Wikimedia\RemexHtml\TreeBuilder\TreeBuilder
;
/**
* DOM utilities for querying the DOM. This is largely independent of Parsoid
* although some Parsoid details (TokenUtils, inline content version)
* have snuck in.
*/
class
DOMUtils
{
/**
 * Parse an HTML string with the Remex tokenizer/tree builder and return
 * the resulting document.
 *
 * Input that does not start with a doctype, <html> or <body> tag is
 * prefixed with '<body>' before parsing.
 *
 * @param string $html HTML source to parse
 * @param bool $validateXMLNames If true, throw when the DOM builder
 *   reports that it had to coerce a name.
 * @return Document
 * @throws ClientError when $validateXMLNames is set and a name was coerced
 */
public static function parseHTML(
	string $html, bool $validateXMLNames = false
): Document {
	$startsWithDocumentTag = preg_match( '/^<(?:!doctype|html|body)/i', $html );
	if ( !$startsWithDocumentTag ) {
		// Make sure that we parse fragments in the body. Otherwise comments,
		// link and meta tags end up outside the html element or in the head
		// elements.
		$html = '<body>' . $html;
	}

	$builderOptions = [
		'suppressHtmlNamespace' => true,
	];
	$domBuilder = new class( $builderOptions ) extends DOMBuilder {
		/** @inheritDoc */
		protected function createDocument(
			?string $doctypeName = null,
			?string $public = null,
			?string $system = null
		) {
			// @phan-suppress-next-line PhanTypeMismatchReturn
			return DOMCompat::newDocument( false );
		}
	};

	// Wire up the Remex pipeline: tokenizer -> dispatcher -> tree builder -> DOM builder.
	$lenient = [ 'ignoreErrors' => true ];
	$tokenizer = new Tokenizer(
		new Dispatcher( new TreeBuilder( $domBuilder, $lenient ) ),
		$html,
		$lenient
	);
	$tokenizer->execute( [] );

	if ( $validateXMLNames && $domBuilder->isCoerced() ) {
		throw new ClientError( 'Encountered a name invalid in XML.' );
	}

	$doc = $domBuilder->getFragment();
	'@phan-var Document $doc'; // @var Document $doc
	return $doc;
}
/**
 * Pre-order walk: call $handler on $node, then recurse into each child.
 *
 * This is a simplified version of the DOMTraverser.
 * Consider using that before making this more complex.
 *
 * FIXME: Move to DOMTraverser OR create a new class?
 * @param Node $node Root of the subtree to visit
 * @param callable $handler Called as $handler( $node, ...$args )
 * @param mixed ...$args Extra arguments forwarded to every handler call
 */
public static function visitDOM( Node $node, callable $handler, ...$args ): void {
	$handler( $node, ...$args );
	for ( $child = $node->firstChild; $child !== null; $child = $following ) {
		// Capture the sibling before recursing, so the walk continues
		// from it even if the handler touches $child.
		$following = $child->nextSibling;
		self::visitDOM( $child, $handler, ...$args );
	}
}
/**
 * Move every child of $from into $to, inserting each before $beforeNode.
 * When $beforeNode is null the children are appended at the end of $to.
 *
 * @param Node $from Source node. Children will be removed.
 * @param Node $to Destination node. Children of $from will be added here
 * @param ?Node $beforeNode Add the children before this node.
 */
public static function migrateChildren(
	Node $from, Node $to, ?Node $beforeNode = null
): void {
	// Keep taking the current first child until $from has none left.
	for ( $child = $from->firstChild; $child !== null; $child = $from->firstChild ) {
		$to->insertBefore( $child, $beforeNode );
	}
}
/**
 * Deep-copy every child of $from into $to, inserting each copy before
 * $beforeNode. The copies are imported into $to's owner document, so
 * $from and $to may belong to different documents.
 *
 * If $beforeNode is null, the copies are appended at the end.
 *
 * @param Node $from Node whose children are copied
 * @param Node $to Node receiving the imported copies
 * @param ?Node $beforeNode Insert the copies before this node
 */
public static function migrateChildrenBetweenDocs(
	Node $from, Node $to, ?Node $beforeNode = null
): void {
	$targetDoc = $to->ownerDocument;
	for ( $src = $from->firstChild; $src !== null; $src = $src->nextSibling ) {
		$copy = $targetDoc->importNode( $src, true );
		$to->insertBefore( $copy, $beforeNode );
	}
}
// phpcs doesn't like @phan-assert...
// phpcs:disable MediaWiki.Commenting.FunctionAnnotations.UnrecognizedAnnotation
/**
 * Assert that $node is a DOM Element.
 * This is primarily to help phan analyze variable types.
 *
 * @phan-assert Element $node
 * @param ?Node $node
 * @return bool Always returns true
 */
public static function assertElt( ?Node $node ): bool {
	$isElement = $node instanceof Element;
	Assert::invariant( $isElement, "Expected an element" );
	return true;
}
/**
 * True when $node is an Element that is neither listed in
 * Consts::$HTML['OnlyInlineElements'] nor a metadata tag.
 *
 * @param ?Node $node
 * @return bool
 */
public static function isRemexBlockNode( ?Node $node ): bool {
	if ( !( $node instanceof Element ) ) {
		return false;
	}
	if ( isset( Consts::$HTML['OnlyInlineElements'][DOMCompat::nodeName( $node )] ) ) {
		return false;
	}
	// This is a superset of \\MediaWiki\Tidy\RemexCompatMunger::$metadataElements
	return !self::isMetaDataTag( $node );
}
/**
 * True when $node is non-null and TokenUtils::isWikitextBlockTag()
 * accepts its node name.
 *
 * @param ?Node $node
 * @return bool
 */
public static function isWikitextBlockNode( ?Node $node ): bool {
	if ( !$node ) {
		return false;
	}
	$tagName = DOMCompat::nodeName( $node );
	return (bool)TokenUtils::isWikitextBlockTag( $tagName );
}
/**
 * Determine whether this is a formatting DOM element, i.e. its node name
 * is a key of Consts::$HTML['FormattingTags'].
 *
 * @param ?Node $node
 * @return bool
 */
public static function isFormattingElt( ?Node $node ): bool {
	if ( !$node ) {
		return false;
	}
	$tagName = DOMCompat::nodeName( $node );
	return isset( Consts::$HTML['FormattingTags'][$tagName] );
}
/**
 * Determine whether this is a quote DOM element, i.e. its node name
 * is a key of Consts::$WTQuoteTags.
 *
 * @param ?Node $node
 * @return bool
 */
public static function isQuoteElt( ?Node $node ): bool {
	if ( !$node ) {
		return false;
	}
	$tagName = DOMCompat::nodeName( $node );
	return isset( Consts::$WTQuoteTags[$tagName] );
}
/**
 * Determine whether this is the <body> DOM element.
 *
 * @param ?Node $node
 * @return bool True when $node is non-null and its node name is 'body'
 */
public static function isBody( ?Node $node ): bool {
	if ( !$node ) {
		return false;
	}
	return DOMCompat::nodeName( $node ) === 'body';
}
/**
 * Determine whether $node should be treated as removed: either there is
 * no node at all, or the object no longer has a `nodeType` set.
 *
 * @param ?Node $node
 * @return bool
 */
public static function isRemoved( ?Node $node ): bool {
	if ( !$node ) {
		return true;
	}
	return !isset( $node->nodeType );
}
/**
 * Collect $node and each of its ancestors by following parentNode links
 * until there is no parent.
 *
 * @param Node $node
 * @return Node[] Path including all nodes from $node to the root of the document
 */
public static function pathToRoot( Node $node ): array {
	$path = [];
	for ( $cur = $node; $cur; $cur = $cur->parentNode ) {
		$path[] = $cur;
	}
	return $path;
}
/**
 * Count the parentNode links between $node and the root.
 * Root document is at depth 0, <html> at 1, <body> at 2.
 *
 * @param Node $node
 * @return int Number of ancestors of $node
 */
public static function nodeDepth( Node $node ): int {
	$depth = 0;
	for ( $ancestor = $node->parentNode; $ancestor; $ancestor = $ancestor->parentNode ) {
		$depth++;
	}
	return $depth;
}
/**
 * Build the list of nodes from $node up to (but excluding) $sibling,
 * walking along the sibling chain. If $sibling is never reached, the
 * list runs to the end of the chain.
 *
 * @param Node $node Starting node (included in the result)
 * @param Node $sibling Node at which to stop (not included)
 * @param bool $left indicates whether to go backwards, use previousSibling instead of nextSibling.
 * @return Node[]
 */
public static function pathToSibling( Node $node, Node $sibling, bool $left ): array {
	$path = [];
	$cur = $node;
	while ( $cur && $cur !== $sibling ) {
		$path[] = $cur;
		if ( $left ) {
			$cur = $cur->previousSibling;
		} else {
			$cur = $cur->nextSibling;
		}
	}
	return $path;
}
/**
 * Check whether a node `n1` comes before another node `n2` in
 * their parent's children list. Also true when both are the same node.
 *
 * @param Node $n1 The node you expect to come first.
 * @param Node $n2 Expected later sibling.
 * @return bool True if $n2 is reached by following nextSibling from $n1
 */
public static function inSiblingOrder( Node $n1, Node $n2 ): bool {
	for ( $cur = $n1; $cur; $cur = $cur->nextSibling ) {
		if ( $cur === $n2 ) {
			return true;
		}
	}
	return false;
}
/**
 * Check that a node 'n1' is an ancestor of another node 'n2' in
 * the DOM. Returns true if n1 === n2.
 *
 * @param Node $n1 The suspected ancestor
 * @param Node $n2 The suspected descendant
 * @return bool True if $n1 is reached by following parentNode from $n2
 */
public static function isAncestorOf( Node $n1, Node $n2 ): bool {
	for ( $cur = $n2; $cur; $cur = $cur->parentNode ) {
		if ( $cur === $n1 ) {
			return true;
		}
	}
	return false;
}
/**
 * Find the nearest strict ancestor of $node whose node name is $name.
 * $node itself is never considered.
 *
 * @param Node $node
 * @param string $name Node name to look for
 * @return ?Element The matching ancestor, or null if none is found
 */
public static function findAncestorOfName( Node $node, string $name ): ?Element {
	for ( $ancestor = $node->parentNode; $ancestor; $ancestor = $ancestor->parentNode ) {
		if ( DOMCompat::nodeName( $ancestor ) === $name ) {
			'@phan-var Element $ancestor'; // @var Element $ancestor
			return $ancestor;
		}
	}
	return null;
}
/**
 * Check whether $node has $name or has an ancestor named $name.
 *
 * @param Node $node
 * @param string $name Node name to look for
 * @return bool
 */
public static function hasNameOrHasAncestorOfName( Node $node, string $name ): bool {
	if ( DOMCompat::nodeName( $node ) === $name ) {
		return true;
	}
	$ancestor = self::findAncestorOfName( $node, $name );
	return $ancestor !== null;
}
/**
 * Determine whether the node has the given nodeName and a `typeof`
 * value matching the given regular expression.
 *
 * @param Node $n The node to test
 * @param string $name The expected nodeName of $n
 * @param string $typeRe Regular expression matching the expected value of
 *   `typeof` attribute.
 * @return ?string The matching `typeof` value, or `null` if there is
 *   no match (including when the node name differs).
 */
public static function matchNameAndTypeOf( Node $n, string $name, string $typeRe ): ?string {
	if ( DOMCompat::nodeName( $n ) !== $name ) {
		return null;
	}
	return self::matchTypeOf( $n, $typeRe );
}
/**
 * Determine whether the node matches the given nodeName and typeof
 * attribute value; the typeof is given as string.
 *
 * The literal $type is quoted and anchored into a regular expression,
 * then delegated to matchNameAndTypeOf().
 *
 * @param Node $n
 * @param string $name node name to test for
 * @param string $type Expected value of "typeof" attribute (literal string)
 * @return bool True if the node matches.
 */
public static function hasNameAndTypeOf( Node $n, string $name, string $type ): bool {
	$anchoredRe = '/^' . preg_quote( $type, '/' ) . '$/';
	$matched = self::matchNameAndTypeOf( $n, $name, $anchoredRe );
	return $matched !== null;
}
/**
 * Find a value of the node's `typeof` attribute matching a regular
 * expression.
 *
 * @param Node $n The node to test
 * @param string $typeRe Regular expression matching the expected value of
 *   the `typeof` attribute.
 * @return ?string The matching `typeof` value, or `null` if there is
 *   no match.
 */
public static function matchTypeOf( Node $n, string $typeRe ): ?string {
	$attrName = 'typeof';
	return self::matchMultivalAttr( $n, $attrName, $typeRe );
}
/**
 * Find a value of the node's `rel` attribute matching a regular
 * expression.
 *
 * @param Node $n The node to test
 * @param string $relRe Regular expression matching the expected value of
 *   the `rel` attribute.
 * @return ?string The matching `rel` value, or `null` if there is
 *   no match.
 */
public static function matchRel( Node $n, string $relRe ): ?string {
	$attrName = 'rel';
	return self::matchMultivalAttr( $n, $attrName, $relRe );
}
/**
 * Return the first space-separated value of a multivalue attribute that
 * matches a regular expression.
 *
 * @param Node $n The node to test; non-Element nodes never match
 * @param string $attrName the attribute to test (typically 'rel' or 'typeof')
 * @param string $valueRe Regular expression matching the expected value of
 *   the attribute.
 * @return ?string The matching attribute value, or `null` if there is
 *   no match.
 */
private static function matchMultivalAttr( Node $n, string $attrName, string $valueRe ): ?string {
	$raw = $n instanceof Element ? DOMCompat::getAttribute( $n, $attrName ) : null;
	if ( $raw === null || $raw === '' ) {
		return null;
	}
	foreach ( explode( ' ', $raw ) as $candidate ) {
		// Consecutive spaces produce empty pieces; skip them.
		if ( $candidate !== '' ) {
			$matched = preg_match( $valueRe, $candidate );
			// preg_match() returns false when the pattern is invalid.
			Assert::invariant( $matched !== false, "Bad regexp" );
			if ( $matched ) {
				return $candidate;
			}
		}
	}
	return null;
}
/**
 * Determine whether the node's `typeof` attribute contains the given value.
 *
 * @param Node $n
 * @param string $type Expected value of "typeof" attribute, as a literal
 *   string.
 * @return bool True if the node matches.
 */
public static function hasTypeOf( Node $n, string $type ): bool {
	$attrName = 'typeof';
	return self::hasValueInMultivalAttr( $n, $attrName, $type );
}
/**
 * Determine whether the node's `rel` attribute contains the given value.
 *
 * @param Node $n
 * @param string $rel Expected value of "rel" attribute, as a literal string.
 * @return bool True if the node matches.
 */
public static function hasRel( Node $n, string $rel ): bool {
	$attrName = 'rel';
	return self::hasValueInMultivalAttr( $n, $attrName, $rel );
}
/**
 * Test whether the element's `class` attribute contains a class name
 * matching $regex.
 *
 * $regex is embedded between a lookbehind for start-of-string or
 * whitespace and a lookahead for whitespace or end-of-string, so it must
 * match a whole class name. It is inserted verbatim into a pattern
 * delimited by `{` and `}`.
 *
 * @param Element $element
 * @param string $regex Partial regular expression, e.g. "foo|bar"
 * @return bool
 */
public static function hasClass( Element $element, string $regex ): bool {
	// A missing class attribute is treated like an empty one.
	$value = DOMCompat::getAttribute( $element, 'class' ) ?? '';
	// The pattern pieces must be single-line literals: a raw line break
	// inside the lookaround would become part of the pattern and stop it
	// from matching ordinary space-separated class lists.
	$pattern = '{(?<=^|\s)' . $regex . '(?=\s|$)}';
	return (bool)preg_match( $pattern, $value );
}
/**
 * Determine whether a multivalued attribute of the node contains the
 * given value as one of its space-separated entries.
 *
 * @param Node $n Node to test; non-Element nodes never match
 * @param string $attrName name of the attribute to check (typically 'typeof', 'rel')
 * @param string $value Expected value of the $attrName attribute, as a literal string.
 * @return bool True if the node matches
 */
private static function hasValueInMultivalAttr( Node $n, string $attrName, string $value ): bool {
	$raw = $n instanceof Element ? DOMCompat::getAttribute( $n, $attrName ) : null;
	if ( $raw === null || $raw === '' ) {
		return false;
	}
	// Cheap whole-string comparison first; only split when it fails.
	return $raw === $value
		|| in_array( $value, explode( ' ', $raw ), true );
}
/**
 * Add a type to the typeof attribute. This method should almost always
 * be used instead of `setAttribute`, to ensure we don't overwrite existing
 * typeof information.
 *
 * @param Element $node Element to update
 * @param string $type Type to add
 * @param bool $prepend If true, adds value to start, rather than end.
 *   Use of this option in new code is discouraged.
 */
public static function addTypeOf( Element $node, string $type, bool $prepend = false ): void {
	$attrName = 'typeof';
	self::addValueToMultivalAttr( $node, $attrName, $type, $prepend );
}
/**
 * Add a value to the rel attribute. This method should almost always
 * be used instead of `setAttribute`, to ensure we don't overwrite existing
 * rel information.
 *
 * @param Element $node Element to update
 * @param string $rel Value to add
 */
public static function addRel( Element $node, string $rel ): void {
	$attrName = 'rel';
	self::addValueToMultivalAttr( $node, $attrName, $rel );
}
/**
 * Add an element to a multivalue attribute (typeof, rel). This method should almost always
 * be used instead of `setAttribute`, to ensure we don't overwrite existing
 * multivalue information.
 *
 * The value is trimmed first; an empty value, or one already present in
 * the attribute, leaves the element untouched.
 *
 * @param Element $node
 * @param string $attr Attribute name
 * @param string $value Value to add
 * @param bool $prepend If true, adds value to start, rather than end
 */
private static function addValueToMultivalAttr(
	Element $node, string $attr, string $value, bool $prepend = false
): void {
	$value = trim( $value );
	if ( $value === '' ) {
		return;
	}

	$current = DOMCompat::getAttribute( $node, $attr );
	$trimmedCurrent = $current === null ? '' : trim( $current );
	if ( $trimmedCurrent === '' ) {
		// Nothing useful stored yet: the new value becomes the whole attribute.
		$node->setAttribute( $attr, $value );
		return;
	}

	if ( in_array( $value, explode( ' ', $trimmedCurrent ), true ) ) {
		return;
	}

	// The existing (untrimmed) attribute text is kept as-is.
	if ( $prepend ) {
		$combined = "$value $current";
	} else {
		$combined = "$current $value";
	}
	$node->setAttribute( $attr, $combined );
}
/**
 * Remove a value from a multiple-valued attribute. The attribute itself
 * is removed when no values remain.
 *
 * @param Element $node Element to update
 * @param string $attr The attribute name
 * @param string $value The value to remove (trimmed before comparison)
 */
private static function removeValueFromMultivalAttr(
	Element $node, string $attr, string $value
): void {
	$current = DOMCompat::getAttribute( $node, $attr );
	if ( $current === null || $current === '' ) {
		return;
	}

	$remaining = array_diff( explode( ' ', $current ), [ trim( $value ) ] );
	if ( count( $remaining ) === 0 ) {
		$node->removeAttribute( $attr );
		return;
	}
	$node->setAttribute( $attr, implode( ' ', $remaining ) );
}
/**
 * Remove a type from the typeof attribute.
 *
 * @param Element $node Element to update
 * @param string $type Type to remove
 */
public static function removeTypeOf( Element $node, string $type ): void {
	$attrName = 'typeof';
	self::removeValueFromMultivalAttr( $node, $attrName, $type );
}
/**
 * Remove a value from the rel attribute.
 *
 * @param Element $node Element to update
 * @param string $rel Value to remove
 */
public static function removeRel( Element $node, string $rel ): void {
	$attrName = 'rel';
	self::removeValueFromMultivalAttr( $node, $attrName, $rel );
}
/**
 * Check whether `node` is in a fosterable position, i.e. whether the
 * node name of its parent is a key of Consts::$HTML['FosterablePosition'].
 *
 * @param ?Node $n
 * @return bool False when $n is null or has no parent
 */
public static function isFosterablePosition( ?Node $n ): bool {
	if ( !$n ) {
		return false;
	}
	$parent = $n->parentNode;
	if ( !$parent ) {
		// A parentless node has no position to test; do not hand null
		// to DOMCompat::nodeName().
		return false;
	}
	return isset( Consts::$HTML['FosterablePosition'][DOMCompat::nodeName( $parent )] );
}
/**
 * Check whether `node` is a heading (node name h1 through h6).
 *
 * @param ?Node $n
 * @return bool
 */
public static function isHeading( ?Node $n ): bool {
	if ( !$n ) {
		return false;
	}
	$tagName = DOMCompat::nodeName( $n );
	return (bool)preg_match( '/^h[1-6]$/D', $tagName );
}
/**
 * Check whether `node` is a list, i.e. its node name is a key of
 * Consts::$HTML['ListTags'].
 *
 * @param ?Node $n
 * @return bool
 */
public static function isList( ?Node $n ): bool {
	if ( !$n ) {
		return false;
	}
	$tagName = DOMCompat::nodeName( $n );
	return isset( Consts::$HTML['ListTags'][$tagName] );
}
/**
 * Check whether `node` is a list item, i.e. its node name is a key of
 * Consts::$HTML['ListItemTags'].
 *
 * @param ?Node $n
 * @return bool
 */
public static function isListItem( ?Node $n ): bool {
	if ( !$n ) {
		return false;
	}
	$tagName = DOMCompat::nodeName( $n );
	return isset( Consts::$HTML['ListItemTags'][$tagName] );
}
/**
 * Check whether `node` is a list or list item.
 *
 * @param ?Node $n
 * @return bool
 */
public static function isListOrListItem( ?Node $n ): bool {
	if ( self::isList( $n ) ) {
		return true;
	}
	return self::isListItem( $n );
}
/**
 * Check whether `node` is nested in a list item, i.e. whether any of its
 * ancestors is a list item. The node itself is not tested.
 *
 * @param ?Node $n
 * @return bool False when $n is null or no ancestor is a list item
 */
public static function isNestedInListItem( ?Node $n ): bool {
	// The signature accepts null, so guard before reading parentNode.
	if ( !$n ) {
		return false;
	}
	for ( $ancestor = $n->parentNode; $ancestor; $ancestor = $ancestor->parentNode ) {
		if ( self::isListItem( $ancestor ) ) {
			return true;
		}
	}
	return false;
}
/**
 * Check whether `node` is a list or list item that is itself nested
 * inside a list item.
 *
 * @param ?Node $n
 * @return bool
 */
public static function isNestedListOrListItem( ?Node $n ): bool {
	if ( !self::isListOrListItem( $n ) ) {
		return false;
	}
	return self::isNestedInListItem( $n );
}
/**
 * Check a node to see whether it's a meta with some typeof.
 *
 * @param Node $n
 * @param string $type Expected `typeof` value, as a literal string
 * @return bool
 */
public static function isMarkerMeta( Node $n, string $type ): bool {
	$tagName = 'meta';
	return self::hasNameAndTypeOf( $n, $tagName, $type );
}
/**
 * Check whether a node has any direct children that are elements.
 *
 * @param Node $node
 * @return bool
 */
public static function hasElementChild( Node $node ): bool {
	$child = $node->firstChild;
	while ( $child ) {
		if ( $child instanceof Element ) {
			return true;
		}
		$child = $child->nextSibling;
	}
	return false;
}
/**
 * Check if a node has a block-level element descendant, at any depth.
 * The node itself is not tested.
 *
 * @param Node $node
 * @return bool
 */
public static function hasBlockElementDescendant( Node $node ): bool {
	$child = $node->firstChild;
	while ( $child ) {
		if ( $child instanceof Element ) {
			// Is a block-level node
			if ( self::isWikitextBlockNode( $child ) ) {
				return true;
			}
			// or has a block-level child or grandchild or..
			if ( self::hasBlockElementDescendant( $child ) ) {
				return true;
			}
		}
		$child = $child->nextSibling;
	}
	return false;
}
/**
 * Is a node representing inter-element whitespace? That is: a Text node
 * whose value is empty or consists only of whitespace.
 *
 * @param ?Node $node
 * @return bool
 */
public static function isIEW( ?Node $node ): bool {
	if ( !( $node instanceof Text ) ) {
		return false;
	}
	// ws-only. The pattern must be a single-line literal: a raw line
	// break inside it would have to be matched literally, so an empty or
	// space-only text node would no longer qualify.
	return (bool)preg_match( '/^\s*$/D', $node->nodeValue );
}
/**
 * Is a node a document fragment? Decided by comparing its nodeType with
 * XML_DOCUMENT_FRAG_NODE.
 *
 * @param ?Node $node
 * @return bool
 */
public static function isDocumentFragment( ?Node $node ): bool {
	if ( !$node ) {
		return false;
	}
	return $node->nodeType === XML_DOCUMENT_FRAG_NODE;
}
/**
 * Is a node at the top, i.e. a document fragment or the <body> element?
 *
 * @param ?Node $node
 * @return bool
 */
public static function atTheTop( ?Node $node ): bool {
	if ( self::isDocumentFragment( $node ) ) {
		return true;
	}
	return self::isBody( $node );
}
/**
 * Are all children of this node text or comment nodes?
 * A node without children yields true.
 *
 * @param Node $node
 * @return bool
 */
public static function allChildrenAreTextOrComments( Node $node ): bool {
	for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
		$isTextOrComment = $child instanceof Text || $child instanceof Comment;
		if ( !$isTextOrComment ) {
			return false;
		}
	}
	return true;
}
/**
 * Check if the dom-subtree rooted at node has an element with tag name 'tagName'
 * By default, the root node is not checked.
 *
 * @param Node $node The DOM node whose tree should be checked
 * @param string $tagName Tag name to look for
 * @param bool $checkRoot Should the root be checked?
 * @return bool
 */
public static function treeHasElement( Node $node, string $tagName, bool $checkRoot = false ): bool {
	if ( $checkRoot && DOMCompat::nodeName( $node ) === $tagName ) {
		return true;
	}
	for ( $child = $node->firstChild; $child; $child = $child->nextSibling ) {
		// Only element children are searched; each one is also tested itself.
		if ( $child instanceof Element && self::treeHasElement( $child, $tagName, true ) ) {
			return true;
		}
	}
	return false;
}
/**
 * Is node a table tag (table, tbody, td, tr, etc.)? Decided by looking up
 * its node name in Consts::$HTML['TableTags'].
 *
 * @param Node $node
 * @return bool
 */
public static function isTableTag( Node $node ): bool {
	$tagName = DOMCompat::nodeName( $node );
	return isset( Consts::$HTML['TableTags'][$tagName] );
}
/**
 * Returns a media element (img, video or audio) nested in `node`, as
 * found by DOMCompat::querySelector().
 *
 * @param Element $node
 * @return Element|null
 */
public static function selectMediaElt( Element $node ): ?Element {
	$mediaSelector = 'img, video, audio';
	return DOMCompat::querySelector( $node, $mediaSelector );
}
/**
 * Extract http-equiv headers from the HTML, including content-language and
 * vary headers, if present
 *
 * @param Document $doc
 * @return array<string,string> Map of lower-cased http-equiv name to the
 *   element's content attribute; a later element overwrites an earlier
 *   one with the same name.
 */
public static function findHttpEquivHeaders( Document $doc ): array {
	$headers = [];
	$metas = DOMCompat::querySelectorAll( $doc, 'meta[http-equiv][content]' );
	foreach ( $metas as $meta ) {
		$headerName = strtolower( DOMCompat::getAttribute( $meta, 'http-equiv' ) );
		$headers[$headerName] = DOMCompat::getAttribute( $meta, 'content' );
	}
	return $headers;
}
/**
 * Add or replace http-equiv headers in the HTML <head>.
 * This is used for content-language and vary headers, among possible
 * others.
 *
 * @param Document $doc The HTML document to update
 * @param array<string,string|string[]> $headers An array mapping HTTP
 *   header names (which are case-insensitive) to new values. If an
 *   array of values is provided, they will be joined with commas.
 */
public static function addHttpEquivHeaders( Document $doc, array $headers ): void {
	foreach ( $headers as $key => $value ) {
		if ( is_array( $value ) ) {
			$value = implode( ',', $value );
		}
		// HTTP header names are case-insensitive; hence the "i" suffix
		// on this selector query. The selector has to be a single-line
		// string: raw line breaks around the quoted value would make it
		// a malformed attribute selector.
		// NOTE(review): $key is interpolated into the selector unescaped;
		// this assumes header names never contain a double quote.
		$el = DOMCompat::querySelector( $doc, "meta[http-equiv=\"{$key}\"i]" );
		if ( !$el ) {
			// This also ensures there is a <head> element.
			$el = self::appendToHead( $doc, 'meta', [ 'http-equiv' => $key ] );
		}
		$el->setAttribute( 'content', $value );
	}
}
/**
 * Read the `content` attribute of the first <meta> element whose
 * `property` is "mw:htmlVersion" or "mw:html:version".
 *
 * @param Document $doc
 * @return string|null The attribute value, or null when no such element exists
 */
public static function extractInlinedContentVersion( Document $doc ): ?string {
	$versionMeta = DOMCompat::querySelector(
		$doc,
		'meta[property="mw:htmlVersion"], meta[property="mw:html:version"]'
	);
	if ( !$versionMeta ) {
		return null;
	}
	return DOMCompat::getAttribute( $versionMeta, 'content' );
}
/**
 * Add attributes to a node element. Entries whose value is null are
 * skipped; the 'id' key is set through DOMCompat::setIdAttribute().
 *
 * @param Element $elt Element to update
 * @param array $attrs Map of attribute name to value
 */
public static function addAttributes( Element $elt, array $attrs ): void {
	foreach ( $attrs as $name => $attrValue ) {
		if ( $attrValue === null ) {
			continue;
		}
		if ( $name === 'id' ) {
			DOMCompat::setIdAttribute( $elt, $attrValue );
			continue;
		}
		$elt->setAttribute( $name, $attrValue );
	}
}
/**
 * Create an element in the document head with the given attrs.
 * Creates the head element in the document if needed, inserting it into
 * the document element before the body.
 *
 * @param Document $document
 * @param string $tagName Tag name of the element to create
 * @param array $attrs Attributes to set on the new element
 * @return Element The newly-appended Element
 */
public static function appendToHead( Document $document, string $tagName, array $attrs = [] ): Element {
	$newElt = $document->createElement( $tagName );
	self::addAttributes( $newElt, $attrs );

	$head = DOMCompat::getHead( $document );
	if ( !$head ) {
		$head = $document->createElement( 'head' );
		$body = DOMCompat::getBody( $document );
		$document->documentElement->insertBefore( $head, $body );
	}

	$head->appendChild( $newElt );
	return $newElt;
}
/**
 * innerHTML and outerHTML are not defined on DocumentFragment.
 *
 * Defined similarly to DOMCompat::getInnerHTML(): the fragment is
 * serialized with XMLSerializer using the 'innerXML' option.
 *
 * @param DocumentFragment $frag
 * @return string
 */
public static function getFragmentInnerHTML( DocumentFragment $frag ): string {
	$serialized = XMLSerializer::serialize( $frag, [ 'innerXML' => true ] );
	return $serialized['html'];
}
/**
 * innerHTML and outerHTML are not defined on DocumentFragment.
 * @see DOMCompat::setInnerHTML() for the Element version
 *
 * The HTML is parsed into a temporary <body> element whose children are
 * then moved to the end of the fragment.
 *
 * @param DocumentFragment $frag Fragment receiving the parsed nodes
 * @param string $html HTML source to parse
 */
public static function setFragmentInnerHTML( DocumentFragment $frag, string $html ): void {
	// FIXME: This should be an HTML5 template element
	$holder = $frag->ownerDocument->createElement( 'body' );
	DOMCompat::setInnerHTML( $holder, $html );
	self::migrateChildren( $holder, $frag );
}
/**
 * Create a new DocumentFragment in $doc and fill it with the nodes
 * parsed from $html.
 *
 * @param Document $doc Document that owns the new fragment
 * @param string $html HTML source to parse
 * @return DocumentFragment
 */
public static function parseHTMLToFragment( Document $doc, string $html ): DocumentFragment {
	$fragment = $doc->createDocumentFragment();
	self::setFragmentInnerHTML( $fragment, $html );
	return $fragment;
}
/**
 * True when the node name of $node is a key of
 * Consts::$HTML['RawTextElements'].
 *
 * @param Node $node
 * @return bool
 */
public static function isRawTextElement( Node $node ): bool {
	$tagName = DOMCompat::nodeName( $node );
	return isset( Consts::$HTML['RawTextElements'][$tagName] );
}
/**
 * Is 'n' a block tag, or does the subtree rooted at 'n' have a block tag
 * in it? "Block" here is decided by isRemexBlockNode().
 *
 * @param Node $n
 * @return bool
 */
public static function hasBlockTag( Node $n ): bool {
	if ( self::isRemexBlockNode( $n ) ) {
		return true;
	}
	for ( $child = $n->firstChild; $child; $child = $child->nextSibling ) {
		if ( self::hasBlockTag( $child ) ) {
			return true;
		}
	}
	return false;
}
/**
 * Get an associative array of attributes, suitable for serialization.
 *
 * Add the xmlns attribute if available, to workaround PHP's surprising
 * behavior with the xmlns attribute: HTML is *not* an XML document,
 * but various parts of PHP (including our misnamed XMLSerializer) pretend
 * that it is, sort of.
 *
 * @param Element $element
 * @return array<string,string> Map of attribute name to value
 * @see https://phabricator.wikimedia.org/T235295
 */
public static function attributes( Element $element ): array {
	// The 'xmlns' attribute is "invisible" T235295, so fetch it
	// explicitly and seed the map with it when present.
	$xmlns = DOMCompat::getAttribute( $element, 'xmlns' );
	$map = $xmlns !== null ? [ 'xmlns' => $xmlns ] : [];
	foreach ( $element->attributes as $attribute ) {
		$map[$attribute->name] = $attribute->value;
	}
	return $map;
}
/**
 * True when the node name of $node is a key of
 * Consts::$HTML['MetaDataTags'].
 *
 * @param Element $node
 * @return bool
 */
public static function isMetaDataTag( Element $node ): bool {
	$tagName = DOMCompat::nodeName( $node );
	return isset( Consts::$HTML['MetaDataTags'][$tagName] );
}
/**
 * Strip a paragraph wrapper, if any, before parsing HTML to DOM.
 *
 * Removes a `<p>` at the very start of the string, and a newline plus
 * `</p>` at the end of the string together with any trailing run of
 * whitespace or text matched by Utils::COMMENT_REGEXP_FRAGMENT.
 *
 * @param string $ret HTML string to process
 * @return string The string with the matched wrapper parts removed
 */
public static function stripPWrapper( string $ret ): string {
	// The pattern pieces must be single-line literals: raw line breaks
	// around the `\n` and `\s` escapes would become part of the pattern
	// and keep the closing-tag alternative from matching.
	$pattern = '#(^<p>)|(\n</p>(' . Utils::COMMENT_REGEXP_FRAGMENT . '|\s)*$)#D';
	return preg_replace( $pattern, '', $ret );
}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 21:09 (1 d, 12 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
5c/f4/4a2604e0e35b7087412f1885ff4f
Default Alt Text
DOMUtils.php (26 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment