Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1432299
WTSUtils.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
12 KB
Referenced Files
None
Subscribers
None
WTSUtils.php
View Options
<?php
declare
(
strict_types
=
1
);
namespace
Wikimedia\Parsoid\Html2Wt
;
use
Wikimedia\Assert\UnreachableException
;
use
Wikimedia\Parsoid\Core\DomSourceRange
;
use
Wikimedia\Parsoid\DOM\Element
;
use
Wikimedia\Parsoid\DOM\Node
;
use
Wikimedia\Parsoid\DOM\Text
;
use
Wikimedia\Parsoid\NodeData\DataMw
;
use
Wikimedia\Parsoid\NodeData\DataMwAttrib
;
use
Wikimedia\Parsoid\Tokens\EndTagTk
;
use
Wikimedia\Parsoid\Tokens\KV
;
use
Wikimedia\Parsoid\Tokens\TagTk
;
use
Wikimedia\Parsoid\Utils\DiffDOMUtils
;
use
Wikimedia\Parsoid\Utils\DOMCompat
;
use
Wikimedia\Parsoid\Utils\DOMDataUtils
;
use
Wikimedia\Parsoid\Utils\DOMUtils
;
use
Wikimedia\Parsoid\Utils\PHPUtils
;
use
Wikimedia\Parsoid\Utils\WTUtils
;
class
WTSUtils
{
/**
 * Check whether a separator string is made up exclusively of whitespace
 * and HTML comments (`<!-- ... -->`).
 *
 * @param string $sep
 * @return bool
 */
public static function isValidSep( string $sep ): bool {
	/* TODO (Anomie)
	You might be able to simplify the regex a bit using a no-backtracking group:
	'/^(?>\s*<!--.*?-->)*\s*$/s'
	Although I'm not sure that'll actually run faster.*/
	$sepPattern = '/^(\s|<!--([^\-]|-(?!->))*-->)*$/uD';

	// preg_match() yields 1 on a match; both 0 (no match) and false
	// (matching error) mean the separator is not valid.
	return preg_match( $sepPattern, $sep ) === 1;
}
/**
 * Null-tolerant wrapper around DomSourceRange::hasValidTagWidths().
 *
 * @param ?DomSourceRange $dsr
 * @return bool False when no range is given; otherwise whatever the
 *   range itself reports.
 */
public static function hasValidTagWidths( ?DomSourceRange $dsr ): bool {
	return !( $dsr === null || !$dsr->hasValidTagWidths() );
}
/**
 * Collect the attributes of an element as a list of KV objects, one per
 * attribute, in the order DOMUtils::attributes() yields them.
 *
 * @param Element $node
 * @return KV[]
 */
public static function getAttributeKVArray( Element $node ): array {
	$attribs = DOMUtils::attributes( $node );

	$kvList = [];
	foreach ( $attribs as $attrName => $attrValue ) {
		$kvList[] = new KV( $attrName, $attrValue );
	}

	return $kvList;
}
/**
 * Build the `TagTk` (start tag token) that corresponds to a DOM element.
 * The token carries the element's node name, its attributes as KV objects
 * and its data-parsoid object.
 *
 * @param Element $node
 * @return TagTk
 */
public static function mkTagTk( Element $node ): TagTk {
	// Attributes are read first, keeping the original evaluation order.
	$kvs = self::getAttributeKVArray( $node );
	$tagName = DOMCompat::nodeName( $node );
	$dp = DOMDataUtils::getDataParsoid( $node );

	return new TagTk( $tagName, $kvs, $dp );
}
/**
 * Build the `EndTagTk` (end tag token) that corresponds to a DOM element.
 * The token carries the element's node name, its attributes as KV objects
 * and its data-parsoid object.
 *
 * @param Element $node
 * @return EndTagTk
 */
public static function mkEndTagTk( Element $node ): EndTagTk {
	// Attributes are read first, keeping the original evaluation order.
	$kvs = self::getAttributeKVArray( $node );
	$tagName = DOMCompat::nodeName( $node );
	$dp = DOMDataUtils::getDataParsoid( $node );

	return new EndTagTk( $tagName, $kvs, $dp );
}
/**
 * Work out which value to serialize for an attribute and whether that
 * value was edited.
 *
 * Attributes of new elements always count as modified. For pre-existing
 * elements an attribute only counts as modified when data-parsoid recorded
 * an original value (`a`) for it and that value differs from the current
 * one. When the value is unchanged and a source ("shadow") value (`sa`)
 * was recorded, the source value is returned instead.
 *
 * Returns array with data:
 * [
 *   value => mixed,
 *   modified => bool (If the value of the attribute changed since we parsed the wikitext),
 *   fromsrc => bool (Whether we got the value from source-based roundtripping)
 * ]
 *
 * @param Element $node
 * @param string $name
 * @param ?string $curVal
 * @return array
 */
public static function getShadowInfo( Element $node, string $name, ?string $curVal ): array {
	$dp = DOMDataUtils::getDataParsoid( $node );

	// No recorded original value: regular round-trip information only.
	$hasRecordedValue = isset( $dp->a ) && array_key_exists( $name, $dp->a );
	if ( !$hasRecordedValue ) {
		return [
			'value' => $curVal,
			// Mark as modified if a new element
			'modified' => WTUtils::isNewElt( $node ),
			'fromsrc' => false
		];
	}

	// The recorded value no longer matches: the attribute was edited.
	if ( $dp->a[$name] !== $curVal ) {
		return [
			'value' => $curVal,
			'modified' => true,
			'fromsrc' => false
		];
	}

	// Unchanged, but there is no source value to fall back on.
	$hasSourceValue = isset( $dp->sa ) && array_key_exists( $name, $dp->sa );
	if ( !$hasSourceValue ) {
		return [
			'value' => $curVal,
			'modified' => false,
			'fromsrc' => false
		];
	}

	// Unchanged and shadowed: hand back the original source value.
	return [
		'value' => $dp->sa[$name],
		'modified' => false,
		'fromsrc' => true
	];
}
/**
 * Shadow information for an attribute, using the attribute's current
 * value on the node as the value to compare against.
 *
 * Returns array with data:
 * [
 *   value => mixed,
 *   modified => bool (If the value of the attribute changed since we parsed the wikitext),
 *   fromsrc => bool (Whether we got the value from source-based roundtripping)
 * ]
 *
 * @param Element $node
 * @param string $name
 * @return array
 */
public static function getAttributeShadowInfo( Element $node, string $name ): array {
	$currentValue = DOMCompat::getAttribute( $node, $name );

	return self::getShadowInfo( $node, $name, $currentValue );
}
/**
 * Produce the wikitext form of a comment: the text, passed through
 * WTUtils::decodeComment(), wrapped in `<!--` and `-->`.
 *
 * @param string $comment
 * @return string
 */
public static function commentWT( string $comment ): string {
	$decoded = WTUtils::decodeComment( $comment );

	return "<!--{$decoded}-->";
}
/**
 * In wikitext, did origNode occur next to a block node which has been
 * deleted? While looking for the neighbour, DOM nodes that are transparent
 * in rendering are skipped. (See emitsSolTransparentSingleLineWT for
 * which nodes.)
 *
 * @param ?Node $origNode
 * @param bool $before True to look at preceding siblings, false to look
 *   at following siblings.
 * @return bool
 */
public static function nextToDeletedBlockNodeInWT( ?Node $origNode, bool $before ): bool {
	if ( $origNode === null || DOMUtils::atTheTop( $origNode ) ) {
		return false;
	}

	$anchor = $origNode;
	for ( ; ; ) {
		// Find the nearest node that shows up in HTML (ignore nodes that show up
		// in wikitext but don't affect sol-state or HTML rendering -- note that
		// whitespace is being ignored, but that whitespace occurs between block nodes).
		$sibling = $anchor;
		do {
			$sibling = $before ? $sibling->previousSibling : $sibling->nextSibling;
			if ( DiffUtils::maybeDeletedNode( $sibling ) ) {
				return DiffUtils::isDeletedBlockNode( $sibling );
			}
		} while ( $sibling && WTUtils::emitsSolTransparentSingleLineWT( $sibling ) );

		// A real (non-transparent, non-deleted) neighbour was found.
		if ( $sibling ) {
			return false;
		}

		// Ran out of siblings: walk up past zero-width wikitext parents.
		$parent = $anchor->parentNode;
		if ( !WTUtils::isZeroWidthWikitextElt( $parent ) ) {
			// If the parent occupies space in wikitext,
			// clearly, we are not next to a deleted block node!
			// We'll eventually hit BODY here and return.
			return false;
		}
		$anchor = $parent;
	}
}
/**
 * Check if whitespace preceding this node would NOT trigger an indent-pre.
 *
 * Decision by node kind:
 * - a text node other than $sepNode: true when it starts with optional
 *   spaces/tabs followed by a newline;
 * - a `br` node: always true;
 * - a first encapsulation wrapper node: true when it has no children or
 *   its inner HTML starts with a newline;
 * - anything else: the result of WTUtils::isBlockNodeWithVisibleWT().
 *
 * @param Node $node
 * @param Node $sepNode
 * @return bool
 */
public static function precedingSpaceSuppressesIndentPre( Node $node, Node $sepNode ): bool {
	if ( $node !== $sepNode && $node instanceof Text ) {
		// if node is the same as sepNode, then the separator text
		// at the beginning of it has been stripped out already, and
		// we cannot use it to test it for indent-pre safety
		//
		// The character class must accept spaces as well as tabs:
		// a whitespace-only run up to the newline may contain either.
		return (bool)preg_match( '/^[ \t]*\n/', $node->nodeValue );
	}

	if ( DOMCompat::nodeName( $node ) === 'br' ) {
		return true;
	}

	if ( WTUtils::isFirstEncapsulationWrapperNode( $node ) ) {
		DOMUtils::assertElt( $node );
		// Dont try any harder than this
		return !$node->hasChildNodes() ||
			DOMCompat::getInnerHTML( $node )[0] === "\n";
	}

	return WTUtils::isBlockNodeWithVisibleWT( $node );
}
/**
 * Short label for a node, for use in trace output.
 *
 * - elements: 'DIFF_MARK' for diff markers, otherwise 'NODE: ' + node name
 * - text nodes: 'TEXT: ' + JSON-encoded text
 * - comments: 'CMT : ' + JSON-encoded wikitext form of the comment
 * - everything else: the bare node name
 *
 * @param Node $node
 * @return string
 */
public static function traceNodeName( Node $node ): string {
	$type = $node->nodeType;

	if ( $type === XML_ELEMENT_NODE ) {
		if ( DiffUtils::isDiffMarker( $node ) ) {
			return 'DIFF_MARK';
		}
		return 'NODE: ' . DOMCompat::nodeName( $node );
	}

	if ( $type === XML_TEXT_NODE ) {
		return 'TEXT: ' . PHPUtils::jsonEncode( $node->nodeValue );
	}

	if ( $type === XML_COMMENT_NODE ) {
		$commentSrc = self::commentWT( $node->nodeValue );
		return 'CMT : ' . PHPUtils::jsonEncode( $commentSrc );
	}

	return DOMCompat::nodeName( $node );
}
/**
 * In selser mode, check if an unedited node's wikitext from source wikitext
 * is reusable as is.
 *
 * Even when a node itself is unedited, edits around it can invalidate its
 * original source (table cells, first table rows, nested lists and list
 * items, nodes overlapping an open annotation range, moved meta tags).
 *
 * @param SerializerState $state
 * @param Node $node
 * @return bool True if the original source can be emitted unchanged.
 */
public static function origSrcValidInEditedContext( SerializerState $state, Node $node ): bool {
	if ( WTUtils::isRedirectLink( $node ) ) {
		// Only reusable when it is the very first node of the document.
		return DOMUtils::atTheTop( $node->parentNode ) && !$node->previousSibling;
	}

	if ( self::dsrContainsOpenExtendedRangeAnnotationTag( $node, $state ) ) {
		return false;
	}

	$nodeName = DOMCompat::nodeName( $node );

	if ( $nodeName === 'th' || $nodeName === 'td' ) {
		DOMUtils::assertElt( $node );
		// The wikitext representation for them is dependent
		// on cell position (first cell is always single char).

		// If there is no previous sibling, nothing to worry about.
		$prev = $node->previousSibling;
		if ( !$prev ) {
			return true;
		}

		if ( DiffUtils::hasInsertedDiffMark( $prev ) ||
			DiffUtils::hasInsertedDiffMark( $node )
		) {
			return false;
		}

		// If previous sibling is unmodified, nothing to worry about.
		if ( !DiffUtils::isDiffMarker( $prev ) &&
			!DiffUtils::directChildrenChanged( $prev )
		) {
			return true;
		}

		// If it didn't have a stx marker that indicated that the cell
		// showed up on the same line via the "||" or "!!" syntax, nothing
		// to worry about.
		return ( DOMDataUtils::getDataParsoid( $node )->stx ?? '' ) !== 'row';
	}

	if ( $node instanceof Element && $nodeName === 'tr' &&
		empty( DOMDataUtils::getDataParsoid( $node )->startTagSrc )
	) {
		// If this <tr> didn't have a startTagSrc, it would have been
		// the first row of a table in original wikitext. So, it is safe
		// to reuse the original source for the row (without a "|-") as long as
		// it continues to be the first row of the table. If not, since we need to
		// insert a "|-" to separate it from the newly added row (in an edit),
		// we cannot simply reuse orig. wikitext for this <tr>.
		return !DiffDOMUtils::previousNonSepSibling( $node );
	}

	if ( DOMUtils::isNestedListOrListItem( $node ) ) {
		if ( DOMUtils::isList( $node ) ) {
			// Lists never get bullets assigned to them. So, unless they
			// start a fresh list ( => they have a previous sibling ),
			// we cannot reuse source for nested lists.
			if ( !$node->previousSibling ) {
				return false;
			}
		} else {
			// Consider this wikitext snippet and its output below:
			//
			//   ** a
			//   *** b
			//
			//   <ul><li-*>
			//     <ul><li-*> a                       <-- cannot reuse source of this <li>
			//       <ul><li-***> b</li></ul>         <-- can reuse source of this <li>
			//     </li></ul>
			//   </li></ul>
			//
			// If we reuse the src for the inner li with the a, we'd be missing
			// one bullet because the tag handler for lists in the serializer only
			// emits start tag src when it hits a first child that isn't a list
			// element. We need to walk up and get the other bullet(s).
			//
			// The above logic can be condensed into this observation.
			// Reusable nested <li> nodes will always have multiple bullets.
			// Don't reuse source from any nested list
			$dp = DOMDataUtils::getDataParsoid( $node );
			if ( !isset( $dp->dsr ) || $dp->dsr->openWidth < 2 ) {
				return false;
			}
		}

		// If a previous sibling was modified, we can't reuse the start dsr.
		$prev = $node->previousSibling;
		while ( $prev ) {
			if ( DiffUtils::isDiffMarker( $prev ) ||
				DiffUtils::hasInsertedDiffMark( $prev )
			) {
				return false;
			}
			$prev = $prev->previousSibling;
		}

		return true;
	}

	if ( WTUtils::isMovedMetaTag( $node ) ) {
		return false;
	}

	return true;
}
/**
 * We keep track in $state of all extended ranges that are currently open by a <meta> tag.
 * This method checks whether the wikitext source pointed by the dsr of the node contains either
 * an opening or closing tag matching that annotation (<translate> or </translate> for example.)
 *
 * Only annotations whose entry in $state->openAnnotations is truthy
 * (i.e. flagged as extended) are considered.
 *
 * @param Node $node
 * @param SerializerState $state
 * @return bool
 */
private static function dsrContainsOpenExtendedRangeAnnotationTag(
	Node $node, SerializerState $state
): bool {
	if ( empty( $state->openAnnotations ) || !$node instanceof Element ) {
		return false;
	}

	$dsr = DOMDataUtils::getDataParsoid( $node )->dsr ?? null;
	if ( !$dsr ) {
		return false;
	}

	$src = $state->getOrigSrc( $dsr->innerRange() );
	// NOTE(review): guards against getOrigSrc() yielding no source for the
	// range -- confirm against its declared return type. Without source
	// text there is nothing that could contain an annotation tag.
	if ( !is_string( $src ) ) {
		return false;
	}

	foreach ( $state->openAnnotations as $ann => $extended ) {
		if ( !$extended ) {
			continue;
		}
		// Use explicit '#' delimiters: with a pattern written as '</?ann.*>'
		// PCRE treats the outer '<' and '>' as bracket-style delimiters, so
		// the tag brackets would not be part of the regex and any mention of
		// the annotation name would match. The name is quoted so it is
		// always matched literally.
		$tagPattern = '#</?' . preg_quote( (string)$ann, '#' ) . '.*>#';
		if ( preg_match( $tagPattern, $src ) ) {
			return true;
		}
	}

	return false;
}
/**
 * Look up the first entry of $dataMw->attribs whose key text equals $key.
 * An entry's key is either a plain string or an array carrying the text
 * under 'txt'.
 *
 * Unless $keep is set, a matching entry whose value has no 'html' member
 * is also removed from $dataMw->attribs.
 *
 * FIXME: This method should probably be moved to DOMDataUtils class since
 * it is used by both html2wt and wt2html code
 *
 * @param DataMw $dataMw
 * @param string $key
 * @param bool $keep
 * @return ?DataMwAttrib The matching entry, or null if there is none.
 */
public static function getAttrFromDataMw(
	DataMw $dataMw, string $key, bool $keep
): ?DataMwAttrib {
	$attribs = $dataMw->attribs ?? [];

	foreach ( $attribs as $idx => $attrib ) {
		if ( is_string( $attrib->key ) ) {
			$keyText = $attrib->key;
		} elseif ( is_array( $attrib->key ) ) {
			$keyText = $attrib->key['txt'] ?? null;
		} else {
			throw new UnreachableException( 'Control should never get here!' );
		}

		if ( $keyText !== $key ) {
			continue;
		}

		// First match wins.
		if ( !$keep && !isset( $attrib->value['html'] ) ) {
			// NOTE(review): the array key is used as a splice offset, which
			// presumes attribs is a plain list -- confirm.
			array_splice( $attribs, $idx, 1 );
			$dataMw->attribs = $attribs;
		}
		return $attrib;
	}

	return null;
}
/**
 * Escape `<nowiki>` tags.
 *
 * Opening, closing and self-closing nowiki tags (matched
 * case-insensitively, with optional whitespace before the closing
 * bracket) have their angle brackets replaced by the `&lt;` / `&gt;`
 * entities. All other text is left untouched.
 *
 * @param string $text
 * @return string
 */
public static function escapeNowikiTags( string $text ): string {
	// The replacement must use entities: substituting literal '<' and '>'
	// would reproduce the input unchanged and escape nothing.
	return preg_replace( '#<(/?nowiki\s*/?\s*)>#i', '&lt;$1&gt;', $text );
}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 21:37 (1 d, 10 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
82/b3/b060c2362baea338533463cb7555
Default Alt Text
WTSUtils.php (12 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment