Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1426884
VueComponentParser.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
11 KB
Referenced Files
None
Subscribers
None
VueComponentParser.php
View Options
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @author Roan Kattouw
*/
namespace
MediaWiki\ResourceLoader
;
use
DOMDocument
;
use
DOMElement
;
use
DOMNode
;
use
InvalidArgumentException
;
use
Wikimedia\RemexHtml\DOM\DOMBuilder
;
use
Wikimedia\RemexHtml\HTMLData
;
use
Wikimedia\RemexHtml\Serializer\HtmlFormatter
;
use
Wikimedia\RemexHtml\Serializer\Serializer
;
use
Wikimedia\RemexHtml\Serializer\SerializerNode
;
use
Wikimedia\RemexHtml\Tokenizer\Attributes
;
use
Wikimedia\RemexHtml\Tokenizer\Tokenizer
;
use
Wikimedia\RemexHtml\TreeBuilder\Dispatcher
;
use
Wikimedia\RemexHtml\TreeBuilder\TreeBuilder
;
use
Wikimedia\Zest\Zest
;
/**
* Parser for Vue single file components (.vue files). See parse() for usage.
*
* @ingroup ResourceLoader
* @internal For use within FileModule.
*/
class
VueComponentParser
{
/**
 * Parse a Vue single file component, and extract the script, template and style parts.
 *
 * Returns an associative array with the following keys:
 * - 'script': The JS code in the <script> tag
 * - 'template': The HTML in the <template> tag
 * - 'style': The CSS/LESS styles in the <style> tag, or null if the <style> tag was missing
 * - 'styleLang': The language used for 'style'; either 'css' or 'less', or null if no <style> tag
 *
 * The following options can be passed in the $options parameter:
 * - 'minifyTemplate': Whether to minify the HTML in the template tag. This removes
 *     HTML comments and strips whitespace. Default: false
 *
 * @param string $html HTML with <script>, <template> and <style> tags at the top level
 * @param array $options Associative array of options
 * @return array
 * @throws InvalidArgumentException If the input is invalid
 */
public function parse( string $html, array $options = [] ): array {
	$dom = $this->parseHTML( $html );

	// Remex wraps everything in <html><head>, unwrap that. The null coalescing operator makes
	// a missing <head> fall through to the explicit check below, instead of raising an
	// "undefined array key" warning before the exception is thrown.
	$head = Zest::getElementsByTagName( $dom, 'head' )[ 0 ] ?? null;
	if ( !$head ) {
		throw new InvalidArgumentException( 'Parsed DOM did not contain a <head> tag' );
	}

	// Find the <script>, <template> and <style> tags. They can appear in any order, but they
	// must be at the top level, and there can only be one of each.
	$nodes = $this->findUniqueTags( $head, [ 'script', 'template', 'style' ] );

	// Throw an error if we didn't find a <script> or <template> tag. <style> is optional.
	foreach ( [ 'script', 'template' ] as $requiredTag ) {
		if ( !isset( $nodes[ $requiredTag ] ) ) {
			throw new InvalidArgumentException( "No <$requiredTag> tag found" );
		}
	}

	// <script> and <template> may not carry any attributes; <style> may only carry 'lang'.
	$this->validateAttributes( $nodes['script'], [] );
	$this->validateAttributes( $nodes['template'], [] );

	$styleData = null;
	if ( isset( $nodes['style'] ) ) {
		$this->validateAttributes( $nodes['style'], [ 'lang' ] );
		$styleData = $this->getStyleAndLang( $nodes['style'] );
	}

	// The template is extracted from the raw input rather than from the DOM node found above;
	// see getTemplateHtml() for the reason.
	$template = $this->getTemplateHtml( $html, $options['minifyTemplate'] ?? false );

	return [
		'script' => trim( $nodes['script']->nodeValue ?? '' ),
		'template' => $template,
		'style' => $styleData ? $styleData['style'] : null,
		'styleLang' => $styleData ? $styleData['lang'] : null
	];
}
/**
 * Run the given HTML through the RemexHtml tokenizer and tree builder, and return
 * the resulting DOM.
 *
 * Tokenizer and tree builder are both created with 'ignoreErrors' enabled, and the
 * DOM builder with 'suppressHtmlNamespace' enabled.
 *
 * @param string $html
 * @return DOMDocument
 */
private function parseHTML( $html ): DOMDocument {
	$parserOptions = [ 'ignoreErrors' => true ];

	$builder = new DOMBuilder( [ 'suppressHtmlNamespace' => true ] );
	$dispatcher = new Dispatcher( new TreeBuilder( $builder, $parserOptions ) );
	( new Tokenizer( $dispatcher, $html, $parserOptions ) )->execute();

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $builder->getFragment();
}
/**
 * Collect the direct children of a node that have one of the given tag names, requiring
 * that each tag name occurs at most once. Only the immediate children of $rootNode are
 * inspected; their descendants are not visited.
 *
 * @param DOMNode $rootNode Node whose children to look at
 * @param string[] $tagNames Tag names to look for (must be all lowercase)
 * @return DOMElement[] Map of tag name => the child node with that (lowercased) name
 * @throws InvalidArgumentException If one of the tag names occurs more than once
 */
private function findUniqueTags( DOMNode $rootNode, array $tagNames ): array {
	$found = [];
	foreach ( $rootNode->childNodes as $child ) {
		// Node names are lowercased before comparison, hence the lowercase requirement
		// on $tagNames.
		$tagName = strtolower( $child->nodeName );
		if ( !in_array( $tagName, $tagNames ) ) {
			continue;
		}
		if ( isset( $found[ $tagName ] ) ) {
			throw new InvalidArgumentException( "More than one <$tagName> tag found" );
		}
		$found[ $tagName ] = $child;
	}
	return $found;
}
/**
 * Check that a node carries no attributes other than the ones explicitly allowed.
 *
 * @param DOMNode $node Node to check
 * @param array $allowedAttributes Names of the attributes the node is allowed to have;
 *  pass an empty array to forbid attributes altogether
 * @throws InvalidArgumentException If the node has an attribute it's not allowed to have
 */
private function validateAttributes( DOMNode $node, array $allowedAttributes ): void {
	if ( !$allowedAttributes ) {
		// Nothing is allowed, so the mere presence of an attribute is an error
		if ( $node->attributes->length > 0 ) {
			throw new InvalidArgumentException( "<{$node->nodeName}> may not have any attributes" );
		}
		return;
	}

	foreach ( $node->attributes as $attr ) {
		if ( in_array( $attr->name, $allowedAttributes ) ) {
			continue;
		}
		throw new InvalidArgumentException(
			"<{$node->nodeName}> may not have the " .
			"{$attr->name} attribute"
		);
	}
}
/**
 * Extract the trimmed text content of the <style> tag, together with its language.
 * The language is taken from the 'lang' attribute and defaults to 'css' when that
 * attribute is absent; the only accepted values are 'css' and 'less'.
 *
 * @param DOMElement $styleNode The <style> tag.
 * @return array [ 'style' => string, 'lang' => string ]
 * @throws InvalidArgumentException If the language is neither 'css' nor 'less'
 */
private function getStyleAndLang( DOMElement $styleNode ): array {
	$styleLang = 'css';
	if ( $styleNode->hasAttribute( 'lang' ) ) {
		// An attribute that is present but empty is not replaced by the default
		$styleLang = $styleNode->getAttribute( 'lang' );
	}

	if ( !( $styleLang === 'css' || $styleLang === 'less' ) ) {
		throw new InvalidArgumentException(
			"<style lang=\"$styleLang\"> is invalid," .
			" lang must be \"css\" or \"less\""
		);
	}

	return [
		'style' => trim( $styleNode->nodeValue ?? '' ),
		'lang' => $styleLang,
	];
}
/**
 * Get the HTML contents of the <template> tag, optionally minified.
 *
 * To work around a bug in PHP's DOMDocument where attributes like @click get mangled,
 * the entire file is parsed again here with a Remex parse+serialize pipeline. A filtering
 * dispatcher narrows the event stream down to the contents of the <template> tag, and a
 * custom formatter takes care of minification. Because the data stays in Remex and is never
 * converted to DOM, the attribute mangling issue is avoided.
 *
 * @param string $html HTML that contains a <template> tag somewhere
 * @param bool $minify Whether to minify the output (remove comments, strip whitespace)
 * @return string HTML contents of the template tag
 */
private function getTemplateHtml( $html, $minify ) {
	$parserOptions = [ 'ignoreErrors' => true ];

	// Pipeline, from output back to input: serializer <- tree builder <- filter <- tokenizer
	$serializer = new Serializer( $this->newTemplateFormatter( $minify ) );
	$treeBuilder = new TreeBuilder( $serializer, $parserOptions );
	$dispatcher = $this->newFilteringDispatcher( $treeBuilder, 'template' );
	$tokenizer = new Tokenizer( $dispatcher, $html, $parserOptions );

	$tokenizer->execute( [
		'fragmentNamespace' => HTMLData::NS_HTML,
		'fragmentName' => 'template',
	] );

	return trim( $serializer->getResult() );
}
/**
 * Build the formatter used to serialize the template: an HtmlFormatter subclass that can
 * drop comments and squeeze whitespace. With $minify=false every call is handed to
 * HtmlFormatter unchanged, except that the <!doctype html> tag is always omitted.
 *
 * @param bool $minify If true, remove comments and strip whitespace
 * @return HtmlFormatter
 */
private function newTemplateFormatter( $minify ) {
	return new class( $minify ) extends HtmlFormatter {
		private $minify;

		// NOTE(review): the parent constructor is not invoked here; confirm that
		// HtmlFormatter's defaults are what is wanted.
		public function __construct( $minify ) {
			$this->minify = $minify;
		}

		public function startDocument( $fragmentNamespace, $fragmentName ) {
			// Remove <!doctype html>
			return '';
		}

		public function comment( SerializerNode $parent, $text ) {
			// When minifying, all comments are removed
			return $this->minify ? '' : parent::comment( $parent, $text );
		}

		public function characters( SerializerNode $parent, $text, $start, $length ) {
			if ( !$this->minify || $this->isWhitespaceSensitive( $parent ) ) {
				return parent::characters( $parent, $text, $start, $length );
			}

			// Collapse runs of adjacent whitespace, and convert all whitespace to spaces
			$collapsed = preg_replace( '/[ \r\n\t]+/', ' ', substr( $text, $start, $length ) );
			return parent::characters( $parent, $collapsed, 0, strlen( $collapsed ) );
		}

		public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
			$shouldTrim = $this->minify
				&& $contents !== null
				&& !$this->isWhitespaceSensitive( $node );
			if ( $shouldTrim ) {
				// Remove leading and trailing whitespace
				$contents = preg_replace( '/(^[ \r\n\t]+)|([ \r\n\t]+$)/', '', $contents );
			}
			return parent::element( $parent, $node, $contents );
		}

		/**
		 * Whether the node is an HTML element listed in prefixLfElements
		 * (<pre>/<listing>/<textarea>), whose whitespace must be left alone.
		 *
		 * @param SerializerNode $node
		 * @return bool
		 */
		private function isWhitespaceSensitive( SerializerNode $node ) {
			return $node->namespace === HTMLData::NS_HTML
				&& isset( $this->prefixLfElements[ $node->name ] );
		}
	};
}
/**
 * Custom Dispatcher subclass that only dispatches tree events inside a tag with a certain name.
 * This effectively filters the tree to only the contents of that tag.
 *
 * The opening and closing tag of the outermost matching element are themselves not
 * dispatched; nested tags with the same name are. A second, non-nested occurrence of the
 * tag makes startTag() throw an InvalidArgumentException.
 *
 * @param TreeBuilder $treeBuilder
 * @param string $nodeName Tag name to filter for
 * @return Dispatcher
 */
private function newFilteringDispatcher( TreeBuilder $treeBuilder, $nodeName ) {
	return new class( $treeBuilder, $nodeName ) extends Dispatcher {
		/** Name of the tag whose contents are let through */
		private $nodeName;
		/** Number of currently open tags named $nodeName; nonzero means "inside" */
		private $nodeDepth = 0;
		/** Whether an opening tag named $nodeName has been encountered yet */
		private $seenTag = false;

		public function __construct( TreeBuilder $treeBuilder, $nodeName ) {
			$this->nodeName = $nodeName;
			parent::__construct( $treeBuilder );
		}

		public function startTag( $name, Attributes $attrs, $selfClose, $sourceStart, $sourceLength ) {
			// Forward before updating the depth, so that the outermost opening tag itself
			// is not part of the output
			if ( $this->nodeDepth ) {
				parent::startTag( $name, $attrs, $selfClose, $sourceStart, $sourceLength );
			}

			if ( $name === $this->nodeName ) {
				if ( $this->nodeDepth === 0 && $this->seenTag ) {
					// This is the second opening tag, not nested in the first one
					throw new InvalidArgumentException( "More than one <{$this->nodeName}> tag found" );
				}
				$this->nodeDepth++;
				$this->seenTag = true;
			}
		}

		public function endTag( $name, $sourceStart, $sourceLength ) {
			// Only count closing tags that match an open tag. A stray closing tag seen while
			// the depth is 0 must not push the depth negative: every check in this class
			// treats any nonzero depth as "inside", so a depth of -1 would let through
			// everything outside the tag and then suppress the contents of the real one.
			if ( $name === $this->nodeName && $this->nodeDepth > 0 ) {
				$this->nodeDepth--;
			}

			// Forward after updating the depth, so that the outermost closing tag itself
			// is not part of the output
			if ( $this->nodeDepth ) {
				parent::endTag( $name, $sourceStart, $sourceLength );
			}
		}

		public function characters( $text, $start, $length, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::characters( $text, $start, $length, $sourceStart, $sourceLength );
			}
		}

		public function comment( $text, $sourceStart, $sourceLength ) {
			if ( $this->nodeDepth ) {
				parent::comment( $text, $sourceStart, $sourceLength );
			}
		}
	};
}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 13:50 (1 d, 20 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
dc/8d/e34fb26e0b0f4a551736b72a6cad
Default Alt Text
VueComponentParser.php (11 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment