Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1431868
Parser.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
19 KB
Referenced Files
None
Subscribers
None
Parser.php
View Options
<?php
/**
* @file
* @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
*/
namespace
Wikimedia\CSS\Parser
;
use
Wikimedia\CSS\Objects\AtRule
;
use
Wikimedia\CSS\Objects\ComponentValue
;
use
Wikimedia\CSS\Objects\ComponentValueList
;
use
Wikimedia\CSS\Objects\CSSFunction
;
use
Wikimedia\CSS\Objects\Declaration
;
use
Wikimedia\CSS\Objects\DeclarationList
;
use
Wikimedia\CSS\Objects\DeclarationOrAtRuleList
;
use
Wikimedia\CSS\Objects\QualifiedRule
;
use
Wikimedia\CSS\Objects\Rule
;
use
Wikimedia\CSS\Objects\RuleList
;
use
Wikimedia\CSS\Objects\SimpleBlock
;
use
Wikimedia\CSS\Objects\Stylesheet
;
use
Wikimedia\CSS\Objects\Token
;
// Note: While reading the code below, you might find that my calls to
// consumeToken() don't match what the spec says, and I don't ever "reconsume" a
// token. The spec is overcomplicated and confused with respect to the
// "current input token" and the "next input token". In practice things are
// pretty simple: every "consume an X" is called with the current input token
// being the first token of X, and returns with the current input token being
// the last token of X (or EOF if X ends at EOF).
// Also of note: since our Tokenizer can only return a stream of tokens rather
// than a stream of component values, the consume functions here only consider
// tokens. ComponentValueList::toTokenArray() may be used to convert a list of
// component values to a list of tokens if necessary.
/**
* Parse CSS into a structure for further processing.
*
* This implements the CSS Syntax Module Level 3 candidate recommendation.
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/
*
* The usual entry points are:
* - Parser::parseStylesheet() to parse a stylesheet or the contents of a <style> tag.
* - Parser::parseDeclarationList() to parse an inline style attribute
*/
class
Parser
{
/**
 * Upper bound on how deeply ComponentValues may nest.
 *
 * The number is arbitrary; it just seems like it should be enough.
 */
private const CV_DEPTH_LIMIT = 100;

/** @var Tokenizer Source of the tokens being parsed */
protected $tokenizer;

/** @var Token|null The most recently consumed token; null until the first consumeToken() call */
protected $currentToken = null;

/**
 * @var array Parse errors. Each error is [ string $tag, int $line, int $pos, ... ];
 *  entries recorded via self::parseError() may carry extra data after the position.
 */
protected $parseErrors = [];

/** @var int Recursion depth, incremented in self::consumeComponentValue() */
protected $cvDepth = 0;
/**
 * @param Tokenizer $tokenizer CSS Tokenizer that supplies the tokens to parse
 */
public function __construct( Tokenizer $tokenizer ) {
	$this->tokenizer = $tokenizer;
}
/**
 * Build a Parser that reads CSS from a string.
 *
 * @param string $source CSS to parse.
 * @param array $options Configuration options, see DataSourceTokenizer::__construct().
 *  One additional option is handled here:
 *  - convert: (array) If specified, detect the encoding as defined in the
 *    CSS spec. The value is passed as the $encodings argument to
 *    Encoder::convert().
 * @return static
 */
public static function newFromString( $source, array $options = [] ) {
	// Re-encode first when the caller asked for it; $options is forwarded unchanged.
	$encodings = $options['convert'] ?? null;
	if ( $encodings !== null ) {
		$source = Encoder::convert( $source, $encodings );
	}

	$dataSource = new StringDataSource( $source );
	return static::newFromDataSource( $dataSource, $options );
}
/**
 * Build a Parser that reads CSS from a DataSource.
 *
 * @param DataSource $source CSS to parse.
 * @param array $options Configuration options, see DataSourceTokenizer::__construct().
 * @return static
 */
public static function newFromDataSource( DataSource $source, array $options = [] ) {
	return new static( new DataSourceTokenizer( $source, $options ) );
}
/**
 * Build a Parser that reads from an already-tokenized list.
 *
 * @param Token[] $tokens Token-stream to parse
 * @param Token|null $eof EOF-token, handed to TokenListTokenizer as-is
 * @return static
 */
public static function newFromTokens( array $tokens, ?Token $eof = null ) {
	return new static( new TokenListTokenizer( $tokens, $eof ) );
}
/**
 * Consume a token
 *
 * Does nothing once the current token is EOF. Otherwise the next token from
 * the tokenizer becomes the current token, and any parse errors the tokenizer
 * collected are moved into this parser's error list.
 */
protected function consumeToken() {
	if ( $this->currentToken && $this->currentToken->type() === Token::T_EOF ) {
		// Already at the end of input; never read past EOF.
		return;
	}

	$this->currentToken = $this->tokenizer->consumeToken();

	// Take over the tokenizer's errors so they are reported exactly once.
	foreach ( $this->tokenizer->getParseErrors() as $tokenizerError ) {
		$this->parseErrors[] = $tokenizerError;
	}
	$this->tokenizer->clearParseErrors();
}
/**
 * Consume a token, then keep consuming for as long as the current token is whitespace
 *
 * At least one token is always consumed; on return the current token is not
 * a whitespace token.
 */
protected function consumeTokenAndWhitespace() {
	$this->consumeToken();
	while ( $this->currentToken->type() === Token::T_WHITESPACE ) {
		$this->consumeToken();
	}
}
/**
 * Get every parse error recorded so far
 * @return array Array of [ string $tag, int $line, int $pos, ... ]
 */
public function getParseErrors() {
	return $this->parseErrors;
}
/**
 * Forget all parse errors recorded so far
 */
public function clearParseErrors() {
	$this->parseErrors = [];
}
/**
 * Record a parse error
 *
 * The stored entry is [ $tag, $line, $pos ] followed by the contents of $data.
 *
 * @param string $tag Error tag
 * @param Token $token Report the error as starting at this token.
 * @param array $data Extra data about the error.
 */
protected function parseError( $tag, Token $token, array $data = [] ) {
	[ $line, $pos ] = $token->getPosition();
	$entry = [ $tag, $line, $pos ];
	$this->parseErrors[] = array_merge( $entry, $data );
}
/**
 * Parse a stylesheet
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-stylesheet
 * @return Stylesheet
 */
public function parseStylesheet() {
	// Load the first token, then read the rule list in top-level mode.
	$this->consumeToken();
	$rules = $this->consumeRuleList( true );

	return new Stylesheet( $rules );
}
/**
 * Parse a list of rules
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-list-of-rules
 * @return RuleList
 */
public function parseRuleList() {
	// Load the first token, then read the rule list in non-top-level mode.
	$this->consumeToken();
	$rules = $this->consumeRuleList( false );

	return $rules;
}
/**
 * Parse a rule
 *
 * The input must hold exactly one rule, optionally surrounded by whitespace.
 * A parse error is recorded and null returned when the input is empty
 * ('unexpected-eof') or when anything follows the rule ('expected-eof').
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-rule
 * @return Rule|null
 */
public function parseRule() {
	// 1. Skip to the first non-whitespace token.
	$this->consumeTokenAndWhitespace();
	$firstType = $this->currentToken->type();

	// 2.
	if ( $firstType === Token::T_EOF ) {
		// "return a syntax error"?
		$this->parseError( 'unexpected-eof', $this->currentToken );
		return null;
	}

	if ( $firstType === Token::T_AT_KEYWORD ) {
		$rule = $this->consumeAtRule();
	} else {
		$rule = $this->consumeQualifiedRule();
		if ( !$rule ) {
			// consumeQualifiedRule() found no block before EOF.
			return null;
		}
	}

	// 3. Skip whitespace after the rule.
	$this->consumeTokenAndWhitespace();

	// 4. Anything other than EOF here means trailing garbage.
	if ( $this->currentToken->type() !== Token::T_EOF ) {
		// "return a syntax error"?
		$this->parseError( 'expected-eof', $this->currentToken );
		return null;
	}

	return $rule;
}
/**
 * Parse a declaration
 *
 * Records an 'expected-ident' parse error and returns null when the first
 * non-whitespace token is not an identifier.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-declaration
 * @return Declaration|null
 */
public function parseDeclaration() {
	// 1. Skip to the first non-whitespace token.
	$this->consumeTokenAndWhitespace();

	// 2. and 3.
	if ( $this->currentToken->type() === Token::T_IDENT ) {
		// Declarations always run to EOF, no need to check.
		return $this->consumeDeclaration();
	}

	// "return a syntax error"?
	$this->parseError( 'expected-ident', $this->currentToken );
	return null;
}
/**
 * Parse a list of declarations, with at-rules disallowed
 * @note This is not the entry point the standard calls "parse a list of declarations",
 *  see self::parseDeclarationOrAtRuleList()
 * @return DeclarationList
 */
public function parseDeclarationList() {
	// Load the first token before handing over to the consume function.
	$this->consumeToken();
	$declarations = $this->consumeDeclarationOrAtRuleList( false );

	return $declarations;
}
/**
 * Parse a list of declarations and at-rules
 * @note This is the entry point the standard calls "parse a list of declarations"
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-list-of-declarations
 * @return DeclarationOrAtRuleList
 */
public function parseDeclarationOrAtRuleList() {
	// Load the first token before handing over to the consume function.
	$this->consumeToken();
	$items = $this->consumeDeclarationOrAtRuleList();

	return $items;
}
/**
 * Parse a (non-whitespace) component value
 *
 * The input must hold exactly one component value, optionally surrounded by
 * whitespace. A parse error is recorded and null returned when the input is
 * empty ('unexpected-eof') or when anything follows the value ('expected-eof').
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-component-value
 * @return ComponentValue|null
 */
public function parseComponentValue() {
	// 1. Skip to the first non-whitespace token.
	$this->consumeTokenAndWhitespace();

	// 2.
	if ( $this->currentToken->type() === Token::T_EOF ) {
		// "return a syntax error"?
		$this->parseError( 'unexpected-eof', $this->currentToken );
		return null;
	}

	// 3.
	$componentValue = $this->consumeComponentValue();

	// 4. Skip whitespace after the value.
	$this->consumeTokenAndWhitespace();

	// 5. Anything other than EOF here means trailing garbage.
	if ( $this->currentToken->type() !== Token::T_EOF ) {
		// "return a syntax error"?
		$this->parseError( 'expected-eof', $this->currentToken );
		return null;
	}

	return $componentValue;
}
/**
 * Parse a list of component values
 *
 * Component values are collected until consumeComponentValue() yields an
 * EOF token; that token is not added to the list.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-list-of-component-values
 * @return ComponentValueList
 */
public function parseComponentValueList() {
	$values = new ComponentValueList();

	for ( ; ; ) {
		// Move to the first/next token
		$this->consumeToken();
		$item = $this->consumeComponentValue();

		$reachedEof = $item instanceof Token && $item->type() === Token::T_EOF;
		if ( $reachedEof ) {
			return $values;
		}

		$values->add( $item );
	}
}
/**
 * Parse a comma-separated list of component values
 *
 * Each top-level comma token ends the current list and starts a new one;
 * EOF ends the final list. The comma and EOF tokens themselves are not
 * stored, and at least one (possibly empty) list is always returned.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-comma-separated-list-of-component-values
 * @return ComponentValueList[]
 */
public function parseCommaSeparatedComponentValueList() {
	$lists = [];
	$current = new ComponentValueList();

	for ( ; ; ) {
		// Move to the first/next token
		$this->consumeToken();
		$item = $this->consumeComponentValue();

		if ( $item instanceof Token ) {
			$type = $item->type();

			if ( $type === Token::T_COMMA ) {
				// Separator: close this list and open the next one.
				$lists[] = $current;
				$current = new ComponentValueList();
				continue;
			}

			if ( $type === Token::T_EOF ) {
				// End of input: close the last list and stop.
				$lists[] = $current;
				return $lists;
			}
		}

		$current->add( $item );
	}
}
/**
 * Consume a list of rules
 *
 * Runs until the current token is EOF. Whitespace tokens are skipped. CDO
 * and CDC tokens are skipped at top level, and otherwise start a qualified
 * rule. Rules that could not be consumed (null) are left out of the list.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-list-of-rules
 * @param bool $topLevel Determines the behavior when CDO and CDC tokens are encountered
 * @return RuleList
 */
protected function consumeRuleList( $topLevel ) {
	$list = new RuleList();

	$type = $this->currentToken->type();
	while ( $type !== Token::T_EOF ) {
		$rule = null;

		if ( $type === Token::T_AT_KEYWORD ) {
			$rule = $this->consumeAtRule();
		} elseif ( $type === Token::T_CDO || $type === Token::T_CDC ) {
			if ( !$topLevel ) {
				$rule = $this->consumeQualifiedRule();
			}
			// Else, do nothing
		} elseif ( $type !== Token::T_WHITESPACE ) {
			$rule = $this->consumeQualifiedRule();
		}

		if ( $rule ) {
			$list->add( $rule );
		}

		// Step past the last token of whatever was just handled.
		$this->consumeToken();
		$type = $this->currentToken->type();
	}

	return $list;
}
/**
 * Consume a list of declarations and at-rules
 *
 * Runs until the current token is EOF. Whitespace and semicolons are
 * skipped. A run of component values that starts with an identifier is
 * re-tokenized and handed to a nested parser, which consumes it as one
 * declaration. Anything else is reported as
 * 'unexpected-token-in-declaration-list' and discarded up to the next
 * semicolon or EOF.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-list-of-declarations
 * @param bool $allowAtRules Whether to allow at-rules. This flag is not in
 *  the spec and is used to implement the non-spec self::parseDeclarationList().
 *  When false, at-rules are still consumed but reported as errors and dropped.
 * @return DeclarationOrAtRuleList|DeclarationList
 */
protected function consumeDeclarationOrAtRuleList( $allowAtRules = true ) {
	$list = $allowAtRules ? new DeclarationOrAtRuleList() : new DeclarationList();

	$type = $this->currentToken->type();
	while ( $type !== Token::T_EOF ) {
		$item = null;

		if ( $type === Token::T_IDENT ) {
			// Gather everything up to (not including) the next semicolon or EOF.
			$values = [];
			do {
				$values[] = $this->consumeComponentValue();
				$this->consumeToken();
				$next = $this->currentToken->type();
			} while ( $next !== Token::T_SEMICOLON && $next !== Token::T_EOF );

			// Parse the gathered values as a single declaration. The token we
			// stopped on is passed along as the nested parser's EOF token.
			$tokens = ( new ComponentValueList( $values ) )->toTokenArray();
			$subParser = static::newFromTokens( $tokens, $this->currentToken );
			// Load that first token
			$subParser->consumeToken();
			$item = $subParser->consumeDeclaration();

			// Propagate any errors
			$this->parseErrors = array_merge( $this->parseErrors, $subParser->parseErrors );
		} elseif ( $type === Token::T_AT_KEYWORD ) {
			if ( !$allowAtRules ) {
				$this->parseError( 'unexpected-token-in-declaration-list', $this->currentToken );
			}
			// The at-rule is consumed either way; it is only kept when allowed.
			$atRule = $this->consumeAtRule();
			if ( $allowAtRules ) {
				$item = $atRule;
			}
		} elseif ( $type !== Token::T_WHITESPACE && $type !== Token::T_SEMICOLON ) {
			$this->parseError( 'unexpected-token-in-declaration-list', $this->currentToken );

			// Throw away everything up to the next semicolon or EOF.
			do {
				$this->consumeComponentValue();
				$this->consumeToken();
				$next = $this->currentToken->type();
			} while ( $next !== Token::T_SEMICOLON && $next !== Token::T_EOF );
		}

		if ( $item ) {
			$list->add( $item );
		}

		// Step past the last token of whatever was just handled.
		$this->consumeToken();
		$type = $this->currentToken->type();
	}

	return $list;
}
/**
 * Consume a declaration
 *
 * The Declaration is created from the current token and always reads to
 * EOF. A trailing "!important" (matched case-insensitively) is stripped
 * from the value and recorded via setImportant(); trailing whitespace is
 * stripped as well. If no colon follows the name, an 'expected-colon'
 * parse error is recorded and null is returned.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-declaration
 * @return Declaration|null
 */
protected function consumeDeclaration() {
	$declaration = new Declaration( $this->currentToken );

	// 1.
	$this->consumeTokenAndWhitespace();

	// 2. and 3.
	if ( $this->currentToken->type() !== Token::T_COLON ) {
		$this->parseError( 'expected-colon', $this->currentToken );
		return null;
	}
	$this->consumeTokenAndWhitespace();

	// 4. Collect the value, remembering where the last two non-whitespace
	// component values ended up so "!important" can be detected afterwards.
	$value = $declaration->getValue();
	$prevIdx = -1;
	$lastIdx = -1;
	while ( $this->currentToken->type() !== Token::T_EOF ) {
		$value->add( $this->consumeComponentValue() );
		if ( $this->currentToken->type() !== Token::T_WHITESPACE ) {
			$prevIdx = $lastIdx;
			$lastIdx = $value->count() - 1;
		}
		$this->consumeToken();
	}

	// 5. and part of 6.
	// @phan-suppress-next-line PhanSuspiciousValueComparison False positive about $prevIdx is -1
	$bang = $prevIdx >= 0 ? $value[$prevIdx] : null;
	$keyword = $lastIdx >= 0 ? $value[$lastIdx] : null;

	$hasBang = $bang instanceof Token &&
		$bang->type() === Token::T_DELIM &&
		$bang->value() === '!';
	$hasImportant = $hasBang &&
		$keyword instanceof Token &&
		$keyword->type() === Token::T_IDENT &&
		strcasecmp( $keyword->value(), 'important' ) === 0;

	if ( $hasImportant ) {
		// This removes the "!" and "important" (5), and also any whitespace between/after (6)
		while ( isset( $value[$prevIdx] ) ) {
			$value->remove( $prevIdx );
		}
		$declaration->setImportant( true );
	}

	// Rest of 6. Drop whitespace tokens from the end of the value.
	for ( $i = $value->count() - 1; $i >= 0; $i-- ) {
		$tail = $value[$i];
		if ( !( $tail instanceof Token ) || $tail->type() !== Token::T_WHITESPACE ) {
			break;
		}
		$value->remove( $i );
	}

	// 7.
	return $declaration;
}
/**
 * Consume an at-rule
 *
 * The AtRule is created from the current token. Component values are added
 * to the prelude until a semicolon, a "{" block (which becomes the rule's
 * block) or EOF. EOF records an 'unexpected-eof-in-rule' parse error unless
 * the EOF token is flagged 'recursion-depth-exceeded'.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-at-rule
 * @return AtRule
 */
protected function consumeAtRule() {
	$rule = new AtRule( $this->currentToken );
	$this->consumeToken();

	// Everything before the terminating token belongs to the prelude.
	$type = $this->currentToken->type();
	while (
		$type !== Token::T_SEMICOLON &&
		$type !== Token::T_EOF &&
		$type !== Token::T_LEFT_BRACE
	) {
		$rule->getPrelude()->add( $this->consumeComponentValue() );
		$this->consumeToken();
		$type = $this->currentToken->type();
	}

	if ( $type === Token::T_LEFT_BRACE ) {
		// Spec has "simple block with an associated token of <{-token>" here, but that isn't
		// possible because it's not a Token.
		$rule->setBlock( $this->consumeSimpleBlock() );
	} elseif ( $type === Token::T_EOF ) {
		if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
			$this->parseError( 'unexpected-eof-in-rule', $this->currentToken );
		}
	}

	return $rule;
}
/**
 * Consume a qualified rule
 *
 * The QualifiedRule is created from the current token, which is also the
 * first token of the prelude. Component values are added to the prelude
 * until a "{" block is found. If EOF comes first, null is returned, and an
 * 'unexpected-eof-in-rule' parse error is recorded unless the EOF token is
 * flagged 'recursion-depth-exceeded'.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-qualified-rule
 * @return QualifiedRule|null
 */
protected function consumeQualifiedRule() {
	$rule = new QualifiedRule( $this->currentToken );

	// Everything before the block belongs to the prelude.
	$type = $this->currentToken->type();
	while ( $type !== Token::T_EOF && $type !== Token::T_LEFT_BRACE ) {
		$rule->getPrelude()->add( $this->consumeComponentValue() );
		$this->consumeToken();
		$type = $this->currentToken->type();
	}

	if ( $type === Token::T_EOF ) {
		if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
			$this->parseError( 'unexpected-eof-in-rule', $this->currentToken );
		}
		return null;
	}

	// Spec has "simple block with an associated token of <{-token>" here, but that isn't
	// possible because it's not a Token.
	$rule->setBlock( $this->consumeSimpleBlock() );

	return $rule;
}
/**
 * Consume a component value
 *
 * An opening brace, bracket or parenthesis yields a simple block, a function
 * token yields a function, and any other token is returned as-is.
 *
 * Nesting is tracked in $this->cvDepth. Once it exceeds
 * self::CV_DEPTH_LIMIT, a 'recursion-depth-exceeded' parse error is
 * recorded, the rest of the input is discarded, and the current token is
 * replaced with an EOF token flagged 'recursion-depth-exceeded'. Callers
 * check that flag to avoid reporting their own "unexpected EOF" errors.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-component-value
 * @return ComponentValue
 */
protected function consumeComponentValue() {
	// The limit is a private constant, so it is read via self:: rather than
	// static::. Late static binding would resolve it against the runtime
	// subclass and fail if that class declared its own private constant of
	// the same name.
	if ( ++$this->cvDepth > self::CV_DEPTH_LIMIT ) {
		$this->parseError( 'recursion-depth-exceeded', $this->currentToken );

		// There's no way to safely recover from this without more recursion.
		// So just eat the rest of the input, then return a
		// specially-flagged EOF, so we can avoid 100 "unexpected EOF"
		// errors.
		$position = $this->currentToken->getPosition();
		while ( $this->currentToken->type() !== Token::T_EOF ) {
			$this->consumeToken();
		}
		$this->currentToken = new Token( Token::T_EOF, [
			'position' => $position,
			'typeFlag' => 'recursion-depth-exceeded'
		] );
	}

	switch ( $this->currentToken->type() ) {
		case Token::T_LEFT_BRACE:
		case Token::T_LEFT_BRACKET:
		case Token::T_LEFT_PAREN:
			$ret = $this->consumeSimpleBlock();
			break;

		case Token::T_FUNCTION:
			$ret = $this->consumeFunction();
			break;

		default:
			$ret = $this->currentToken;
			break;
	}

	// Leaving this nesting level.
	$this->cvDepth--;

	// @phan-suppress-next-line PhanTypeMismatchReturnNullable $ret always set
	return $ret;
}
/**
 * Consume a simple block
 *
 * The SimpleBlock is created from the current token, and it supplies the
 * token type that ends the block. Component values are added to the block
 * until that end token or EOF. EOF records an 'unexpected-eof-in-block'
 * parse error unless the EOF token is flagged 'recursion-depth-exceeded'.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-simple-block
 * @return SimpleBlock
 */
protected function consumeSimpleBlock() {
	$block = new SimpleBlock( $this->currentToken );
	$endTokenType = $block->getEndTokenType();
	$this->consumeToken();

	$type = $this->currentToken->type();
	while ( $type !== Token::T_EOF && $type !== $endTokenType ) {
		$block->getValue()->add( $this->consumeComponentValue() );
		$this->consumeToken();
		$type = $this->currentToken->type();
	}

	// Ending on EOF instead of the end token means the block was never closed.
	if (
		$type === Token::T_EOF &&
		$this->currentToken->typeFlag() !== 'recursion-depth-exceeded'
	) {
		$this->parseError( 'unexpected-eof-in-block', $this->currentToken );
	}

	return $block;
}
/**
 * Consume a function
 *
 * The CSSFunction is created from the current token. Component values are
 * added to the function until a ")" token or EOF. EOF records an
 * 'unexpected-eof-in-function' parse error unless the EOF token is flagged
 * 'recursion-depth-exceeded'.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-function
 * @return CSSFunction
 */
protected function consumeFunction() {
	$function = new CSSFunction( $this->currentToken );
	$this->consumeToken();

	$type = $this->currentToken->type();
	while ( $type !== Token::T_EOF && $type !== Token::T_RIGHT_PAREN ) {
		$function->getValue()->add( $this->consumeComponentValue() );
		$this->consumeToken();
		$type = $this->currentToken->type();
	}

	// Ending on EOF instead of ")" means the function was never closed.
	if (
		$type === Token::T_EOF &&
		$this->currentToken->typeFlag() !== 'recursion-depth-exceeded'
	) {
		$this->parseError( 'unexpected-eof-in-function', $this->currentToken );
	}

	return $function;
}
// @codeCoverageIgnoreEnd
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 21:10 (1 d, 21 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
8d/3b/ba81fe505d6cde4193e131a4830c
Default Alt Text
Parser.php (19 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment