Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1432014
UnicodeEscapeSniff.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
4 KB
Referenced Files
None
Subscribers
None
UnicodeEscapeSniff.php
View Options
<?php
/**
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
*/
namespace
MediaWiki\Sniffs\AlternativeSyntax
;
use
PHP_CodeSniffer\Files\File
;
use
PHP_CodeSniffer\Sniffs\Sniff
;
class
UnicodeEscapeSniff
implements
Sniff
{
/**
* @inheritDoc
*/
public
function
register
():
array
{
return
[
T_CONSTANT_ENCAPSED_STRING
,
T_DOUBLE_QUOTED_STRING
,
T_START_HEREDOC
,
];
}
/**
* @param File $phpcsFile File
* @param int $stackPtr Location
* @return int
*/
public
function
process
(
File
$phpcsFile
,
$stackPtr
)
{
$tokens
=
$phpcsFile
->
getTokens
();
// Find the end of the string.
$endPtr
=
$phpcsFile
->
findNext
(
/* types */
[
$tokens
[
$stackPtr
][
'code'
],
T_HEREDOC
,
T_END_HEREDOC
],
/* start */
$stackPtr
+
1
,
/* end */
null
,
/* exclude */
true
)
?:
$phpcsFile
->
numTokens
;
if
(
$tokens
[
$endPtr
-
1
][
'code'
]
===
T_END_HEREDOC
)
{
if
(
isset
(
$tokens
[
$endPtr
]
)
&&
$tokens
[
$endPtr
][
'code'
]
===
T_SEMICOLON
)
{
++
$endPtr
;
}
if
(
isset
(
$tokens
[
$endPtr
]
)
&&
$tokens
[
$endPtr
][
'code'
]
===
T_WHITESPACE
)
{
++
$endPtr
;
}
}
// If this is a single-quoted string, skip it.
if
(
$tokens
[
$stackPtr
][
'code'
]
===
T_CONSTANT_ENCAPSED_STRING
&&
$tokens
[
$stackPtr
][
'content'
][
0
]
===
"'"
)
{
return
$endPtr
;
}
// If the string takes up multiple lines, PHP_CodeSniffer would
// have split some of its tokens. Recombine the string's tokens
// so the next step will work.
$content
=
$phpcsFile
->
getTokensAsString
(
$stackPtr
,
$endPtr
-
$stackPtr
);
// If the string contains braced expressions, PHP_CodeSniffer
// would have combined these and surrounding tokens, which could
// lead to false matches. Avoid this by retokenizing the string.
$origTokens
=
token_get_all
(
'<?php '
.
$content
);
$warn
=
false
;
$content
=
''
;
foreach
(
$origTokens
as
$i
=>
$origToken
)
{
// Skip the PHP opening tag we added.
if
(
$i
===
0
)
{
continue
;
}
// Don't check tokens that cannot contain escape sequences.
$origToken
=
(
array
)
$origToken
;
if
(
!(
$origToken
[
0
]
===
T_ENCAPSED_AND_WHITESPACE
||
(
$origToken
[
0
]
===
T_CONSTANT_ENCAPSED_STRING
&&
$origToken
[
1
][
0
]
!==
"'"
)
)
)
{
$content
.=
$origToken
[
1
]
??
$origToken
[
0
];
continue
;
}
// Check for Unicode escape sequences in the token, explicitly
// skipping escaped backslashes to prevent false matches.
$content
.=
preg_replace_callback
(
'/
\\\\
(?:u
\{
([0-9A-Fa-f]+)
\}
|
\\\\
(*SKIP)(*FAIL))/'
,
static
function
(
array
$m
)
use
(
&
$warn
)
{
// Decode the codepoint-digits.
$cp
=
hexdec
(
$m
[
1
]
);
if
(
$cp
>
0x10FFFF
)
{
// This is a parse error. Don't offer to fix it.
return
$m
[
0
];
}
// Check the codepoint-digits against the expected format.
$hex
=
sprintf
(
'%04X'
,
$cp
);
if
(
$m
[
1
]
===
$hex
)
{
// Keep the conforming escape sequence as-is.
return
$m
[
0
];
}
// Print a warning for the token containing the nonconforming
// escape sequence and replace it with a conforming one.
$warn
=
true
;
return
'
\u
{'
.
$hex
.
'}'
;
},
$origToken
[
1
]
);
}
if
(
$warn
)
{
$fix
=
$phpcsFile
->
addFixableWarning
(
'Unicode code points should be expressed using four to six uppercase hex '
.
'digits, with leading zeros used only as necessary for
\u
{0FFF} and below'
,
$stackPtr
,
'DigitsNotNormalized'
);
if
(
$fix
)
{
$phpcsFile
->
fixer
->
beginChangeset
();
$phpcsFile
->
fixer
->
replaceToken
(
$stackPtr
,
$content
);
for
(
$i
=
$stackPtr
+
1
;
$i
<
$endPtr
;
++
$i
)
{
$phpcsFile
->
fixer
->
replaceToken
(
$i
,
''
);
}
$phpcsFile
->
fixer
->
endChangeset
();
}
}
return
$endPtr
;
}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 21:20 (1 d, 12 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
c8/1d/8c6adaac4aa7b30748b83e1bad3b
Default Alt Text
UnicodeEscapeSniff.php (4 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment