Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F584724
NumericUppercaseCollation.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
3 KB
Referenced Files
None
Subscribers
None
NumericUppercaseCollation.php
View Options
<?php
/**
* @license GPL-2.0-or-later
* @file
*/
use
MediaWiki\Language\Language
;
use
MediaWiki\Languages\LanguageFactory
;
/**
 * Collation that orders text with numbers "naturally", so that 'Foo 1' < 'Foo 2' < 'Foo 12'.
 *
 * Note that this only works in terms of sequences of digits, and the behavior for decimal fractions
 * or pretty-formatted numbers may be unexpected.
 *
 * Digits will be based on the wiki's content language settings. If
 * you change the content language of a wiki you will need to run
 * updateCollation.php --force. Only English (ASCII 0-9) and the
 * localized version will be counted. Localized digits from other languages
 * or weird unicode digit equivalents (e.g. 4, 𝟜, ⓸ , ⁴, etc) will not count.
 *
 * @since 1.28
 */
class NumericUppercaseCollation extends UppercaseCollation {

	/**
	 * @var Language How to convert digits (usually the content language)
	 */
	private $digitTransformLang;

	/**
	 * @param LanguageFactory $languageFactory
	 * @param string|Language $digitTransformLang How to convert digits.
	 *  For example, if given language "my" then ၇ is treated like 7.
	 *  It is expected that usually this is given the content language.
	 */
	public function __construct( LanguageFactory $languageFactory, $digitTransformLang ) {
		// Accept either a ready-made Language object or a language code to resolve.
		$this->digitTransformLang = $digitTransformLang instanceof Language
			? $digitTransformLang
			: $languageFactory->getLanguage( $digitTransformLang );

		parent::__construct( $languageFactory );
	}

	/**
	 * Build a sort key in which every run of digits is ordered by numeric magnitude.
	 *
	 * The parent's sort key is taken first, localized digits are converted to ASCII, and then
	 * each run of ASCII digits is rewritten as: '0', a two-byte big-endian length, and the digits
	 * with leading zeros stripped.
	 *
	 * @inheritDoc
	 */
	public function getSortKey( $string ) {
		$sortkey = $this->convertDigits( parent::getSortKey( $string ) );

		// For each sequence of digits, insert the digit '0' and then the length of the sequence
		// (encoded in two bytes) before it. The '0' ensures correct position (where digits would
		// normally sort), then the length will be compared putting shorter numbers before longer
		// ones; if identical, then the characters will be compared, which generates the correct
		// results for numbers of equal length.
		$numericSortkey = preg_replace_callback(
			'/\d+/',
			static function ( $matches ) {
				// Strip any leading zeros ("007" and "7" are the same number; "000" becomes "").
				$number = ltrim( $matches[0], '0' );

				// Two bytes can encode lengths up to 65535. chr() wraps values outside 0..255
				// modulo 256, so without the clamp a run of 65536 digits would be encoded with
				// length 0 and sort before every other number. Clamping keeps such runs after
				// all shorter ones.
				$len = min( strlen( $number ), 0xFFFF );
				$prefix = chr( $len >> 8 ) . chr( $len & 0xFF );

				return '0' . $prefix . $number;
			},
			$sortkey
		);

		// preg_replace_callback() returns null on a PCRE error; fall back to the
		// digit-converted key rather than handing null to the caller.
		return $numericSortkey ?? $sortkey;
	}

	/**
	 * Convert localized digits to english digits.
	 *
	 * based on Language::parseFormattedNumber but without commas.
	 *
	 * @param string $string sortkey to unlocalize digits of
	 * @return string Sortkey with all localized digits replaced with ASCII digits.
	 */
	private function convertDigits( $string ) {
		$table = $this->digitTransformLang->digitTransformTable();
		if ( !$table ) {
			// No transform table for this language: nothing to convert.
			return $string;
		}

		// Drop empty entries, then invert the table so it can be used as a replacement map.
		$flipped = array_flip( array_filter( $table ) );
		// Some languages seem to also have commas in this table; keep only numeric targets.
		$flipped = array_filter( $flipped, 'is_numeric' );

		return strtr( $string, $flipped );
	}

	/**
	 * Group everything that starts with a digit (after digit conversion) under the
	 * 'category-header-numerals' heading; defer to the parent for anything else.
	 *
	 * @inheritDoc
	 */
	public function getFirstLetter( $string ) {
		$convertedString = $this->convertDigits( $string );

		if ( preg_match( '/^\d/', $convertedString ) ) {
			return wfMessage( 'category-header-numerals' )
				->numParams( 0, 9 )
				->text();
		}

		return parent::getFirstLetter( $string );
	}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Thu, Apr 2, 12:58 (16 h, 39 m)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
dc/74/391bd8839fab9c499379df620747
Default Alt Text
NumericUppercaseCollation.php (3 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment