Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1430739
DataAccess.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
14 KB
Referenced Files
None
Subscribers
None
DataAccess.php
View Options
<?php
declare
(
strict_types
=
1
);
namespace
Wikimedia\Parsoid\Config\Api
;
use
Wikimedia\Parsoid\Config\Api\SiteConfig
as
ApiSiteConfig
;
use
Wikimedia\Parsoid\Config\DataAccess
as
IDataAccess
;
use
Wikimedia\Parsoid\Config\PageConfig
;
use
Wikimedia\Parsoid\Config\PageContent
;
use
Wikimedia\Parsoid\Config\SiteConfig
as
ISiteConfig
;
use
Wikimedia\Parsoid\Core\ContentMetadataCollector
;
use
Wikimedia\Parsoid\Core\ContentMetadataCollectorStringSets
as
CMCSS
;
use
Wikimedia\Parsoid\Core\LinkTarget
;
use
Wikimedia\Parsoid\Mocks\MockPageContent
;
use
Wikimedia\Parsoid\Utils\PHPUtils
;
use
Wikimedia\Parsoid\Utils\Title
;
use
Wikimedia\Parsoid\Utils\TitleValue
;
/**
* DataAccess via MediaWiki's Action API
*
* Note this is intended for testing, not performance.
*/
class
DataAccess
extends
IDataAccess
{
/** @var ApiHelper Helper through which this class issues all of its Action API requests */
private
$api
;
/**
* @var bool Should we strip the protocol from returned URLs?
* Generally this should be true, since the protocol of the API
* request doesn't necessarily match the protocol of article
* access; ie, we could be using https to access the API but emit
* article content which can be read with http. But for running
* parserTests, we need to include the protocol in order to match
* the parserTest configuration in core.
*/
private
$stripProto
;
/**
* @name Caching
* @todo Someone should librarize MediaWiki core's MapCacheLRU so we can
* pull it in via composer and use it here.
* @{
*/
/** Number of entries at which setCache() evicts the oldest entry before adding a new key. */
private
const
MAX_CACHE_LEN
=
100
;
/**
* @var array Cached API results, keyed by cache key string. The position
* of an entry in the array encodes recency: getCache() and setCache()
* re-append the entries they touch, so the first element is always the
* least recently used one.
*/
private
$cache
=
[];
/** @var ISiteConfig Site configuration used to resolve namespace names and to format titles */
private
ISiteConfig
$siteConfig
;
/**
 * Look up a cache entry and mark it as the most recently used one.
 *
 * @param string $key
 * @return mixed The stored value, or null when nothing is cached under $key
 */
private function getCache( string $key ) {
	if ( !isset( $this->cache[$key] ) ) {
		return null;
	}

	// Array order doubles as recency order: removing the entry and adding
	// it back moves it to the "most recently used" end.
	$hit = $this->cache[$key];
	unset( $this->cache[$key] );
	$this->cache[$key] = $hit;

	return $hit;
}
/**
 * Store a value in the cache, evicting the oldest entry when the cache is full.
 *
 * @param string $key
 * @param mixed $value Not null.
 */
private function setCache( string $key, $value ): void {
	if ( isset( $this->cache[$key] ) ) {
		// Drop the stale entry first so that the fresh value is appended
		// at the most-recently-used end of the array.
		unset( $this->cache[$key] );
	} elseif ( count( $this->cache ) >= self::MAX_CACHE_LEN ) {
		// Cache is full: the first array element is the least recently used.
		unset( $this->cache[array_key_first( $this->cache )] );
	}

	$this->cache[$key] = $value;
}
/** @} */
/**
 * @param ApiHelper $api
 * @param ISiteConfig $siteConfig
 * @param array $opts Recognised key: 'stripProto' (treated as true when
 *   absent or null).
 */
public function __construct( ApiHelper $api, ISiteConfig $siteConfig, array $opts ) {
	$this->stripProto = $opts['stripProto'] ?? true;
	$this->siteConfig = $siteConfig;
	$this->api = $api;
}
/** @inheritDoc */
public function getPageInfo( $pageConfigOrTitle, array $titles ): array {
	// The context may be given either as a PageConfig or directly as a title.
	if ( $pageConfigOrTitle instanceof PageConfig ) {
		$contextTitle = $pageConfigOrTitle->getLinkTarget();
	} else {
		$contextTitle = $pageConfigOrTitle;
	}

	if ( !$titles ) {
		return [];
	}

	$linkContext = $this->toPrefixedText( $contextTitle );
	$result = [];

	// Titles are sent to the API in chunks of 50 per request.
	foreach ( array_chunk( $titles, 50 ) as $chunk ) {
		$query = $this->api->makeRequest( [
			'action' => 'query',
			'prop' => 'info',
			'inprop' => 'linkclasses',
			'inlinkcontext' => $linkContext,
			'titles' => implode( '|', $chunk ),
		] )['query'];

		// Requested title => normalized title, as reported by the API.
		$normalized = [];
		foreach ( $query['normalized'] ?? [] as $entry ) {
			$source = $entry['from'];
			if ( $entry['fromencoded'] ) {
				$source = rawurldecode( $source );
			}
			$normalized[$source] = $entry['to'];
		}

		// Index the returned page records by their title.
		$byTitle = [];
		foreach ( $query['pages'] as $pageInfo ) {
			$byTitle[$pageInfo['title']] = $pageInfo;
		}

		foreach ( $chunk as $requested ) {
			// Follow the normalization chain to the title used in the response.
			$resolved = $requested;
			while ( isset( $normalized[$resolved] ) ) {
				$resolved = $normalized[$resolved];
			}
			$info = $byTitle[$resolved] ?? [];

			$isMissing = $info['missing'] ?? false;
			$isInvalid = $info['invalid'] ?? false;

			$result[$requested] = [
				'pageId' => $info['pageid'] ?? null,
				'revId' => $info['lastrevid'] ?? null,
				'missing' => $isMissing,
				// Anything that is neither missing nor invalid is reported
				// as known, whatever the API's own 'known' flag says.
				'known' => ( $isMissing || $isInvalid ) ? ( $info['known'] ?? false ) : true,
				'redirect' => $info['redirect'] ?? false,
				'linkclasses' => $info['linkclasses'] ?? [],
				'invalid' => $isInvalid,
			];
		}
	}

	return $result;
}
/** @inheritDoc */
public function getFileInfo( PageConfig $pageConfig, array $files ): array {
	$pageConfigTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );
	if ( !$files ) {
		// Nothing to look up, so skip the site-config queries below.
		return [];
	}

	$sc = $this->siteConfig;
	// Use the videoinfo module only when the API site config reports having it.
	if ( $sc instanceof ApiSiteConfig && $sc->hasVideoInfo() ) {
		$prefix = "vi";
		$propName = "videoinfo";
	} else {
		$prefix = "ii";
		$propName = "imageinfo";
	}

	$props = [ 'mediatype', 'mime', 'size', 'url', 'badfile', 'sha1', 'timestamp' ];
	if ( $prefix === 'vi' ) {
		$props[] = 'derivatives';
		$props[] = 'timedtext';
	}

	// Request parameters shared by every file; copied and extended per file.
	$baseArgs = [
		'action' => 'query',
		'format' => 'json',
		'formatversion' => 2,
		'rawcontinue' => 1,
		'prop' => $propName,
		"{$prefix}badfilecontexttitle" => $pageConfigTitle,
		"{$prefix}prop" => implode( '|', $props ),
	];

	// The File namespace name is the same for every file: resolve it once.
	$imgNS = $sc->namespaceName( $sc->canonicalNamespaceId( 'file' ) );

	$ret = [];
	foreach ( $files as $file ) {
		$name = $file[0];
		$dims = $file[1];

		$apiArgs = $baseArgs;
		$apiArgs['titles'] = "$imgNS:$name";

		// A page- or language-specific rendering can only be requested
		// together with a width.
		$needsWidth = isset( $dims['page'] ) || isset( $dims['lang'] );
		if ( isset( $dims['width'] ) ) {
			$apiArgs["{$prefix}urlwidth"] = $dims['width'];
			if ( $needsWidth ) {
				$urlParam = self::thumbUrlParam( $dims, $dims['width'] );
				if ( $urlParam !== null ) {
					$apiArgs["{$prefix}urlparam"] = $urlParam;
				}
				$needsWidth = false;
			}
		}
		if ( isset( $dims['height'] ) ) {
			$apiArgs["{$prefix}urlheight"] = $dims['height'];
		}
		if ( isset( $dims['seek'] ) ) {
			// A seek position replaces any page/lang urlparam set above.
			$apiArgs["{$prefix}urlparam"] = "seek={$dims['seek']}";
		}

		do {
			$data = $this->api->makeRequest( $apiArgs );
			// Expect exactly 1 row. If the page entry or its info row is
			// absent, treat the file like a missing one instead of
			// dereferencing null.
			$pageData = $data['query']['pages'][0] ?? [];
			$fileinfo = $pageData[$propName][0] ?? null;
			$missing = $fileinfo === null || isset( $fileinfo['filemissing'] );

			// Corner case: if page is set, the core ImageInfo API doesn't
			// respect it *unless* width is set as well. So repeat the
			// request if necessary.
			if ( isset( $fileinfo['pagecount'] ) && !isset( $dims['page'] ) ) {
				// Setting the page also ensures we won't get here again
				$dims['page'] = 1;
				$needsWidth = true;
			}

			$retry = $needsWidth && !$missing;
			if ( $retry ) {
				// Clearing the flag ensures we retry at most once for this reason
				$needsWidth = false;
				$width = $fileinfo['width'];
				$apiArgs["{$prefix}urlwidth"] = $width;
				$urlParam = self::thumbUrlParam( $dims, $width );
				if ( $urlParam !== null ) {
					$apiArgs["{$prefix}urlparam"] = $urlParam;
				}
			}
		} while ( $retry );

		if ( $missing ) {
			$fileinfo = null;
		} else {
			$fileinfo['badFile'] = $pageData['badfile'] ?? false;

			foreach ( [ 'url', 'thumburl', 'descriptionurl', 'descriptionshorturl' ] as $urlKey ) {
				$this->stripProto( $fileinfo, $urlKey );
			}
			foreach ( array_keys( $fileinfo['responsiveUrls'] ?? [] ) as $density ) {
				$this->stripProto( $fileinfo['responsiveUrls'], (string)$density );
			}
			if ( $prefix === 'vi' ) {
				foreach ( [ 'derivatives', 'timedtext' ] as $kind ) {
					foreach ( array_keys( $fileinfo['thumbdata'][$kind] ?? [] ) as $j ) {
						$this->stripProto( $fileinfo['thumbdata'][$kind][$j], 'src' );
					}
				}
			}
		}
		$ret[] = $fileinfo;
	}

	return $ret;
}

/**
 * Build the `urlparam` request value that selects a page-specific (PDF)
 * or language-specific (SVG) rendering at the given width.
 *
 * @param array $dims Requested dimensions; 'page' takes precedence over 'lang'
 * @param mixed $width Width to embed in the parameter
 * @return ?string The parameter value, or null when neither 'page' nor
 *   'lang' is set in $dims
 */
private static function thumbUrlParam( array $dims, $width ): ?string {
	if ( isset( $dims['page'] ) ) {
		// PDF
		return "page{$dims['page']}-{$width}px";
	}
	if ( isset( $dims['lang'] ) ) {
		// SVG
		return "lang{$dims['lang']}-{$width}px";
	}
	return null;
}
/**
 * Rewrite the URL stored under $key into protocol-relative form, in place.
 *
 * Does nothing when protocol stripping is disabled, when $obj is null, or
 * when the entry is missing or empty.
 *
 * @param ?array &$obj
 * @param string $key
 */
private function stripProto( ?array &$obj, string $key ): void {
	if ( $obj === null || empty( $obj[$key] ) || !$this->stripProto ) {
		return;
	}

	// Only a leading http:// or https:// scheme is replaced.
	$obj[$key] = preg_replace( '#^https?://#', '//', $obj[$key] );
}
/** @inheritDoc */
public function doPst( PageConfig $pageConfig, string $wikitext ): string {
	$pageConfigTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );

	// Results are cached per (page title, wikitext) pair.
	$key = implode( ':', [ 'pst', md5( $pageConfigTitle ), md5( $wikitext ) ] );
	$cached = $this->getCache( $key );
	if ( $cached !== null ) {
		return $cached;
	}

	$response = $this->api->makeRequest( [
		'action' => 'parse',
		'title' => $pageConfigTitle,
		'text' => $wikitext,
		'contentmodel' => 'wikitext',
		'onlypst' => 1,
	] );
	$transformed = $response['parse']['text'];
	$this->setCache( $key, $transformed );

	return $transformed;
}
/**
 * Copy the metadata found in an API result into the given
 * ContentMetadataCollector.
 * @param array $data
 * @param ContentMetadataCollector $metadata
 */
private function mergeMetadata( array $data, ContentMetadataCollector $metadata ): void {
	foreach ( $data['categories'] ?? [] as $cat ) {
		$metadata->addCategory(
			// 14 is NS_CATEGORY
			TitleValue::tryNew( 14, $cat['category'] ),
			$cat['sortkey']
		);
	}

	$stringSets = [
		[ CMCSS::MODULE, 'modules' ],
		[ CMCSS::MODULE_STYLE, 'modulestyles' ],
	];
	foreach ( $stringSets as [ $set, $field ] ) {
		$metadata->appendOutputStrings( $set, $data[$field] ?? [] );
	}

	foreach ( $data['jsconfigvars'] ?? [] as $key => $value ) {
		$strategy = 'write-once';
		if ( is_array( $value ) ) {
			// Strategy value will be exposed by change
			// I974d9ecfb4ca8b22361d25c4c70fc5e55c39d5ed in core.
			$strategy = $value['_mw-strategy'] ?? 'write-once';
			unset( $value['_mw-strategy'] );
		}

		if ( $strategy !== 'union' ) {
			$metadata->setJsConfigVar( $key, $value );
			continue;
		}
		// For the 'union' strategy the array keys are the set members.
		foreach ( array_keys( $value ) as $item ) {
			$metadata->appendJsConfigVar( $key, $item );
		}
	}

	foreach ( $data['externallinks'] ?? [] as $link ) {
		$metadata->addExternalLink( $link );
	}

	foreach ( $data['properties'] ?? [] as $name => $value ) {
		if ( is_string( $value ) ) {
			$metadata->setUnsortedPageProperty( $name, $value );
			continue;
		}
		if ( is_numeric( $value ) ) {
			$metadata->setNumericPageProperty( $name, $value );
			continue;
		}
		if ( is_bool( $value ) ) {
			// Deprecated back-compat
			$metadata->setNumericPageProperty( $name, (int)$value );
		}
		// Non-scalar values deprecated in 1.42; drop them.
	}
}
/** @inheritDoc */
public function parseWikitext(
	PageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$revid = $pageConfig->getRevisionId();
	$pageConfigTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );

	// Cached per (page title, wikitext, revision id) triple.
	$key = implode( ':', [ 'parse', md5( $pageConfigTitle ), md5( $wikitext ), $revid ] );
	$parsed = $this->getCache( $key );

	if ( $parsed === null ) {
		$request = [
			'action' => 'parse',
			'title' => $pageConfigTitle,
			'text' => $wikitext,
			'contentmodel' => 'wikitext',
			'prop' => 'text|modules|jsconfigvars|categories|properties|externallinks',
			'disablelimitreport' => 1,
			'wrapoutputclass' => '',
			'showstrategykeys' => 1,
		];
		// The revision id is only sent when the page config provides one.
		if ( $revid !== null ) {
			$request['revid'] = $revid;
		}
		$parsed = $this->api->makeRequest( $request )['parse'];
		$this->setCache( $key, $parsed );
	}

	// Metadata is merged on every call, cache hit or not.
	$this->mergeMetadata( $parsed, $metadata );

	// The 'text' field holds the HTML
	return $parsed['text'];
}
/** @inheritDoc */
public function preprocessWikitext(
	PageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $wikitext
): string {
	$revid = $pageConfig->getRevisionId();
	$pageConfigTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );

	// Cached per (page title, wikitext, revision id) triple.
	$key = implode( ':', [ 'preprocess', md5( $pageConfigTitle ), md5( $wikitext ), $revid ] );
	$expanded = $this->getCache( $key );

	if ( $expanded === null ) {
		$request = [
			'action' => 'expandtemplates',
			'title' => $pageConfigTitle,
			'text' => $wikitext,
			'prop' => 'wikitext|modules|jsconfigvars|categories|properties',
			'showstrategykeys' => 1,
		];
		// The revision id is only sent when the page config provides one.
		if ( $revid !== null ) {
			$request['revid'] = $revid;
		}
		$expanded = $this->api->makeRequest( $request )['expandtemplates'];
		$this->setCache( $key, $expanded );
	}

	// Metadata is merged on every call, cache hit or not.
	$this->mergeMetadata( $expanded, $metadata );

	return $expanded['wikitext'];
}
/** @inheritDoc */
public function fetchTemplateSource( PageConfig $pageConfig, LinkTarget $title ): ?PageContent {
	$title = $this->toPrefixedText( $title );
	$key = implode( ':', [ 'content', md5( $title ) ] );
	$ret = $this->getCache( $key );

	if ( $ret === null ) {
		$data = $this->api->makeRequest( [
			'action' => 'query',
			'prop' => 'revisions',
			'rvprop' => 'content',
			'rvslots' => '*',
			'titles' => $title,
			'rvlimit' => 1,
		] );

		$pageData = $data['query']['pages'][0] ?? [];
		$ret = $pageData['revisions'][0]['slots'] ?? null;

		// Besides pages flagged as missing, also report "not found" when
		// the response has no page entry or no revision slots. Otherwise
		// null would be stored in the cache and passed on to
		// MockPageContent.
		if ( isset( $pageData['missing'] ) || !is_array( $ret ) ) {
			// Misses are not cached, so they are re-queried on the next call.
			return null;
		}

		// PORT-FIXME set the redirect field if needed
		$this->setCache( $key, $ret );
	}

	return new MockPageContent( $ret );
}
/** @inheritDoc */
public function fetchTemplateData( PageConfig $pageConfig, LinkTarget $title ): ?array {
	$title = $this->toPrefixedText( $title );
	$key = implode( ':', [ 'templatedata', md5( $title ) ] );
	$ret = $this->getCache( $key );

	if ( $ret === null ) {
		$pages = $this->api->makeRequest( [
			'action' => 'templatedata',
			'includeMissingTitles' => 1,
			'titles' => $title,
			'redirects' => 1,
		] )['pages'] ?? [];

		// Only the first returned page entry is used.
		$first = reset( $pages );
		if ( !is_array( $first ) ) {
			// reset() returns false for an empty list. Returning (or
			// caching) that would violate the ?array return type, so
			// report "no data" instead.
			return null;
		}

		$ret = $first;
		$this->setCache( $key, $ret );
	}

	return $ret;
}
/** @inheritDoc */
public function logLinterData( PageConfig $pageConfig, array $lints ): void {
	// Each lint is written to the PHP error log as a JSON-encoded message.
	foreach ( $lints as $lint ) {
		$encoded = PHPUtils::jsonEncode( $lint );
		error_log( $encoded );
	}
}
/**
 * Convert a LinkTarget into the "prefixed text" title string that the
 * MediaWiki action API expects.
 * @param LinkTarget $linkTarget
 * @return string The title, as prefixed text
 */
private function toPrefixedText( LinkTarget $linkTarget ): string {
	$title = Title::newFromLinkTarget( $linkTarget, $this->siteConfig );
	return $title->getPrefixedText();
}
/** @inheritDoc */
public function addTrackingCategory(
	PageConfig $pageConfig,
	ContentMetadataCollector $metadata,
	string $key
): void {
	$pageConfigTitle = $this->toPrefixedText( $pageConfig->getLinkTarget() );

	// The message lookup is cached per (page title, message key) pair.
	$cacheKey = implode( ':', [ 'allmessages', md5( $pageConfigTitle ), md5( $key ) ] );
	$data = $this->getCache( $cacheKey );
	if ( $data === null ) {
		$params = [
			'action' => 'query',
			'meta' => 'allmessages',
			'amtitle' => $pageConfigTitle,
			'ammessages' => $key,
			'amenableparser' => 1,
		];
		$data = $this->api->makeRequest( $params )['query']['allmessages'][0];
		$this->setCache( $cacheKey, $data );
	}

	if ( isset( $data['missing'] ) ) {
		// No such message: there is no category to add
		return;
	}

	// 14 is NS_CATEGORY
	$tv = TitleValue::tryNew( 14, $data['content'] );
	if ( $tv === null ) {
		// tryNew() gave no title for this message text, so there is no
		// category to add; do not hand null to the collector.
		return;
	}

	$metadata->addCategory( $tv );
}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 19:29 (4 h, 10 m)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
f3/f7/96f74acfa711ecd236f470546ef4
Default Alt Text
DataAccess.php (14 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment