Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F2752481
prewarmParsoidParserCache.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
6 KB
Referenced Files
None
Subscribers
None
prewarmParsoidParserCache.php
View Options
<?php
use
MediaWiki\Page\PageLookup
;
use
MediaWiki\Page\PageRecord
;
use
MediaWiki\Page\ParserOutputAccess
;
use
MediaWiki\Parser\ParserOptions
;
use
MediaWiki\Parser\Parsoid\Config\SiteConfig
as
ParsoidSiteConfig
;
use
MediaWiki\Revision\RevisionLookup
;
use
MediaWiki\Revision\RevisionRecord
;
use
MediaWiki\Revision\SlotRecord
;
use
MediaWiki\Status\Status
;
use
Wikimedia\Parsoid\Core\ClientError
;
use
Wikimedia\Parsoid\Core\ResourceLimitExceededException
;
use
Wikimedia\Rdbms\SelectQueryBuilder
;
// @codeCoverageIgnoreStart
// Load the Maintenance base class that the script class below extends.
require_once __DIR__ . '/Maintenance.php';
// @codeCoverageIgnoreEnd
/**
 * Maintenance script for populating parser cache with parsoid output.
 *
 * Pages are walked in ascending page_id order, one batch at a time. For each
 * page whose main-slot content model is supported by Parsoid, the latest
 * revision is parsed through ParserOutputAccess with Parsoid enabled.
 *
 * @since 1.41
 *
 * @license GPL-2.0-or-later
 * @author Richika Rana
 */
class PrewarmParsoidParserCache extends Maintenance {

	/**
	 * Options bit field handed to ParserOutputAccess::getParserOutput():
	 * 0 by default, ParserOutputAccess::OPT_FORCE_PARSE when --force is given.
	 */
	private int $forceParse = 0;

	/** Lazily initialised by getParserOutputAccess(). */
	private ParserOutputAccess $parserOutputAccess;

	/** Lazily initialised by getPageLookup(). */
	private PageLookup $pageLookup;

	/** Lazily initialised by getRevisionLookup(). */
	private RevisionLookup $revisionLookup;

	/** Lazily initialised by getParsoidSiteConfig(). */
	private ParsoidSiteConfig $parsoidSiteConfig;

	public function __construct() {
		parent::__construct();

		// The trailing space after "run" matters: the two literals are
		// concatenated into a single sentence.
		$this->addDescription(
			'Populate parser cache with parsoid output. By default, script attempt to run ' .
			'for supported content model pages (in a specified batch if provided)'
		);
		$this->addOption(
			'force',
			'Re-parse pages even if the cached entry seems up to date',
			false,
			false
		);
		$this->addOption( 'start-from', 'Start from this page ID', false, true );
		$this->addOption( 'namespace', 'Filter pages in this namespace', false, true );
		$this->setBatchSize( 100 );
	}

	/**
	 * Fetch the page store from the service container on first use and
	 * reuse it on subsequent calls.
	 *
	 * @return PageLookup
	 */
	private function getPageLookup(): PageLookup {
		// `??=` treats a not-yet-initialised typed property as unset.
		$this->pageLookup ??= $this->getServiceContainer()->getPageStore();
		return $this->pageLookup;
	}

	/**
	 * Fetch the revision lookup from the service container on first use and
	 * reuse it on subsequent calls.
	 *
	 * @return RevisionLookup
	 */
	private function getRevisionLookup(): RevisionLookup {
		$this->revisionLookup ??= $this->getServiceContainer()->getRevisionLookup();
		return $this->revisionLookup;
	}

	/**
	 * Fetch the ParserOutputAccess service from the service container on
	 * first use and reuse it on subsequent calls.
	 *
	 * @return ParserOutputAccess
	 */
	private function getParserOutputAccess(): ParserOutputAccess {
		$this->parserOutputAccess ??= $this->getServiceContainer()->getParserOutputAccess();
		return $this->parserOutputAccess;
	}

	/**
	 * Fetch the Parsoid site configuration from the service container on
	 * first use and reuse it on subsequent calls.
	 *
	 * @return ParsoidSiteConfig
	 */
	private function getParsoidSiteConfig(): ParsoidSiteConfig {
		$this->parsoidSiteConfig ??= $this->getServiceContainer()->getParsoidSiteConfig();
		return $this->parsoidSiteConfig;
	}

	/**
	 * Build the base query selecting page IDs from the page table in
	 * ascending order. Callers add their own conditions and limit.
	 *
	 * @return SelectQueryBuilder
	 */
	private function getQueryBuilder(): SelectQueryBuilder {
		$dbr = $this->getReplicaDB();

		return $dbr->newSelectQueryBuilder()
			->select( [ 'page_id' ] )
			->from( 'page' )
			->caller( __METHOD__ )
			->orderBy( 'page_id', SelectQueryBuilder::SORT_ASC );
	}

	/**
	 * Parse a revision with Parsoid through ParserOutputAccess.
	 *
	 * Parsoid client errors and resource-limit errors are converted into a
	 * fatal Status instead of propagating, so that one bad page does not
	 * abort the whole run.
	 *
	 * @param PageRecord $page
	 * @param RevisionRecord $revision
	 * @return Status
	 */
	private function parse( PageRecord $page, RevisionRecord $revision ): Status {
		$popts = ParserOptions::newFromAnon();
		$popts->setUseParsoid();

		try {
			return $this->getParserOutputAccess()->getParserOutput(
				$page,
				$popts,
				$revision,
				$this->forceParse
			);
		} catch ( ClientError $e ) {
			return Status::newFatal( 'parsoid-client-error', $e->getMessage() );
		} catch ( ResourceLimitExceededException $e ) {
			return Status::newFatal( 'parsoid-resource-limit-exceeded', $e->getMessage() );
		}
	}

	/**
	 * NamespaceInfo::getCanonicalIndex() requires the namespace to be in lowercase,
	 * so let's do some normalization and return its canonical index.
	 *
	 * Exits with a fatal error when the name does not resolve to a namespace,
	 * rather than failing on the int return type.
	 *
	 * @param string $namespace The namespace string from the command line
	 * @return int The canonical index of the namespace
	 */
	private function normalizeNamespace( string $namespace ): int {
		$index = $this->getServiceContainer()->getNamespaceInfo()
			->getCanonicalIndex( strtolower( $namespace ) );

		if ( $index === null ) {
			$this->fatalError( "Unknown namespace: \"$namespace\"" );
		}

		return $index;
	}

	/**
	 * Populate parser cache with parsoid output.
	 *
	 * @return bool
	 */
	public function execute() {
		// A missing --start-from yields null, which casts to 0.
		$startFrom = (int)$this->getOption( 'start-from' );

		// We need the namespace index instead of the name to perform the query
		// on, because that's what the page table stores (in the page_namespace field).
		$namespaceIndex = null;
		$namespace = $this->getOption( 'namespace' );
		if ( $namespace !== null ) {
			$namespaceIndex = $this->normalizeNamespace( $namespace );
		}

		if ( $this->hasOption( 'force' ) ) {
			// If --force is supplied, force a parse for supported pages or supported
			// pages in the specified batch.
			$this->forceParse = ParserOutputAccess::OPT_FORCE_PARSE;
		}

		$this->output( "\nWarming parsoid parser cache with Parsoid output...\n\n" );

		while ( true ) {
			$query = $this->getQueryBuilder();
			if ( $namespaceIndex !== null ) {
				$query = $query->where( [ 'page_namespace' => $namespaceIndex ] );
			}

			$query = $query
				->where( $this->getReplicaDB()->expr( 'page_id', '>=', $startFrom ) )
				->limit( $this->getBatchSize() );

			$result = $query->fetchResultSet();
			if ( !$result->numRows() ) {
				break;
			}

			// Label only: computed from the starting ID and the batch size,
			// not from the IDs actually returned by the query.
			$currentBatch = $startFrom + ( $this->getBatchSize() - 1 );
			$this->output( "\n\nBatch: $startFrom - $currentBatch\n----\n" );

			// Look through pages by pageId and populate the parserCache
			foreach ( $result as $row ) {
				$page = $this->getPageLookup()->getPageById( $row->page_id );
				// Advance the cursor before any `continue`, so the next batch
				// always starts after the last row seen.
				$startFrom = ( (int)$row->page_id + 1 );

				if ( $page === null ) {
					$this->output( "\n[Skipped] Page ID: $row->page_id not found.\n" );
					continue;
				}

				$latestRevision = $page->getLatest();
				$revision = $this->getRevisionLookup()->getRevisionById( $latestRevision );
				if ( $revision === null ) {
					// The lookup may come back empty; skip the page instead of
					// calling a method on null.
					$this->output(
						"\n[Skipped] Latest revision of page ID: $row->page_id not found.\n"
					);
					continue;
				}

				$mainSlot = $revision->getSlot( SlotRecord::MAIN );

				// POA will write a dummy output to PC, but we don't want that here. Just skip!
				if ( !$this->getParsoidSiteConfig()->supportsContentModel( $mainSlot->getModel() ) ) {
					$this->output(
						'[Skipped] Content model "' .
						$mainSlot->getModel() .
						"\" not supported for page ID: $row->page_id.\n"
					);
					continue;
				}

				$status = $this->parse( $page, $revision );
				if ( !$status->isOK() ) {
					$this->output(
						__METHOD__ .
						": Error parsing page ID: $row->page_id or writing to parser cache\n"
					);
					continue;
				}

				$this->output( "[Done] Page ID: $row->page_id ✔️\n" );
			}

			$this->waitForReplication();
		}

		$this->output( "\nDone pre-warming parsoid parser cache...\n" );

		return true;
	}
}
// @codeCoverageIgnoreStart
// Name the script class, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN (defined outside this file; presumably it runs
// $maintClass when this script is the entry point — confirm against Maintenance.php).
$maintClass = PrewarmParsoidParserCache::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Fri, Jul 3, 20:02 (1 d, 6 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
42/5c/736d4a5d45746968b45549693a76
Default Alt Text
prewarmParsoidParserCache.php (6 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment