Page Menu
Home
WickedGov Phorge
Search
Configure Global Search
Log In
Files
F1429289
CheckUserCentralIndexManager.php
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Flag For Later
Award Token
Size
15 KB
Referenced Files
None
Subscribers
None
CheckUserCentralIndexManager.php
View Options
<?php
namespace
MediaWiki\CheckUser\Services
;
use
Job
;
use
JobQueueGroup
;
use
JobSpecification
;
use
MediaWiki\CheckUser\CheckUserQueryInterface
;
use
MediaWiki\CheckUser\Jobs\UpdateUserCentralIndexJob
;
use
MediaWiki\Config\ServiceOptions
;
use
MediaWiki\User\CentralId\CentralIdLookup
;
use
MediaWiki\User\TempUser\TempUserConfig
;
use
MediaWiki\User\UserFactory
;
use
MediaWiki\User\UserGroupManager
;
use
MediaWiki\User\UserIdentity
;
use
Psr\Log\LoggerInterface
;
use
RuntimeException
;
use
Wikimedia\IPUtils
;
use
Wikimedia\Rdbms\IDBAccessObject
;
use
Wikimedia\Rdbms\ILoadBalancer
;
use
Wikimedia\Rdbms\LBFactory
;
use
Wikimedia\Timestamp\ConvertibleTimestamp
;
/**
 * Service to insert and delete rows in the CheckUser central index tables
 */
class CheckUserCentralIndexManager implements CheckUserQueryInterface {

	public const CONSTRUCTOR_OPTIONS = [
		'CheckUserCentralIndexGroupsToExclude',
		'CheckUserCentralIndexRangesToExclude',
		'CheckUserWriteToCentralIndex',
		'CheckUserCuciUserRandomChanceDebounceCutoff',
	];

	private ServiceOptions $options;
	private LBFactory $lbFactory;
	private CentralIdLookup $centralIdLookup;
	private UserGroupManager $userGroupManager;
	private JobQueueGroup $jobQueueGroup;
	private TempUserConfig $tempUserConfig;
	private UserFactory $userFactory;
	private LoggerInterface $logger;

	public function __construct(
		ServiceOptions $options,
		LBFactory $lbFactory,
		CentralIdLookup $centralIdLookup,
		UserGroupManager $userGroupManager,
		JobQueueGroup $jobQueueGroup,
		TempUserConfig $tempUserConfig,
		UserFactory $userFactory,
		LoggerInterface $logger
	) {
		$this->options = $options;
		$this->lbFactory = $lbFactory;
		$this->centralIdLookup = $centralIdLookup;
		$this->userGroupManager = $userGroupManager;
		$this->jobQueueGroup = $jobQueueGroup;
		$this->tempUserConfig = $tempUserConfig;
		$this->userFactory = $userFactory;
		$this->logger = $logger;
	}

	/**
	 * Records that a CheckUser logged action (defined as an action that caused an insert to a local CheckUser
	 * result table) has occurred for a given wiki.
	 *
	 * If this is called in code that is producing a web response, then this should be queued for execution
	 * via a DeferredUpdate to be run on POST_SEND so as to not block the HTTP response.
	 *
	 * @param UserIdentity $performer The performer of the action that was logged to a local CheckUser result table
	 * @param string|null $ip The IP address used to perform the action
	 * @param string $domainID The domain ID for the wiki where the action was performed
	 * @param string $timestamp When the action was performed, as a TS_MW timestamp
	 * @param bool $hasRevisionId Whether the given action has a revision ID (i.e. is an edit)
	 * @return void
	 */
	public function recordActionInCentralIndexes(
		UserIdentity $performer, ?string $ip, string $domainID, string $timestamp, bool $hasRevisionId
	) {
		// Don't record data when the user does not exist locally or is an IP address, as for the user central index
		// we need a central ID and for the temp edit index the performer has to be a temporary account.
		if ( !$performer->isRegistered() ) {
			return;
		}

		// Skip recording in central index if this wiki's actions are not included in the central index. This
		// can occur for wikis that are not part of the wiki farm's SUL system.
		if ( !$this->options->get( 'CheckUserWriteToCentralIndex' ) ) {
			return;
		}

		// Get the cuci_wiki_map ID for this domain, to be used in inserting data to the central indexes.
		$wikiMapId = $this->getWikiMapIdForDomainId( $domainID );

		// Update the cuci_temp_edit central index for this cuci_wiki_map ID and IP combination.
		$this->recordActionInTempEditCentralIndex( $performer, $ip, $wikiMapId, $timestamp, $hasRevisionId );

		// Update the cuci_user central index for this cuci_wiki_map ID and central ID combination.
		$this->recordActionInUserCentralIndex( $performer, $ip, $wikiMapId, $timestamp );
	}

	/**
	 * Records an edit made by a temporary account into the cuci_temp_edit table for a given wiki and IP address.
	 *
	 * Nothing is written when the action is not an edit, the performer is not a temporary account, no usable
	 * IP address is available, or the existing row for this wiki and IP already has a timestamp that is newer
	 * than one minute before $timestamp.
	 *
	 * @param UserIdentity $performer The performer of the action that was logged to a local CheckUser result table
	 * @param string|null $ip The IP address used to perform the action
	 * @param int $wikiMapId The ciwm_id for the wiki where the action was performed
	 * @param string $timestamp When the action was performed, as a TS_MW timestamp
	 * @param bool $hasRevisionId Whether the given action has a revision ID (i.e. is an edit)
	 * @return void
	 */
	private function recordActionInTempEditCentralIndex(
		UserIdentity $performer, ?string $ip, int $wikiMapId, string $timestamp, bool $hasRevisionId
	): void {
		// We only record edits performed by temporary accounts in this index, so return early if the performer
		// is not a temporary account or if the action does not have a revision ID (i.e. not an edit). We also
		// cannot store a row if we have no IP address.
		if (
			!$hasRevisionId ||
			!$this->tempUserConfig->isTempName( $performer->getName() ) ||
			$ip === null
		) {
			return;
		}

		// IPUtils::toHex returns false when the string is not a valid IP address. Such a value cannot be used
		// as the key for a cuci_temp_edit row, so skip the update instead of reading and writing a bogus row.
		$ipAsHex = IPUtils::toHex( $ip );
		if ( $ipAsHex === false ) {
			return;
		}

		// Get the last cite_timestamp for this wiki and IP address combination, if any exists.
		$dbr = $this->lbFactory->getReplicaDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$lastTimestamp = $dbr->newSelectQueryBuilder()
			->select( 'cite_timestamp' )
			->from( 'cuci_temp_edit' )
			->where( [ 'cite_ciwm_id' => $wikiMapId, 'cite_ip_hex' => $ipAsHex ] )
			->caller( __METHOD__ )
			->fetchField();
		$lastTimestamp = $lastTimestamp ? (int)ConvertibleTimestamp::convert( TS_UNIX, $lastTimestamp ) : 0;

		// No need to update the index if the last timestamp is after our $timestamp or within a minute of our
		// $timestamp.
		$oneMinuteAgo = (int)ConvertibleTimestamp::convert( TS_UNIX, $timestamp ) - 60;
		if ( $oneMinuteAgo < $lastTimestamp ) {
			return;
		}

		// Either insert a cuci_temp_edit row or update it if one already exists, setting the timestamp provided.
		$dbw = $this->lbFactory->getPrimaryDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$dbw->newInsertQueryBuilder()
			->insertInto( 'cuci_temp_edit' )
			->row( [
				'cite_timestamp' => $dbw->timestamp( $timestamp ),
				'cite_ciwm_id' => $wikiMapId,
				'cite_ip_hex' => $ipAsHex,
			] )
			->onDuplicateKeyUpdate()
			->uniqueIndexFields( [ 'cite_ciwm_id', 'cite_ip_hex' ] )
			->set( [ 'cite_timestamp' => $dbw->timestamp( $timestamp ) ] )
			->caller( __METHOD__ )
			->execute();
	}

	/**
	 * Records a CheckUser logged action into the cuci_user table for a given wiki and central ID.
	 *
	 * The table itself is updated by a queued job; this method only decides whether an update is needed and,
	 * if so, pushes that job.
	 *
	 * @param UserIdentity $performer The performer of the action that was logged to a local CheckUser result table
	 * @param string|null $ip The IP address used to perform the action
	 * @param int $wikiMapId The ciwm_id for the wiki where the action was performed
	 * @param string $timestamp When the action was performed, as a TS_MW timestamp
	 * @return void
	 */
	private function recordActionInUserCentralIndex(
		UserIdentity $performer, ?string $ip, int $wikiMapId, string $timestamp
	): void {
		// Don't record actions by users in any of the configured groups that are marked as excluded.
		$excludedGroupsOfPerformer = array_intersect(
			$this->userGroupManager->getUserGroups( $performer ),
			$this->options->get( 'CheckUserCentralIndexGroupsToExclude' )
		);
		if ( count( $excludedGroupsOfPerformer ) ) {
			return;
		}

		// Don't record the action if the IP address used to make it is in the excluded ranges list.
		if ( $ip !== null ) {
			foreach ( $this->options->get( 'CheckUserCentralIndexRangesToExclude' ) as $rangeOrIP ) {
				// Skip the $rangeOrIP if it is not recognised as valid.
				if ( !IPUtils::isIPAddress( $rangeOrIP ) ) {
					continue;
				}

				if ( IPUtils::isInRange( $ip, $rangeOrIP ) ) {
					return;
				}
			}
		}

		// Get the central ID associated with the $performer, trying primary if we cannot find the ID on a replica DB.
		// We may need to try the primary DB when we are recording an account creation action in the index.
		$centralId = $this->centralIdLookup->centralIdFromLocalUser( $performer, CentralIdLookup::AUDIENCE_RAW );
		if ( !$centralId ) {
			$centralId = $this->centralIdLookup->centralIdFromLocalUser(
				$performer, CentralIdLookup::AUDIENCE_RAW, IDBAccessObject::READ_LATEST
			);
		}

		if ( !$centralId ) {
			// If we have been unable to find a central ID for the user, we should usually log an error for this.
			// The exception is when the user is a system user, as these are frequently not attached to a global
			// account.
			$shouldLogError = !$this->userFactory->newFromUserIdentity( $performer )->isSystemUser();
			if ( $shouldLogError ) {
				$this->logger->error(
					"Unable to find central ID for local user {username} when recording action in cuci_user table.",
					[ 'username' => $performer->getName(), 'exception' => new RuntimeException ]
				);
			}

			// We cannot record the action in the cuci_user table if we do not have a central ID for the performer,
			// so return early.
			return;
		}

		// Get the last ciu_timestamp for this wiki and central ID combination, if any exists.
		$dbr = $this->lbFactory->getReplicaDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$lastTimestamp = $dbr->newSelectQueryBuilder()
			->select( 'ciu_timestamp' )
			->from( 'cuci_user' )
			->where( [ 'ciu_ciwm_id' => $wikiMapId, 'ciu_central_id' => $centralId ] )
			->caller( __METHOD__ )
			->fetchField();
		$lastTimestamp = $lastTimestamp ? (int)ConvertibleTimestamp::convert( TS_UNIX, $lastTimestamp ) : 0;

		// Convert the action timestamp once; it is needed for both debounce checks below.
		$actionUnixTimestamp = (int)ConvertibleTimestamp::convert( TS_UNIX, $timestamp );

		// No need to update the index if the last timestamp is after our $timestamp or within a minute of our
		// $timestamp.
		$oneMinuteAgo = $actionUnixTimestamp - 60;
		if ( $oneMinuteAgo < $lastTimestamp ) {
			return;
		}

		// If the last timestamp was less than CheckUserCuciUserRandomChanceDebounceCutoff seconds ago,
		// only update the timestamp 1 out of 10 times. A falsy cutoff disables this check.
		$randomChanceCutoff = $this->options->get( 'CheckUserCuciUserRandomChanceDebounceCutoff' );
		if ( $randomChanceCutoff ) {
			$cutoff = $actionUnixTimestamp - (int)$randomChanceCutoff;
			if ( $cutoff < $lastTimestamp && mt_rand( 0, 9 ) !== 0 ) {
				return;
			}
		}

		// Queue a job to update the cuci_user table. Using a newRootJobParams call ensures that if multiple jobs
		// are submitted at once, we only end up running the newest job.
		$jobParams = [ 'centralID' => $centralId, 'wikiMapID' => $wikiMapId, 'timestamp' => $timestamp ];
		$jobParams += Job::newRootJobParams( "updateUserCentralIndex:$wikiMapId:$centralId" );

		// Modify the 'rootJobTimestamp' to be the timestamp we are submitting, as this will ensure that the
		// newest timestamp will be processed out of a bunch of duplicate jobs.
		$jobParams['rootJobTimestamp'] = $timestamp;
		$this->jobQueueGroup->push( new JobSpecification( UpdateUserCentralIndexJob::TYPE, $jobParams ) );
	}

	/**
	 * Gets the integer ID for the given string $domainID from the cuci_wiki_map table. Creates an ID if no ID
	 * already exists.
	 *
	 * @param string $domainID The domain ID for the local wiki
	 * @return int
	 */
	public function getWikiMapIdForDomainId( string $domainID ): int {
		// First try to get the wiki ID from the replica DB
		$dbr = $this->lbFactory->getReplicaDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$wikiMapId = $dbr->newSelectQueryBuilder()
			->select( 'ciwm_id' )
			->from( 'cuci_wiki_map' )
			->where( [ 'ciwm_wiki' => $domainID ] )
			->caller( __METHOD__ )
			->fetchField();
		if ( $wikiMapId !== false ) {
			// The field value is cast explicitly so that the declared int return type does not depend on
			// implicit type coercion.
			return (int)$wikiMapId;
		}

		// We could not find the wiki ID on the replica, so now try to insert the domain ID on the primary to get
		// an ID for this wiki. We need to do this using auto-commit mode, so that we can read the value from
		// the primary DB if the insert fails.
		$dbDomain = $this->lbFactory->getPrimaryDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN )->getDomainID();
		$dbw = $this->lbFactory->getLoadBalancer( self::VIRTUAL_GLOBAL_DB_DOMAIN )
			->getConnection( DB_PRIMARY, [], $dbDomain, ILoadBalancer::CONN_TRX_AUTOCOMMIT );
		$dbw->newInsertQueryBuilder()
			->ignore()
			->insertInto( 'cuci_wiki_map' )
			->row( [ 'ciwm_wiki' => $domainID ] )
			->caller( __METHOD__ )
			->execute();

		// Now that a row for this domain ID is expected to exist, grab its ID from the primary DB.
		return (int)$dbw->newSelectQueryBuilder()
			->select( 'ciwm_id' )
			->from( 'cuci_wiki_map' )
			->where( [ 'ciwm_wiki' => $domainID ] )
			->caller( __METHOD__ )
			->fetchField();
	}

	/**
	 * Purge a given number of expired rows from the central index tables where the wiki is the local wiki.
	 *
	 * We need to purge rows per-wiki, as each wiki can have its own value for the expiry of CU data.
	 *
	 * @param string $cutoff The timestamp used as a "cutoff", where rows which have a timestamp before the given
	 *   cutoff are eligible to be purged from the database. Should be in a form which the DB can recognise.
	 * @param string $domain The DB name of the wiki that we are purging rows from
	 * @param int $maximumRowsToPurge The maximum number of rows to purge from each of cuci_temp_edit and
	 *   cuci_user (the limit is applied to each table separately)
	 * @return int The number of rows that were selected for purging across both tables
	 */
	public function purgeExpiredRows( string $cutoff, string $domain, int $maximumRowsToPurge = 100 ): int {
		// Find the ID associated with this DB domain, or if it is not present in the cuci_wiki_map table then
		// return early as there will be no matching rows to purge.
		$dbr = $this->lbFactory->getReplicaDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$wikiId = $dbr->newSelectQueryBuilder()
			->select( 'ciwm_id' )
			->from( 'cuci_wiki_map' )
			->where( [ 'ciwm_wiki' => $domain ] )
			->caller( __METHOD__ )
			->fetchField();
		if ( $wikiId === false ) {
			return 0;
		}

		// First purge rows from cuci_temp_edit
		$dbw = $this->lbFactory->getPrimaryDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$ipRowsToPurge = $dbw->newSelectQueryBuilder()
			->select( [ 'cite_ip_hex', 'cite_timestamp' ] )
			->from( 'cuci_temp_edit' )
			->where( [
				'cite_ciwm_id' => $wikiId,
				$dbw->expr( 'cite_timestamp', '<', $cutoff )
			] )
			->limit( $maximumRowsToPurge )
			->caller( __METHOD__ )
			->fetchResultSet();

		$ipsToPurgeConds = [];
		foreach ( $ipRowsToPurge as $row ) {
			$ipsToPurgeConds[] = $dbw->andExpr( [
				'cite_ciwm_id' => $wikiId,
				'cite_ip_hex' => $row->cite_ip_hex,
				// Use the timestamp as a CAS check to prevent races with concurrent updates to these rows
				'cite_timestamp' => $row->cite_timestamp
			] );
		}

		if ( count( $ipsToPurgeConds ) ) {
			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'cuci_temp_edit' )
				->where( $dbw->orExpr( $ipsToPurgeConds ) )
				->caller( __METHOD__ )
				->execute();
		}

		// Then purge rows from cuci_user
		$centralIdsToPurgeRows = $dbw->newSelectQueryBuilder()
			->select( [ 'ciu_central_id', 'ciu_timestamp' ] )
			->from( 'cuci_user' )
			->where( [
				'ciu_ciwm_id' => $wikiId,
				$dbw->expr( 'ciu_timestamp', '<', $cutoff )
			] )
			->limit( $maximumRowsToPurge )
			->caller( __METHOD__ )
			->fetchResultSet();

		$centralIdsToPurgeConds = [];
		foreach ( $centralIdsToPurgeRows as $row ) {
			$centralIdsToPurgeConds[] = $dbw->andExpr( [
				'ciu_ciwm_id' => $wikiId,
				'ciu_central_id' => $row->ciu_central_id,
				// Use the timestamp as a CAS check to prevent races with concurrent updates to these rows
				'ciu_timestamp' => $row->ciu_timestamp
			] );
		}

		if ( count( $centralIdsToPurgeConds ) ) {
			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'cuci_user' )
				->where( $dbw->orExpr( $centralIdsToPurgeConds ) )
				->caller( __METHOD__ )
				->execute();
		}

		// Return the sum of the rows found for purging. We do this, instead of ::affectedRows, because the
		// aforementioned method does not work if a DELETE statement was not run (like in the case of
		// 0 rows found for purging).
		return count( $ipsToPurgeConds ) + count( $centralIdsToPurgeConds );
	}
}
File Metadata
Details
Attached
Mime Type
text/x-php
Expires
Sat, May 16, 17:23 (10 h, 20 m)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
f7/6d/3ed3b5681b7dbe636e952a11b56c
Default Alt Text
CheckUserCentralIndexManager.php (15 KB)
Attached To
Mode
rMWPROD MediaWiki Production
Attached
Detach File
Event Timeline
Log In to Comment