Page MenuHomeWickedGov Phorge

CheckUserCentralIndexManager.php
No OneTemporary

Size
15 KB
Referenced Files
None
Subscribers
None

CheckUserCentralIndexManager.php

<?php
namespace MediaWiki\CheckUser\Services;
use Job;
use JobQueueGroup;
use JobSpecification;
use MediaWiki\CheckUser\CheckUserQueryInterface;
use MediaWiki\CheckUser\Jobs\UpdateUserCentralIndexJob;
use MediaWiki\Config\ServiceOptions;
use MediaWiki\User\CentralId\CentralIdLookup;
use MediaWiki\User\TempUser\TempUserConfig;
use MediaWiki\User\UserFactory;
use MediaWiki\User\UserGroupManager;
use MediaWiki\User\UserIdentity;
use Psr\Log\LoggerInterface;
use RuntimeException;
use Wikimedia\IPUtils;
use Wikimedia\Rdbms\IDBAccessObject;
use Wikimedia\Rdbms\ILoadBalancer;
use Wikimedia\Rdbms\LBFactory;
use Wikimedia\Timestamp\ConvertibleTimestamp;
/**
* Service to insert and delete rows in the CheckUser central index tables
*/
class CheckUserCentralIndexManager implements CheckUserQueryInterface {

	/** The configuration options read by this service via the ServiceOptions object. */
	public const CONSTRUCTOR_OPTIONS = [
		'CheckUserCentralIndexGroupsToExclude',
		'CheckUserCentralIndexRangesToExclude',
		'CheckUserWriteToCentralIndex',
		'CheckUserCuciUserRandomChanceDebounceCutoff',
	];

	/**
	 * The number of seconds that must separate the timestamp stored in a central index row from the timestamp
	 * of a new action before the row is updated again.
	 */
	private const DEBOUNCE_SECONDS = 60;

	private ServiceOptions $options;
	private LBFactory $lbFactory;
	private CentralIdLookup $centralIdLookup;
	private UserGroupManager $userGroupManager;
	private JobQueueGroup $jobQueueGroup;
	private TempUserConfig $tempUserConfig;
	private UserFactory $userFactory;
	private LoggerInterface $logger;

	public function __construct(
		ServiceOptions $options,
		LBFactory $lbFactory,
		CentralIdLookup $centralIdLookup,
		UserGroupManager $userGroupManager,
		JobQueueGroup $jobQueueGroup,
		TempUserConfig $tempUserConfig,
		UserFactory $userFactory,
		LoggerInterface $logger
	) {
		$this->options = $options;
		$this->lbFactory = $lbFactory;
		$this->centralIdLookup = $centralIdLookup;
		$this->userGroupManager = $userGroupManager;
		$this->jobQueueGroup = $jobQueueGroup;
		$this->tempUserConfig = $tempUserConfig;
		$this->userFactory = $userFactory;
		$this->logger = $logger;
	}

	/**
	 * Records that a CheckUser logged action (defined as an action that caused an insert to a local CheckUser
	 * result table) has occurred for a given wiki.
	 *
	 * If this is called in code that is producing a web response, then this should be queued for execution
	 * via a DeferredUpdate to be run on POST_SEND so as to not block the HTTP response.
	 *
	 * @param UserIdentity $performer The performer of the action that was logged to a local CheckUser result table
	 * @param string|null $ip The IP address used to perform the action
	 * @param string $domainID The domain ID for the wiki where the action was performed
	 * @param string $timestamp When the action was performed, as a TS_MW timestamp
	 * @param bool $hasRevisionId Whether the given action has a revision ID (i.e. is an edit)
	 * @return void
	 * @throws RuntimeException If no cuci_wiki_map ID could be found or created for $domainID
	 */
	public function recordActionInCentralIndexes(
		UserIdentity $performer, ?string $ip, string $domainID, string $timestamp, bool $hasRevisionId
	) {
		// Don't record data when the user does not exist locally or is an IP address, as for the user central index
		// we need a central ID and for the temp edit index the performer has to be a temporary account.
		if ( !$performer->isRegistered() ) {
			return;
		}

		// Skip recording in central index if this wiki's actions are not included in the central index. This
		// can occur for wikis that are not part of the wiki farm's SUL system.
		if ( !$this->options->get( 'CheckUserWriteToCentralIndex' ) ) {
			return;
		}

		// Get the cuci_wiki_map ID for this domain, to be used in inserting data to the central indexes.
		$wikiMapId = $this->getWikiMapIdForDomainId( $domainID );

		// Update the cuci_temp_edit central index for this cuci_wiki_map ID and IP combination.
		$this->recordActionInTempEditCentralIndex( $performer, $ip, $wikiMapId, $timestamp, $hasRevisionId );

		// Update the cuci_user central index for this cuci_wiki_map ID and central ID combination.
		$this->recordActionInUserCentralIndex( $performer, $ip, $wikiMapId, $timestamp );
	}

	/**
	 * Records an edit made by a temporary account into the cuci_temp_edit table for a given wiki and IP address.
	 *
	 * Nothing is written if the action is not an edit, the performer is not a temporary account, the IP address
	 * is missing or cannot be converted to hexadecimal form, or the existing row for this wiki and IP already
	 * has a timestamp that is newer than (or within DEBOUNCE_SECONDS of) the provided timestamp.
	 *
	 * @param UserIdentity $performer The performer of the action that was logged to a local CheckUser result table
	 * @param string|null $ip The IP address used to perform the action
	 * @param int $wikiMapId The ciwm_id for the wiki where the action was performed
	 * @param string $timestamp When the action was performed, as a TS_MW timestamp
	 * @param bool $hasRevisionId Whether the given action has a revision ID (i.e. is an edit)
	 * @return void
	 */
	private function recordActionInTempEditCentralIndex(
		UserIdentity $performer, ?string $ip, int $wikiMapId, string $timestamp, bool $hasRevisionId
	): void {
		// We only record edits performed by temporary accounts in this index, so return early if the performer
		// is not a temporary account or if the action does not have a revision ID (i.e. not an edit). We also
		// cannot store a row if we have no IP address.
		if ( !$hasRevisionId || !$this->tempUserConfig->isTempName( $performer->getName() ) || $ip === null ) {
			return;
		}

		// IPUtils::toHex returns false when the value is not a valid IP address. Such a value cannot be used
		// as the cite_ip_hex key, so skip recording instead of writing a meaningless row.
		$ipAsHex = IPUtils::toHex( $ip );
		if ( $ipAsHex === false ) {
			return;
		}

		// Get the last cite_timestamp for this wiki and IP address combination, if any exists.
		$dbr = $this->lbFactory->getReplicaDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$lastTimestamp = $dbr->newSelectQueryBuilder()
			->select( 'cite_timestamp' )
			->from( 'cuci_temp_edit' )
			->where( [ 'cite_ciwm_id' => $wikiMapId, 'cite_ip_hex' => $ipAsHex ] )
			->caller( __METHOD__ )
			->fetchField();
		$lastTimestampAsUnix = $lastTimestamp ? (int)ConvertibleTimestamp::convert( TS_UNIX, $lastTimestamp ) : 0;

		// No need to update the index if the last timestamp is after our $timestamp or within a minute of our
		// $timestamp.
		$timestampAsUnix = (int)ConvertibleTimestamp::convert( TS_UNIX, $timestamp );
		if ( $timestampAsUnix - self::DEBOUNCE_SECONDS < $lastTimestampAsUnix ) {
			return;
		}

		// Either insert a cuci_temp_edit row or update it if one already exists, setting the timestamp provided.
		$dbw = $this->lbFactory->getPrimaryDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$dbw->newInsertQueryBuilder()
			->insertInto( 'cuci_temp_edit' )
			->row( [
				'cite_timestamp' => $dbw->timestamp( $timestamp ),
				'cite_ciwm_id' => $wikiMapId,
				'cite_ip_hex' => $ipAsHex,
			] )
			->onDuplicateKeyUpdate()
			->uniqueIndexFields( [ 'cite_ciwm_id', 'cite_ip_hex' ] )
			->set( [ 'cite_timestamp' => $dbw->timestamp( $timestamp ) ] )
			->caller( __METHOD__ )
			->execute();
	}

	/**
	 * Records a CheckUser logged action into the cuci_user table for a given wiki and central ID.
	 *
	 * The write itself is not performed here: once all of the exclusion and debounce checks pass, a job of
	 * type UpdateUserCentralIndexJob::TYPE is pushed to the job queue.
	 *
	 * @param UserIdentity $performer The performer of the action that was logged to a local CheckUser result table
	 * @param string|null $ip The IP address used to perform the action
	 * @param int $wikiMapId The ciwm_id for the wiki where the action was performed
	 * @param string $timestamp When the action was performed, as a TS_MW timestamp
	 * @return void
	 */
	private function recordActionInUserCentralIndex(
		UserIdentity $performer, ?string $ip, int $wikiMapId, string $timestamp
	): void {
		// Don't record actions by users in any of the configured groups that are marked as excluded.
		if ( count( array_intersect(
			$this->userGroupManager->getUserGroups( $performer ),
			$this->options->get( 'CheckUserCentralIndexGroupsToExclude' )
		) ) ) {
			return;
		}

		// Don't record the action if the IP address used to make it is in the excluded ranges list.
		if ( $ip !== null ) {
			foreach ( $this->options->get( 'CheckUserCentralIndexRangesToExclude' ) as $rangeOrIP ) {
				// Skip the $rangeOrIP if it is not recognised as valid.
				if ( !IPUtils::isIPAddress( $rangeOrIP ) ) {
					continue;
				}

				if ( IPUtils::isInRange( $ip, $rangeOrIP ) ) {
					return;
				}
			}
		}

		// Get the central ID associated with the $performer, trying primary if we cannot find the ID on a replica DB.
		// We may need to try the primary DB when we are recording an account creation action in the index.
		$centralId = $this->centralIdLookup->centralIdFromLocalUser( $performer, CentralIdLookup::AUDIENCE_RAW );
		if ( !$centralId ) {
			$centralId = $this->centralIdLookup->centralIdFromLocalUser(
				$performer, CentralIdLookup::AUDIENCE_RAW, IDBAccessObject::READ_LATEST
			);
		}

		if ( !$centralId ) {
			// If we have been unable to find a central ID for the user, we should usually log an error for this.
			// The exception is when the user is a system user, as these are frequently not attached to a global
			// account.
			$shouldLogError = !$this->userFactory->newFromUserIdentity( $performer )->isSystemUser();
			if ( $shouldLogError ) {
				$this->logger->error(
					"Unable to find central ID for local user {username} when recording action in cuci_user table.",
					[ 'username' => $performer->getName(), 'exception' => new RuntimeException ]
				);
			}

			// We cannot record the action in the cuci_user table if we do not have a central ID for the performer,
			// so return early.
			return;
		}

		// Get the last ciu_timestamp for this wiki and central ID combination, if any exists.
		$dbr = $this->lbFactory->getReplicaDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$lastTimestamp = $dbr->newSelectQueryBuilder()
			->select( 'ciu_timestamp' )
			->from( 'cuci_user' )
			->where( [ 'ciu_ciwm_id' => $wikiMapId, 'ciu_central_id' => $centralId ] )
			->caller( __METHOD__ )
			->fetchField();
		$lastTimestampAsUnix = $lastTimestamp ? (int)ConvertibleTimestamp::convert( TS_UNIX, $lastTimestamp ) : 0;

		// No need to update the index if the last timestamp is after our $timestamp or within a minute of our
		// $timestamp.
		$timestampAsUnix = (int)ConvertibleTimestamp::convert( TS_UNIX, $timestamp );
		if ( $timestampAsUnix - self::DEBOUNCE_SECONDS < $lastTimestampAsUnix ) {
			return;
		}

		// If the last timestamp is less than the number of seconds set in the
		// CheckUserCuciUserRandomChanceDebounceCutoff option before our $timestamp, only update the
		// timestamp 1 out of 10 times. A falsy value for the option disables this check.
		$randomChanceCutoff = $this->options->get( 'CheckUserCuciUserRandomChanceDebounceCutoff' );
		if ( $randomChanceCutoff ) {
			$cutoff = $timestampAsUnix - (int)$randomChanceCutoff;
			if ( $cutoff < $lastTimestampAsUnix && mt_rand( 0, 9 ) !== 0 ) {
				return;
			}
		}

		// Queue a job to update the cuci_user table. Using a newRootJobParams call ensures that if multiple jobs
		// are submitted at once, we only end up running the newest job.
		$jobParams = [ 'centralID' => $centralId, 'wikiMapID' => $wikiMapId, 'timestamp' => $timestamp ];
		$jobParams += Job::newRootJobParams( "updateUserCentralIndex:$wikiMapId:$centralId" );

		// Modify the 'rootJobTimestamp' to be the timestamp we are submitting, as this will ensure that the
		// newest timestamp will be processed out of a bunch of duplicate jobs.
		$jobParams['rootJobTimestamp'] = $timestamp;

		$this->jobQueueGroup->push( new JobSpecification( UpdateUserCentralIndexJob::TYPE, $jobParams ) );
	}

	/**
	 * Gets the integer ID for the given string $domainID from the cuci_wiki_map table. Creates an ID if no ID
	 * already exists.
	 *
	 * @param string $domainID The domain ID for the local wiki
	 * @return int
	 * @throws RuntimeException If no row for $domainID can be read from the primary DB after attempting the insert
	 */
	public function getWikiMapIdForDomainId( string $domainID ): int {
		// First try to get the wiki ID from the replica DB
		$dbr = $this->lbFactory->getReplicaDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$wikiMapId = $dbr->newSelectQueryBuilder()
			->select( 'ciwm_id' )
			->from( 'cuci_wiki_map' )
			->where( [ 'ciwm_wiki' => $domainID ] )
			->caller( __METHOD__ )
			->fetchField();
		if ( $wikiMapId !== false ) {
			// Cast explicitly rather than relying on implicit coercion to the declared return type.
			return (int)$wikiMapId;
		}

		// We could not find the wiki ID on the replica, so now try to insert the domain ID on the primary to get
		// an ID for this wiki. We need to do this using auto-commit mode, so that we can read the value from
		// the primary DB if the insert fails.
		$dbDomain = $this->lbFactory->getPrimaryDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN )->getDomainID();
		$dbw = $this->lbFactory->getLoadBalancer( self::VIRTUAL_GLOBAL_DB_DOMAIN )->getConnection(
			DB_PRIMARY, [], $dbDomain, ILoadBalancer::CONN_TRX_AUTOCOMMIT
		);
		$dbw->newInsertQueryBuilder()
			->ignore()
			->insertInto( 'cuci_wiki_map' )
			->row( [ 'ciwm_wiki' => $domainID ] )
			->caller( __METHOD__ )
			->execute();

		// A row for this domain ID is now expected to exist, so grab its ID from the primary DB.
		$wikiMapId = $dbw->newSelectQueryBuilder()
			->select( 'ciwm_id' )
			->from( 'cuci_wiki_map' )
			->where( [ 'ciwm_wiki' => $domainID ] )
			->caller( __METHOD__ )
			->fetchField();
		if ( $wikiMapId === false ) {
			// Without this check the false would be silently coerced to an ID of 0 by the int return type,
			// and callers would then write central index rows against a wiki map ID that does not exist.
			throw new RuntimeException(
				"Unable to find or create a cuci_wiki_map row for the domain ID '$domainID'."
			);
		}

		return (int)$wikiMapId;
	}

	/**
	 * Purge a given number of expired rows from the central index tables where the wiki is the local wiki.
	 *
	 * We need to purge rows per-wiki, as each wiki can have its own value for the expiry of CU data.
	 *
	 * @param string $cutoff The timestamp used as a "cutoff", where rows which have a timestamp before the given
	 *   cutoff are eligible to be purged from the database. Should be in a form which the DB can recognise.
	 * @param string $domain The DB name of the wiki that we are purging rows from
	 * @param int $maximumRowsToPurge The maximum number of rows to purge from each of cuci_temp_edit and
	 *   cuci_user (the limit is applied to each table separately)
	 * @return int The number of rows that were selected for purging across both tables
	 */
	public function purgeExpiredRows( string $cutoff, string $domain, int $maximumRowsToPurge = 100 ): int {
		// Find the ID associated with this DB domain, or if it is not present in the cuci_wiki_map table then
		// return early as there will be no matching rows to purge.
		$dbr = $this->lbFactory->getReplicaDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$wikiId = $dbr->newSelectQueryBuilder()
			->select( 'ciwm_id' )
			->from( 'cuci_wiki_map' )
			->where( [ 'ciwm_wiki' => $domain ] )
			->caller( __METHOD__ )
			->fetchField();
		if ( $wikiId === false ) {
			return 0;
		}

		// First purge rows from cuci_temp_edit
		$dbw = $this->lbFactory->getPrimaryDatabase( self::VIRTUAL_GLOBAL_DB_DOMAIN );
		$ipRowsToPurge = $dbw->newSelectQueryBuilder()
			->select( [ 'cite_ip_hex', 'cite_timestamp' ] )
			->from( 'cuci_temp_edit' )
			->where( [ 'cite_ciwm_id' => $wikiId, $dbw->expr( 'cite_timestamp', '<', $cutoff ) ] )
			->limit( $maximumRowsToPurge )
			->caller( __METHOD__ )
			->fetchResultSet();
		$ipsToPurgeConds = [];
		foreach ( $ipRowsToPurge as $row ) {
			$ipsToPurgeConds[] = $dbw->andExpr( [
				'cite_ciwm_id' => $wikiId,
				'cite_ip_hex' => $row->cite_ip_hex,
				// Use the timestamp as a CAS check to prevent races with concurrent updates to these rows
				'cite_timestamp' => $row->cite_timestamp
			] );
		}
		if ( count( $ipsToPurgeConds ) ) {
			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'cuci_temp_edit' )
				->where( $dbw->orExpr( $ipsToPurgeConds ) )
				->caller( __METHOD__ )
				->execute();
		}

		// Then purge rows from cuci_user
		$centralIdsToPurgeRows = $dbw->newSelectQueryBuilder()
			->select( [ 'ciu_central_id', 'ciu_timestamp' ] )
			->from( 'cuci_user' )
			->where( [ 'ciu_ciwm_id' => $wikiId, $dbw->expr( 'ciu_timestamp', '<', $cutoff ) ] )
			->limit( $maximumRowsToPurge )
			->caller( __METHOD__ )
			->fetchResultSet();
		$centralIdsToPurgeConds = [];
		foreach ( $centralIdsToPurgeRows as $row ) {
			$centralIdsToPurgeConds[] = $dbw->andExpr( [
				'ciu_ciwm_id' => $wikiId,
				'ciu_central_id' => $row->ciu_central_id,
				// Use the timestamp as a CAS check to prevent races with concurrent updates to these rows
				'ciu_timestamp' => $row->ciu_timestamp
			] );
		}
		if ( count( $centralIdsToPurgeConds ) ) {
			$dbw->newDeleteQueryBuilder()
				->deleteFrom( 'cuci_user' )
				->where( $dbw->orExpr( $centralIdsToPurgeConds ) )
				->caller( __METHOD__ )
				->execute();
		}

		// Return the sum of the rows found for purging. We do this, instead of ::affectedRows, because the
		// aforementioned method does not work if a DELETE statement was not run (like in the case of
		// 0 rows found for purging).
		return count( $ipsToPurgeConds ) + count( $centralIdsToPurgeConds );
	}
}

File Metadata

Mime Type
text/x-php
Expires
Sat, May 16, 17:23 (10 h, 27 m)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
f7/6d/3ed3b5681b7dbe636e952a11b56c
Default Alt Text
CheckUserCentralIndexManager.php (15 KB)

Event Timeline