Page MenuHomeWickedGov Phorge

migrateFileTables.php
No OneTemporary

Size
8 KB
Referenced Files
None
Subscribers
None

migrateFileTables.php

<?php
/**
* Maintenance script to migrate file data from the image/oldimage tables to the file/filerevision tables.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
* http://www.gnu.org/copyleft/gpl.html
*
* @file
* @ingroup Maintenance
*/
// @codeCoverageIgnoreStart
require_once __DIR__ . '/Maintenance.php';
// @codeCoverageIgnoreEnd
use MediaWiki\Maintenance\Maintenance;
use Wikimedia\Rdbms\IMaintainableDatabase;
use Wikimedia\Rdbms\SelectQueryBuilder;
/**
 * Maintenance script to migrate file data from the image/oldimage tables
 * to the file/filerevision tables.
 *
 * Files are processed in batches ordered by img_name. For each file, one
 * filerevision row is written per oldimage row plus one for the current
 * image row, and the file row is pointed at the newest revision.
 *
 * @ingroup Maintenance
 */
class MigrateFileTables extends Maintenance {

	/**
	 * Not referenced anywhere in this class; kept for backward compatibility.
	 *
	 * @var IMaintainableDatabase
	 */
	protected $dbw;

	/**
	 * Declare the script description, the default batch size and the
	 * command line options (start, end, sleep).
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Script to migrate from image/oldimage tables to file/filerevision' );
		$this->setBatchSize( 200 );
		$this->addOption( 'start', 'Name of file to start with', false, true );
		$this->addOption( 'end', 'Name of file to end with', false, true );
		$this->addOption(
			'sleep',
			'Time to sleep between each batch (in seconds). Default: 0',
			false,
			true
		);
	}

	/**
	 * Walk the image table in img_name order, one batch at a time, and
	 * migrate every file found in the [start, end] name range.
	 *
	 * After each batch the script waits for replication and optionally
	 * sleeps for the number of seconds given by the "sleep" option.
	 */
	public function execute() {
		$start = $this->getOption( 'start', false );
		$end = $this->getOption( 'end', false );
		$sleep = (int)$this->getOption( 'sleep', 0 );

		$batchSize = intval( $this->getBatchSize() );
		if ( $batchSize <= 0 ) {
			$this->fatalError( "Batch size is too low...", 12 );
		}

		$dbw = $this->getPrimaryDB();
		$queryBuilderTemplate = $dbw->newSelectQueryBuilder()
			->select(
				[
					'img_name',
					'img_size',
					'img_width',
					'img_height',
					'img_metadata',
					'img_bits',
					'img_media_type',
					'img_major_mime',
					'img_minor_mime',
					'img_timestamp',
					'img_sha1',
					'img_actor',
					'img_description_id',
					'img_description_text' => 'comment_img_description.comment_text',
					'img_description_data' => 'comment_img_description.comment_data',
					'img_description_cid' => 'comment_img_description.comment_id'
				]
			)
			->from( 'image' )
			->join(
				'comment',
				'comment_img_description',
				'comment_img_description.comment_id = img_description_id'
			);
		if ( $end !== false ) {
			$queryBuilderTemplate->andWhere( $dbw->expr( 'img_name', '<=', $end ) );
		}
		$queryBuilderTemplate
			->orderBy( 'img_name', SelectQueryBuilder::SORT_ASC )
			->limit( $batchSize );

		// Lower bound of the next batch. The first batch is inclusive of
		// --start (>=); later batches continue strictly after the last
		// file name that was handled (>).
		$batchCondition = [];
		if ( $start !== false ) {
			$batchCondition[] = $dbw->expr( 'img_name', '>=', $start );
		}

		$totalRowsInserted = 0;
		$filesHandled = 0;
		do {
			$queryBuilder = clone $queryBuilderTemplate;
			$res = $queryBuilder->andWhere( $batchCondition )
				->caller( __METHOD__ )->fetchResultSet();
			$numRows = $res->numRows();

			// Name of the last file handled in this batch; stays null for
			// an empty batch so the cursor is left untouched.
			$lastName = null;
			foreach ( $res as $row ) {
				$name = $row->img_name;
				if ( $lastName === null ) {
					$this->output( "Processing next {$numRows} row(s) starting with {$name}.\n" );
				}
				$rowsInserted = $this->handleFile( $row );
				$filesHandled += 1;
				$totalRowsInserted += $rowsInserted;
				$this->output( "Migrated File:{$row->img_name}. Inserted $rowsInserted rows.\n" );
				$lastName = $name;
			}
			if ( $lastName !== null ) {
				$batchCondition = [ $dbw->expr( 'img_name', '>', $lastName ) ];
			}

			$this->waitForReplication();
			if ( $sleep ) {
				sleep( $sleep );
			}
			// A short batch means the end of the range has been reached.
		} while ( $numRows === $batchSize );

		$this->output( "\nFinished migration for $filesHandled files. "
			. "$totalRowsInserted rows have been inserted into filerevision table.\n" );
	}

	/**
	 * Migrate a single file: make sure every oldimage row and the current
	 * image row have a filerevision row, then point file.file_latest at the
	 * newest filerevision row.
	 *
	 * Rows that already have a filerevision row with the same timestamp and
	 * sha1 are skipped.
	 *
	 * @param stdClass $row Row from the image table, as selected by execute().
	 *  Its img_description_id property is removed by this method.
	 * @return int Number of rows inserted into filerevision
	 */
	private function handleFile( stdClass $row ): int {
		$repo = $this->getServiceContainer()->getRepoGroup()
			->newCustomLocalRepo();
		$dbw = $this->getPrimaryDB();
		$rowsInserted = 0;

		// LocalFile doesn't like it when the row holds img_description_id
		$imgDescriptionId = $row->img_description_id;
		unset( $row->img_description_id );
		$file = $repo->newFileFromRow( $row );

		// Lock everything we can
		$file->acquireFileLock();
		try {
			$dbw->startAtomic( __METHOD__ );

			// The image and file rows are selected only to take row locks
			// (FOR UPDATE); their results are not used.
			$dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'image' )
				->where( [ 'img_name' => $row->img_name ] )
				->caller( __METHOD__ )->fetchRow();
			$oldimageRows = $dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'oldimage' )
				->where( [ 'oi_name' => $row->img_name ] )
				->orderBy( 'oi_timestamp', SelectQueryBuilder::SORT_ASC )
				->caller( __METHOD__ )->fetchResultSet();
			$dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'file' )
				->where( [ 'file_name' => $row->img_name ] )
				->caller( __METHOD__ )->fetchRow();

			// Make sure the row exists in file table
			$fileId = $file->acquireFileIdFromName();

			$fileRevisionRows = $dbw->newSelectQueryBuilder()
				->select( '*' )
				->forUpdate()
				->from( 'filerevision' )
				->where( [ 'fr_file' => $fileId ] )
				->caller( __METHOD__ )->fetchResultSet();

			// Index the revisions that existed before this run by
			// (timestamp, sha1) so each lookup is a hash check instead of
			// a scan over the whole result set.
			$alreadyMigrated = [];
			foreach ( $fileRevisionRows as $fileRevisionRow ) {
				$key = self::revisionKey( $fileRevisionRow->fr_timestamp, $fileRevisionRow->fr_sha1 );
				$alreadyMigrated[$key] = true;
			}

			// Make sure the filerevision rows exist.
			// This assumes the combination of oi_timestamp and oi_sha1 is
			// always unique, which is not the case in production, but all
			// of those were duplicate old uploads and we are willing to
			// simply insert one row only. See T67264.
			// $seen is extended after every insert so that a duplicate met
			// later in this same run is skipped as well.
			$seen = $alreadyMigrated;
			foreach ( $oldimageRows as $oldimageRow ) {
				$key = self::revisionKey( $oldimageRow->oi_timestamp, $oldimageRow->oi_sha1 );
				if ( isset( $seen[$key] ) ) {
					continue;
				}
				$this->insertFileRevision( [
					'fr_file' => $fileId,
					'fr_size' => $oldimageRow->oi_size,
					'fr_width' => $oldimageRow->oi_width,
					'fr_height' => $oldimageRow->oi_height,
					'fr_metadata' => $oldimageRow->oi_metadata,
					'fr_bits' => $oldimageRow->oi_bits,
					'fr_description_id' => $oldimageRow->oi_description_id,
					'fr_actor' => $oldimageRow->oi_actor,
					'fr_timestamp' => $oldimageRow->oi_timestamp,
					'fr_sha1' => $oldimageRow->oi_sha1,
					'fr_archive_name' => $oldimageRow->oi_archive_name,
					'fr_deleted' => $oldimageRow->oi_deleted,
				] );
				$seen[$key] = true;
				$rowsInserted += 1;
			}

			// Make sure the image row (most current version) is there.
			// It is compared only against the revisions that existed before
			// this run, so an oldimage row inserted above never causes the
			// current version to be skipped.
			$currentKey = self::revisionKey( $row->img_timestamp, $row->img_sha1 );
			if ( !isset( $alreadyMigrated[$currentKey] ) ) {
				$this->insertFileRevision( [
					'fr_file' => $fileId,
					'fr_size' => $row->img_size,
					'fr_width' => $row->img_width,
					'fr_height' => $row->img_height,
					'fr_metadata' => $row->img_metadata,
					'fr_bits' => $row->img_bits,
					'fr_description_id' => $imgDescriptionId,
					'fr_actor' => $row->img_actor,
					'fr_timestamp' => $row->img_timestamp,
					'fr_sha1' => $row->img_sha1,
					'fr_archive_name' => '',
					'fr_deleted' => 0,
				] );
				$rowsInserted += 1;
			}

			// Make sure file has the latest filerevision
			$latestFrId = $dbw->newSelectQueryBuilder()
				->select( 'fr_id' )
				->from( 'filerevision' )
				->where( [ 'fr_file' => $fileId ] )
				->orderBy( 'fr_timestamp', SelectQueryBuilder::SORT_DESC )
				->caller( __METHOD__ )->fetchField();
			$dbw->newUpdateQueryBuilder()
				->update( 'file' )
				->set( [ 'file_latest' => $latestFrId ] )
				->where( [ 'file_id' => $fileId ] )
				->caller( __METHOD__ )->execute();

			$dbw->endAtomic( __METHOD__ );
		} finally {
			// Release the file lock even when the migration of this file
			// fails part-way, so an exception does not leave it locked.
			$file->releaseFileLock();
		}

		return $rowsInserted;
	}

	/**
	 * Build the lookup key that identifies one file revision.
	 *
	 * @param string $timestamp Upload timestamp as stored in the database
	 * @param string $sha1 Content hash as stored in the database
	 * @return string
	 */
	private static function revisionKey( $timestamp, $sha1 ): string {
		return $timestamp . '|' . $sha1;
	}

	/**
	 * Insert a single row into the filerevision table.
	 *
	 * @param array $fields Map of filerevision column name to value
	 */
	private function insertFileRevision( array $fields ): void {
		$this->getPrimaryDB()->newInsertQueryBuilder()
			->insertInto( 'filerevision' )
			->row( $fields )
			->caller( __METHOD__ )->execute();
	}
}
// Script bootstrap: name this class as the maintenance class, then include
// the file referenced by RUN_MAINTENANCE_IF_MAIN (presumably what actually
// runs the script when this file is the entry point; defined outside this file).
// @codeCoverageIgnoreStart
$maintClass = MigrateFileTables::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd

File Metadata

Mime Type
text/x-php
Expires
Sat, May 16, 17:37 (10 h, 11 m)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
6a/be/686a1b1ac9b91f80d2d3005addcb
Default Alt Text
migrateFileTables.php (8 KB)

Event Timeline