<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */
namespace Wikimedia\Rdbms;

use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use RuntimeException;
use Wikimedia\ScopedCallback;
use Wikimedia\Stats\StatsFactory;

/**
 * Detect high-contention DB queries via profiling calls.
 *
 * This class is meant to work with an IDatabase object, which manages queries.
 *
 * @internal For use by Database only
 * @since 1.24
 * @ingroup Profiler
 * @ingroup Database
 */
class TransactionProfiler implements LoggerAwareInterface {
	/** @var LoggerInterface */
	private $logger;
	/** @var StatsFactory */
	private $statsFactory;
	/** @var array<string,array> Map of (event name => map of FLD_* class constants) */
	private $expect;
	/** @var array<string,int> Map of (event name => current hits) */
	private $hits;
	/** @var array<string,int> Map of (event name => violation counter) */
	private $violations;
	/** @var array<string,int> Map of (event name => silence counter) */
	private $silenced;

	/**
	 * @var array<string,array> Map of (trx ID => (write start time, list of DBs involved))
	 * @phan-var array<string,array{start:float,conns:array<string,int>}>
	 */
	private $dbTrxHoldingLocks;

	/**
	 * @var array[][] Map of (trx ID => list of (query name, start time, end time))
	 * @phan-var array<string,array<int,array{0:string,1:float,2:float}>>
	 */
	private $dbTrxMethodTimes;

	/** @var string|null HTTP request method; null for CLI mode */
	private $method;

	/** @var float|null */
	private $wallClockOverride;

	/** Treat locks as long-running if they last longer than this many seconds */
	private const DB_LOCK_THRESHOLD_SEC = 3.0;
	/** Include events in any violation logs if they last longer than this many seconds */
	private const EVENT_THRESHOLD_SEC = 0.25;

	/** List of event names */
	private const EVENT_NAMES = [
		'writes',
		'queries',
		'conns',
		'masterConns',
		'maxAffected',
		'readQueryRows',
		'readQueryTime',
		'writeQueryTime'
	];

	/** List of event names with hit counters */
	private const COUNTER_EVENT_NAMES = [
		'writes',
		'queries',
		'conns',
		'masterConns'
	];

	/** Key to max expected value */
	private const FLD_LIMIT = 0;
	/** Key to the function that set the max expected value */
	private const FLD_FNAME = 1;

	/** Any type of expectation */
	public const EXPECTATION_ANY = 'any';
	/** Any expectations about replica usage never occurring */
	public const EXPECTATION_REPLICAS_ONLY = 'replicas-only';

	public function __construct() {
		$this->initPlaceholderExpectations();

		$this->dbTrxHoldingLocks = [];
		$this->dbTrxMethodTimes = [];

		$this->silenced = array_fill_keys( self::EVENT_NAMES, 0 );

		$this->setLogger( new NullLogger() );
		$this->statsFactory = StatsFactory::newNull();
	}

	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Set statsFactory
	 *
	 * @param StatsFactory $statsFactory
	 * @return void
	 */
	public function setStatsFactory( StatsFactory $statsFactory ) {
		$this->statsFactory = $statsFactory;
	}

	/**
	 * @param ?string $method HTTP method; null for CLI mode
	 * @return void
	 */
	public function setRequestMethod( ?string $method ) {
		$this->method = $method;
	}

	/**
	 * Temporarily ignore expectations until the returned object goes out of scope
	 *
	 * During this time, violation of expectations will not be logged and counters
	 * for expectations (e.g. "conns") will not be incremented.
	 *
	 * This will suppress warnings about event counters which have a limit of zero.
	 * The main use case is too avoid warnings about primary connections/writes and
	 * warnings about getting any primary/replica connections at all.
	 *
	 * @param string $type Class EXPECTATION_* constant [default: TransactionProfiler::EXPECTATION_ANY]
	 * @return ScopedCallback
	 */
	public function silenceForScope( string $type = self::EXPECTATION_ANY ) {
		if ( $type === self::EXPECTATION_REPLICAS_ONLY ) {
			$events = [];
			foreach ( [ 'writes', 'masterConns' ] as $event ) {
				if ( $this->expect[$event][self::FLD_LIMIT] === 0 ) {
					$events[] = $event;
				}
			}
		} else {
			$events = self::EVENT_NAMES;
		}

		foreach ( $events as $event ) {
			++$this->silenced[$event];
		}

		return new ScopedCallback( function () use ( $events ) {
			foreach ( $events as $event ) {
				--$this->silenced[$event];
			}
		} );
	}

	/**
	 * Set performance expectations
	 *
	 * With conflicting expectations, the most narrow ones will be used
	 *
	 * @param string $event Event name, {@see self::EVENT_NAMES}
	 * @param float|int $limit Maximum event count, event value, or total event value
	 * @param string $fname Caller
	 * @since 1.25
	 */
	public function setExpectation( string $event, $limit, string $fname ) {
		if ( !isset( $this->expect[$event] ) ) {
			return; // obsolete/bogus expectation
		}

		if ( $limit <= $this->expect[$event][self::FLD_LIMIT] ) {
			// New limit is more restrictive
			$this->expect[$event] = [
				self::FLD_LIMIT => $limit,
				self::FLD_FNAME => $fname
			];
		}
	}

	/**
	 * Set one or multiple performance expectations
	 *
	 * With conflicting expectations, the most narrow ones will be used
	 *
	 * Use this to initialize expectations or make them stricter mid-request
	 *
	 * @param array $expects Map of (event name => limit), {@see self::EVENT_NAMES}
	 * @param string $fname
	 * @since 1.26
	 */
	public function setExpectations( array $expects, string $fname ) {
		foreach ( $expects as $event => $value ) {
			$this->setExpectation( $event, $value, $fname );
		}
	}

	/**
	 * Reset all performance expectations and hit counters
	 *
	 * Use this for unit testing or before applying a totally different set of expectations
	 * for a different part of the request, such as during "post-send" (execution after HTTP
	 * response completion)
	 *
	 * @since 1.25
	 */
	public function resetExpectations() {
		$this->initPlaceholderExpectations();
	}

	/**
	 * Clear all expectations and hit counters and set new performance expectations
	 *
	 * Use this to apply a totally different set of expectations for a different part
	 * of the request, such as during "post-send" (execution after HTTP response completion)
	 *
	 * @param array $expects Map of (event name => limit), {@see self::EVENT_NAMES}
	 * @param string $fname
	 * @since 1.33
	 */
	public function redefineExpectations( array $expects, string $fname ) {
		$this->initPlaceholderExpectations();
		$this->setExpectations( $expects, $fname );
	}

	/**
	 * Mark a DB as having been connected to with a new handle
	 *
	 * Note that there can be multiple connections to a single DB.
	 *
	 * @param string $server DB server
	 * @param string|null $db DB name
	 * @param bool $isPrimaryWithReplicas If the server is the primary and there are replicas
	 */
	public function recordConnection( $server, $db, bool $isPrimaryWithReplicas ) {
		// Report when too many connections happen...
		if ( $this->pingAndCheckThreshold( 'conns' ) ) {
			$this->reportExpectationViolated(
				'conns',
				"[connect to $server ($db)]",
				$this->hits['conns']
			);
		}

		// Report when too many primary connections happen...
		if ( $isPrimaryWithReplicas && $this->pingAndCheckThreshold( 'masterConns' ) ) {
			$this->reportExpectationViolated(
				'masterConns',
				"[connect to $server ($db)]",
				$this->hits['masterConns']
			);
		}
	}

	/**
	 * Mark a DB as in a transaction with one or more writes pending
	 *
	 * Note that there can be multiple connections to a single DB.
	 *
	 * @param string $server DB server
	 * @param string|null $db DB name
	 * @param string $id ID string of transaction
	 * @param float $startTime UNIX timestamp
	 */
	public function transactionWritingIn( $server, $db, string $id, float $startTime ) {
		$name = "{$db} {$server} TRX#$id";
		if ( isset( $this->dbTrxHoldingLocks[$name] ) ) {
			$this->logger->warning( "Nested transaction for '$name' - out of sync." );
		}
		$this->dbTrxHoldingLocks[$name] = [
			'start' => $startTime,
			'conns' => [], // all connections involved
		];
		$this->dbTrxMethodTimes[$name] = [];

		foreach ( $this->dbTrxHoldingLocks as $name => &$info ) {
			// Track all DBs in transactions for this transaction
			$info['conns'][$name] = 1;
		}
	}

	/**
	 * Register the name and time of a method for slow DB trx detection
	 *
	 * This assumes that all queries are synchronous (non-overlapping)
	 *
	 * @param string|GeneralizedSql|Query $query Function name or generalized SQL
	 * @param float $sTime Starting UNIX wall time
	 * @param bool $isWrite Whether this is a write query
	 * @param int|null $rowCount Number of affected/read rows
	 * @param string $trxId Transaction id
	 * @param string|null $serverName db host name like db1234
	 */
	public function recordQueryCompletion(
		$query,
		float $sTime,
		bool $isWrite,
		?int $rowCount,
		string $trxId,
		?string $serverName = null
	) {
		$eTime = $this->getCurrentTime();
		$elapsed = ( $eTime - $sTime );

		if ( $isWrite && $this->isAboveThreshold( $rowCount, 'maxAffected' ) ) {
			$this->reportExpectationViolated( 'maxAffected', $query, $rowCount, $trxId, $serverName );
		} elseif ( !$isWrite && $this->isAboveThreshold( $rowCount, 'readQueryRows' ) ) {
			$this->reportExpectationViolated( 'readQueryRows', $query, $rowCount, $trxId, $serverName );
		}

		// Report when too many writes/queries happen...
		if ( $this->pingAndCheckThreshold( 'queries' ) ) {
			$this->reportExpectationViolated( 'queries', $query, $this->hits['queries'], $trxId, $serverName );
		}
		if ( $isWrite && $this->pingAndCheckThreshold( 'writes' ) ) {
			$this->reportExpectationViolated( 'writes', $query, $this->hits['writes'], $trxId, $serverName );
		}
		// Report slow queries...
		if ( !$isWrite && $this->isAboveThreshold( $elapsed, 'readQueryTime' ) ) {
			$this->reportExpectationViolated( 'readQueryTime', $query, $elapsed, $trxId, $serverName );
		}
		if ( $isWrite && $this->isAboveThreshold( $elapsed, 'writeQueryTime' ) ) {
			$this->reportExpectationViolated( 'writeQueryTime', $query, $elapsed, $trxId, $serverName );
		}

		if ( !$this->dbTrxHoldingLocks ) {
			// Short-circuit
			return;
		} elseif ( !$isWrite && $elapsed < self::EVENT_THRESHOLD_SEC ) {
			// Not an important query nor slow enough
			return;
		}

		foreach ( $this->dbTrxHoldingLocks as $name => $info ) {
			$lastQuery = end( $this->dbTrxMethodTimes[$name] );
			if ( $lastQuery ) {
				// Additional query in the trx...
				$lastEnd = $lastQuery[2];
				if ( $sTime >= $lastEnd ) {
					if ( ( $sTime - $lastEnd ) > self::EVENT_THRESHOLD_SEC ) {
						// Add an entry representing the time spent doing non-queries
						$this->dbTrxMethodTimes[$name][] = [ '...delay...', $lastEnd, $sTime ];
					}
					$this->dbTrxMethodTimes[$name][] = [ $query, $sTime, $eTime ];
				}
			} else {
				// First query in the trx...
				if ( $sTime >= $info['start'] ) {
					$this->dbTrxMethodTimes[$name][] = [ $query, $sTime, $eTime ];
				}
			}
		}
	}

	/**
	 * Mark a DB as no longer in a transaction
	 *
	 * This will check if locks are possibly held for longer than
	 * needed and log any affected transactions to a special DB log.
	 * Note that there can be multiple connections to a single DB.
	 *
	 * @param string $server DB server
	 * @param string|null $db DB name
	 * @param string $id ID string of transaction
	 * @param float $writeTime Time spent in write queries
	 * @param int $affected Number of rows affected by writes
	 */
	public function transactionWritingOut(
		$server,
		$db,
		string $id,
		float $writeTime,
		int $affected
	) {
		// Must match $name in transactionWritingIn()
		$name = "{$db} {$server} TRX#$id";
		if ( !isset( $this->dbTrxMethodTimes[$name] ) ) {
			$this->logger->warning( "Detected no transaction for '$name' - out of sync." );
			return;
		}

		$slow = false;

		// Warn if too much time was spend writing...
		if ( $this->isAboveThreshold( $writeTime, 'writeQueryTime' ) ) {
			$this->reportExpectationViolated(
				'writeQueryTime',
				"[transaction writes to {$db} at {$server}]",
				$writeTime,
				$id
			);
			$slow = true;
		}
		// Warn if too many rows were changed...
		if ( $this->isAboveThreshold( $affected, 'maxAffected' ) ) {
			$this->reportExpectationViolated(
				'maxAffected',
				"[transaction writes to {$db} at {$server}]",
				$affected,
				$id
			);
		}
		// Fill in the last non-query period...
		$lastQuery = end( $this->dbTrxMethodTimes[$name] );
		if ( $lastQuery ) {
			$now = $this->getCurrentTime();
			$lastEnd = $lastQuery[2];
			if ( ( $now - $lastEnd ) > self::EVENT_THRESHOLD_SEC ) {
				$this->dbTrxMethodTimes[$name][] = [ '...delay...', $lastEnd, $now ];
			}
		}
		// Check for any slow queries or non-query periods...
		foreach ( $this->dbTrxMethodTimes[$name] as $info ) {
			$elapsed = ( $info[2] - $info[1] );
			if ( $elapsed >= self::DB_LOCK_THRESHOLD_SEC ) {
				$slow = true;
				break;
			}
		}
		if ( $slow ) {
			$trace = '';
			foreach ( $this->dbTrxMethodTimes[$name] as $i => [ $query, $sTime, $end ] ) {
				$trace .= sprintf(
					"%-2d %.3fs %s\n", $i, ( $end - $sTime ), $this->getGeneralizedSql( $query ) );
			}
			$this->logger->warning( "Suboptimal transaction [{dbs}]:\n{trace}", [
				'dbs' => implode( ', ', array_keys( $this->dbTrxHoldingLocks[$name]['conns'] ) ),
				'trace' => mb_substr( $trace, 0, 2000 )
			] );
		}
		unset( $this->dbTrxHoldingLocks[$name] );
		unset( $this->dbTrxMethodTimes[$name] );
	}

	private function initPlaceholderExpectations() {
		$this->expect = array_fill_keys(
			self::EVENT_NAMES,
			[ self::FLD_LIMIT => INF, self::FLD_FNAME => null ]
		);

		$this->hits = array_fill_keys( self::COUNTER_EVENT_NAMES, 0 );
		$this->violations = array_fill_keys( self::EVENT_NAMES, 0 );
	}

	/**
	 * @param float|int $value
	 * @param string $event
	 * @return bool
	 */
	private function isAboveThreshold( $value, string $event ) {
		if ( $this->silenced[$event] > 0 ) {
			return false;
		}

		return ( $value > $this->expect[$event][self::FLD_LIMIT] );
	}

	/**
	 * @param string $event
	 * @return bool
	 */
	private function pingAndCheckThreshold( string $event ) {
		if ( $this->silenced[$event] > 0 ) {
			return false;
		}

		$newValue = ++$this->hits[$event];
		$limit = $this->expect[$event][self::FLD_LIMIT];

		return ( $newValue > $limit );
	}

	/**
	 * @param string $event
	 * @param string|GeneralizedSql|Query $query
	 * @param float|int $actual
	 * @param string|null $trxId Transaction id
	 * @param string|null $serverName db host name like db1234
	 */
	private function reportExpectationViolated(
		$event,
		$query,
		$actual,
		?string $trxId = null,
		?string $serverName = null
	) {
		$violations = ++$this->violations[$event];
		// First violation; check if this is a web request
		if ( $violations === 1 && $this->method !== null ) {
			$this->statsFactory->getCounter( 'rdbms_trxprofiler_warnings_total' )
				->setLabel( 'event', $event )
				->setLabel( 'method', $this->method )
				->copyToStatsdAt( "rdbms_trxprofiler_warnings.$event.{$this->method}" )
				->increment();
		}

		$max = $this->expect[$event][self::FLD_LIMIT];
		$by = $this->expect[$event][self::FLD_FNAME];

		$message = "Expectation ($event <= $max) by $by not met (actual: {actualSeconds})";
		if ( $trxId ) {
			$message .= ' in trx #{trxId}';
		}
		$message .= ":\n{query}\n";

		$this->logger->warning(
			$message,
			[
				'db_log_category' => 'performance',
				'measure' => $event,
				'maxSeconds' => $max,
				'by' => $by,
				'actualSeconds' => $actual,
				'query' => $this->getGeneralizedSql( $query ),
				'exception' => new RuntimeException(),
				'trxId' => $trxId,
				// Avoid truncated JSON in Logstash (T349140)
				'fullQuery' => mb_substr( $this->getRawSql( $query ), 0, 2000 ),
				'dbHost' => $serverName
			]
		);
	}

	/**
	 * @param GeneralizedSql|string|Query $query
	 * @return string
	 */
	private function getGeneralizedSql( $query ) {
		if ( $query instanceof Query ) {
			return $query->getCleanedSql();
		}
		return $query instanceof GeneralizedSql ? $query->stringify() : $query;
	}

	/**
	 * @param GeneralizedSql|string|Query $query
	 * @return string
	 */
	private function getRawSql( $query ) {
		if ( $query instanceof Query ) {
			return $query->getSQL();
		}
		return $query instanceof GeneralizedSql ? $query->getRawSql() : $query;
	}

	/**
	 * @return float UNIX timestamp
	 * @codeCoverageIgnore
	 */
	private function getCurrentTime() {
		return $this->wallClockOverride ?: microtime( true );
	}

	/**
	 * @param float|null &$time Mock UNIX timestamp for testing
	 * @codeCoverageIgnore
	 */
	public function setMockTime( &$time ) {
		$this->wallClockOverride =& $time;
	}
}
