%PDF- %PDF-
Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/ |
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/Util.php |
<?php

namespace CirrusSearch;

use GeoData\Coord;
use GeoData\GeoData;
use GeoData\Globe;
use IP;
use MediaWiki\Logger\LoggerFactory;
use MediaWiki\MediaWikiServices;
use PoolCounterWorkViaCallback;
use RequestContext;
use Status;
use Title;
use WebRequest;

/**
 * Random utility functions that don't have a better home
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */
class Util {
	/**
	 * Cache getDefaultBoostTemplates()
	 *
	 * @var array|null boost templates
	 */
	private static $defaultBoostTemplates = null;

	/**
	 * Get the textual representation of a namespace with underscores replaced
	 * by spaces, varying by gender if need be (using Title::getNsText()).
	 *
	 * @param Title $title The page title to use
	 * @return string
	 */
	public static function getNamespaceText( Title $title ) {
		return strtr( $title->getNsText(), '_', ' ' );
	}

	/**
	 * Check if two arrays are recursively the same. Keys must be identical and
	 * in the same order; values are compared with != and arrays are descended
	 * into.
	 *
	 * @param array $lhs one array
	 * @param array $rhs the other array
	 * @return bool are they equal
	 */
	public static function recursiveSame( $lhs, $rhs ) {
		// Strict comparison of the key lists guarantees that every key of $lhs
		// exists in $rhs, so no isset() check is needed below. isset() would
		// wrongly report a difference for keys whose value is null.
		if ( array_keys( $lhs ) !== array_keys( $rhs ) ) {
			return false;
		}
		foreach ( $lhs as $key => $value ) {
			if ( is_array( $value ) ) {
				if ( !is_array( $rhs[ $key ] ) ) {
					return false;
				}
				if ( !self::recursiveSame( $value, $rhs[ $key ] ) ) {
					return false;
				}
			} elseif ( $value != $rhs[ $key ] ) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Build the stats key for a pool counter request. Everything up to and
	 * including the first '-' of $type is dropped, so 'CirrusSearch-Search'
	 * is reported as 'Search'.
	 *
	 * @param string $type The pool counter type, such as CirrusSearch-Search
	 * @param bool $isSuccess If the pool counter gave a success, or failed the request
	 * @return string The key used for collecting timing stats about this pool counter request
	 */
	private static function getPoolStatsKey( $type, $isSuccess ) {
		$pos = strpos( $type, '-' );
		if ( $pos !== false ) {
			$type = substr( $type, $pos + 1 );
		}
		$postfix = $isSuccess ? 'successMs' : 'failureMs';
		return "CirrusSearch.poolCounter.$type.$postfix";
	}

	/**
	 * Wrap a callback so that, when it is invoked, the time elapsed since
	 * $startPoolWork is reported (in milliseconds) before the callback runs.
	 *
	 * @param float $startPoolWork The time this pool request started, from microtime( true )
	 * @param string $type The pool counter type, such as CirrusSearch-Search
	 * @param bool $isSuccess If the pool counter gave a success, or failed the request
	 * @param callable $callback The function to wrap
	 * @return callable The original callback wrapped to collect pool counter stats
	 */
	private static function wrapWithPoolStats( $startPoolWork, $type, $isSuccess, $callback ) {
		return function () use ( $type, $isSuccess, $callback, $startPoolWork ) {
			MediaWikiServices::getInstance()->getStatsdDataFactory()->timing(
				self::getPoolStatsKey( $type, $isSuccess ),
				intval( 1000 * ( microtime( true ) - $startPoolWork ) )
			);

			// Forward whatever arguments the pool counter passed in.
			return call_user_func_array( $callback, func_get_args() );
		};
	}

	/**
	 * Wraps the complex pool counter interface to force the single call pattern
	 * that Cirrus always uses.
	 *
	 * A per-user lock is taken first and the per-type lock is taken while it
	 * is held. If there is no user the pool counter is skipped entirely.
	 *
	 * @param string $type same as type parameter on PoolCounter::factory
	 * @param \User $user the user
	 * @param callable $workCallback callback when pool counter is acquired.  Called with
	 *   no parameters.
	 * @param callable $errorCallback optional callback called on errors.  Called with
	 *   the error string, the key and the user name (empty string for logged in
	 *   users) as parameters.  If left undefined defaults to a function that
	 *   returns a fatal status and logs a warning.
	 * @return mixed
	 */
	public static function doPoolCounterWork( $type, $user, $workCallback, $errorCallback = null ) {
		global $wgCirrusSearchPoolCounterKey;

		// By default the pool counter allows you to lock the same key with
		// multiple types.  That might be useful but it isn't how Cirrus thinks.
		// Instead, all keys are scoped to their type.

		if ( !$user ) {
			// We don't want to even use the pool counter if there isn't a user.
			return $workCallback();
		}
		$perUserKey = md5( $user->getName() );
		$perUserKey = "nowait:CirrusSearch:_per_user:$perUserKey";
		$globalKey = "$type:$wgCirrusSearchPoolCounterKey";
		if ( $errorCallback === null ) {
			$errorCallback = function ( $error, $key, $userName ) {
				$forUserName = $userName ? "for {userName} " : '';
				LoggerFactory::getInstance( 'CirrusSearch' )->warning(
					"Pool error {$forUserName}on {key}:  {error}",
					[ 'userName' => $userName, 'key' => $key, 'error' => $error ]
				);
				return Status::newFatal( 'cirrussearch-backend-error' );
			};
		}

		// wrap some stats collection on the success/failure handlers
		$startPoolWork = microtime( true );
		$workCallback = self::wrapWithPoolStats( $startPoolWork, $type, true, $workCallback );
		$errorCallback = self::wrapWithPoolStats( $startPoolWork, $type, false, $errorCallback );

		// Builds, for a given key, the error handler handed to the pool counter.
		$errorHandler = function ( $key ) use ( $errorCallback, $user ) {
			return function ( Status $status ) use ( $errorCallback, $key, $user ) {
				/** @suppress PhanDeprecatedFunction No good replacements for getErrorsArray */
				$status = $status->getErrorsArray();
				// anon usernames are needed within the logs to determine if
				// specific ips (such as large #'s of users behind a proxy)
				// need to be whitelisted. We do not need this information
				// for logged in users and do not store it.
				$userName = $user->isAnon() ? $user->getName() : '';
				return $errorCallback( $status[ 0 ][ 0 ], $key, $userName );
			};
		};
		$doPerUserWork = function () use ( $type, $globalKey, $workCallback, $errorHandler ) {
			// Now that we have the per user lock lets get the operation lock.
			// Note that this could block, causing the user to wait in line with their lock held.
			$work = new PoolCounterWorkViaCallback( $type, $globalKey, [
				'doWork' => $workCallback,
				'error' => $errorHandler( $globalKey ),
			] );
			return $work->execute();
		};
		$work = new PoolCounterWorkViaCallback( 'CirrusSearch-PerUser', $perUserKey, [
			'doWork' => $doPerUserWork,
			'error' => function ( $status ) use ( $errorHandler, $perUserKey, $doPerUserWork ) {
				// The error callback is always invoked (so the failure is
				// logged and timed); its result is only returned when the per
				// user pool counter is enforced, otherwise the work runs anyway.
				$errorCallback = $errorHandler( $perUserKey );
				$errorResult = $errorCallback( $status );
				if ( self::isUserPoolCounterActive() ) {
					return $errorResult;
				} else {
					return $doPerUserWork();
				}
			},
		] );
		return $work->execute();
	}

	/**
	 * Decide whether a per-user pool counter failure must be honoured.
	 *
	 * The per-user pool counter is always active for request IPs inside
	 * $wgCirrusSearchForcePerUserPoolCounter; for all other IPs it is active
	 * unless $wgCirrusSearchBypassPerUserFailure is set.
	 *
	 * @return bool
	 */
	public static function isUserPoolCounterActive() {
		global $wgCirrusSearchBypassPerUserFailure,
			$wgCirrusSearchForcePerUserPoolCounter;

		$ip = RequestContext::getMain()->getRequest()->getIP();
		if ( IP::isInRanges( $ip, $wgCirrusSearchForcePerUserPoolCounter ) ) {
			return true;
		}
		return !$wgCirrusSearchBypassPerUserFailure;
	}

	/**
	 * Parse a number that may be written as a percentage. A value containing
	 * a '%' anywhere is divided by 100 ('50%' gives 0.5); anything else is
	 * returned as parsed by floatval().
	 *
	 * @param string $str
	 * @return float
	 */
	public static function parsePotentialPercent( $str ) {
		$result = floatval( $str );
		if ( strpos( $str, '%' ) === false ) {
			return $result;
		}
		return $result / 100;
	}

	/**
	 * Matches $data against $properties to clear keys that no longer exist.
	 * E.g.:
	 * $data = array(
	 *     'title' => "I'm a title",
	 *     'useless' => "I'm useless",
	 * );
	 * $properties = array(
	 *     'title' => 'params-for-title'
	 * );
	 *
	 * Will return:
	 * array(
	 *     'title' => "I'm a title",
	 * )
	 * With the no longer existing 'useless' field stripped.
	 *
	 * We could just use array_intersect_key for this simple example, but it
	 * gets more complex with nested data.
	 *
	 * @param array $data
	 * @param array $properties
	 * @return array
	 */
	public static function cleanUnusedFields( array $data, array $properties ) {
		$data = array_intersect_key( $data, $properties );

		foreach ( $data as $key => $value ) {
			if ( is_array( $value ) ) {
				foreach ( $value as $i => $innerValue ) {
					if ( is_array( $innerValue ) && isset( $properties[$key]['properties'] ) ) {
						// go recursive to intersect multidimensional values
						$data[$key][$i] = static::cleanUnusedFields(
							$innerValue,
							$properties[$key]['properties']
						);
					}
				}
			}
		}

		return $data;
	}

	/**
	 * Parse a message content into an array. This function is generally used to
	 * parse settings stored as i18n messages (see cirrussearch-boost-templates).
	 *
	 * The returned array keeps the original line indexes as keys, so it is
	 * not necessarily a zero-based list.
	 *
	 * @param string $message
	 * @return string[]
	 */
	public static function parseSettingsInMessage( $message ) {
		$lines = explode( "\n", $message );
		$lines = preg_replace( '/#.*$/', '', $lines ); // Remove comments
		$lines = array_map( 'trim', $lines );          // Remove extra spaces
		$lines = array_filter( $lines );               // Remove empty lines
		return $lines;
	}

	/**
	 * Tries to identify the best redirect by finding the link with the
	 * smallest edit distance between the title and the user query.
	 *
	 * Titles longer than the query are truncated to the query length before
	 * the comparison, and both sides are lowercased.
	 *
	 * @param string $userQuery the user query
	 * @param array $redirects the list of redirects
	 * @return string|null the best redirect text, null when $redirects is empty
	 */
	public static function chooseBestRedirect( $userQuery, $redirects ) {
		$userQuery = mb_strtolower( $userQuery );
		$len = mb_strlen( $userQuery );
		$bestDistance = INF;
		$best = null;

		foreach ( $redirects as $redir ) {
			$text = $redir['title'];
			if ( mb_strlen( $text ) > $len ) {
				$text = mb_substr( $text, 0, $len );
			}
			$text = mb_strtolower( $text );
			// NOTE(review): levenshtein() works on bytes, so multibyte
			// characters weigh more than one edit each.
			$distance = levenshtein( $text, $userQuery );
			if ( $distance == 0 ) {
				// Exact (prefix) match, cannot do better
				return $redir['title'];
			}
			if ( $distance < $bestDistance ) {
				$bestDistance = $distance;
				$best = $redir['title'];
			}
		}
		return $best;
	}

	/**
	 * Test if $string ends with $suffix
	 *
	 * @param string $string string to test
	 * @param string $suffix the suffix
	 * @return bool true if $string ends with $suffix
	 */
	public static function endsWith( $string, $suffix ) {
		$strlen = strlen( $string );
		$suffixlen = strlen( $suffix );
		if ( $suffixlen === 0 ) {
			// Every string ends with the empty string. Handled explicitly
			// because substr_compare() with an offset equal to the string
			// length warns and returns false on older PHP versions.
			return true;
		}
		if ( $suffixlen > $strlen ) {
			return false;
		}
		return substr_compare( $string, $suffix, $strlen - $suffixlen, $suffixlen ) === 0;
	}

	/**
	 * Set $dest to the true/false from $request->getVal( $name ) if yes/no.
	 * Any other value (or a missing parameter) leaves $dest untouched.
	 *
	 * @param mixed &$dest
	 * @param WebRequest $request
	 * @param string $name
	 */
	public static function overrideYesNo( &$dest, $request, $name ) {
		$val = $request->getVal( $name );
		if ( $val === 'yes' ) {
			$dest = true;
		} elseif ( $val === 'no' ) {
			$dest = false;
		}
	}

	/**
	 * Set $dest to the numeric value from $request->getVal( $name ) if it is <= $limit
	 * or >= $limit if upperLimit is false. Without a limit any numeric value
	 * is accepted. The value is stored as received from the request.
	 *
	 * @param mixed &$dest
	 * @param WebRequest $request
	 * @param string $name
	 * @param int|null $limit
	 * @param bool $upperLimit
	 */
	public static function overrideNumeric( &$dest, $request, $name, $limit = null, $upperLimit = true ) {
		$val = $request->getVal( $name );
		if ( $val !== null && is_numeric( $val ) ) {
			if ( !isset( $limit ) ) {
				$dest = $val;
			} elseif ( $upperLimit && $val <= $limit ) {
				$dest = $val;
			} elseif ( !$upperLimit && $val >= $limit ) {
				$dest = $val;
			}
		}
	}

	/**
	 * Get boost templates configured in messages.
	 *
	 * Templates configured on wiki take precedence over the
	 * CirrusSearchBoostTemplates setting, unless
	 * CirrusSearchIgnoreOnWikiBoostTemplates is set.
	 *
	 * @param SearchConfig $config Search config requesting the templates
	 * @return \float[]
	 */
	public static function getDefaultBoostTemplates( SearchConfig $config = null ) {
		if ( is_null( $config ) ) {
			$config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'CirrusSearch' );
		}

		$fromConfig = $config->get( 'CirrusSearchBoostTemplates' );
		if ( $config->get( 'CirrusSearchIgnoreOnWikiBoostTemplates' ) ) {
			// on wiki messages disabled, we can return this config
			// directly
			return $fromConfig;
		}

		$fromMessage = self::getOnWikiBoostTemplates( $config );
		if ( empty( $fromMessage ) ) {
			// the onwiki config is empty (or unknown for non-local
			// config), we can fallback to templates from config
			return $fromConfig;
		}
		return $fromMessage;
	}

	/**
	 * Load and cache boost templates configured on wiki via the system
	 * message 'cirrussearch-boost-templates'.
	 * If called from the local wiki the message will be cached.
	 * If called from a non local wiki an attempt to fetch this data from the cache is made.
	 * If an empty array is returned it means that no config is available on wiki
	 * or the value possibly unknown if run from a non local wiki.
	 *
	 * @param SearchConfig $config
	 * @return \float[] indexed by template name
	 */
	private static function getOnWikiBoostTemplates( SearchConfig $config ) {
		$cache = \ObjectCache::getLocalClusterInstance();
		$cacheKey = $cache->makeGlobalKey( 'cirrussearch-boost-templates', $config->getWikiId() );
		if ( $config->getWikiId() == wfWikiID() ) {
			// Local wiki we can fetch boost templates from system
			// message
			if ( self::$defaultBoostTemplates !== null ) {
				// This static cache is never set with non-local
				// wiki data.
				return self::$defaultBoostTemplates;
			}

			$templates = $cache->getWithSetCallback(
				$cacheKey,
				600,
				function () {
					$source = wfMessage( 'cirrussearch-boost-templates' )->inContentLanguage();
					if ( !$source->isDisabled() ) {
						$lines = self::parseSettingsInMessage( $source->plain() );
						// Now parse the templates
						return Query\BoostTemplatesFeature::parseBoostTemplates( implode( ' ', $lines ) );
					}
					return [];
				}
			);
			self::$defaultBoostTemplates = $templates;
			return $templates;
		}
		// Here we're dealing with boost template from other wiki, try to fetch it if it exists
		// otherwise, don't bother.
		$nonLocalCache = $cache->get( $cacheKey );
		if ( !is_array( $nonLocalCache ) ) {
			// not yet in cache, value is unknown
			// return empty array
			return [];
		}
		return $nonLocalCache;
	}

	/**
	 * Strip question marks from queries, according to the defined stripping
	 * level, defined by $wgCirrusSearchStripQuestionMarks. Strip all ?s, those
	 * at word breaks, or only string-final. Ignore queries that are all
	 * punctuation or use insource. Don't remove escaped \?s, but unescape them.
	 * ¿ is not :punct:, hence $more_punct.
	 *
	 * Any $strippingLevel other than 'final', 'break' or 'all' leaves the
	 * term untouched.
	 *
	 * @param string $term
	 * @param string $strippingLevel
	 * @return string modified term, based on strippingLevel
	 */
	public static function stripQuestionMarks( $term, $strippingLevel ) {
		// strip question marks
		$more_punct = "[¿]";
		if ( strpos( $term, 'insource:' ) === false &&
			preg_match( "/^([[:punct:]]|\s|$more_punct)+$/", $term ) === 0
		) {
			if ( $strippingLevel === 'final' ) {
				// strip only query-final question marks that are not escaped
				$term = preg_replace( "/((?<!\\\\)\?|\s)+$/", '', $term );
				$term = preg_replace( '/\\\\\?/', '?', $term );
			} elseif ( $strippingLevel === 'break' ) {
				// strip question marks at word boundaries
				$term = preg_replace( '/(?<!\\\\)(\?)+(\PL|$)/', '$2', $term );
				$term = preg_replace( '/\\\\\?/', '?', $term );
			} elseif ( $strippingLevel === 'all' ) {
				// strip all unescaped question marks
				$term = preg_replace( '/(?<!\\\\)(\?)+/', ' ', $term );
				$term = preg_replace( '/\\\\\?/', '?', $term );
			}
		}
		return $term;
	}
}