%PDF- %PDF-
Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/tests/unit/ |
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/tests/unit/SuggestScoringTest.php |
<?php

namespace CirrusSearch;

use CirrusSearch\BuildDocument\IncomingLinksScoringMethod;
use CirrusSearch\BuildDocument\PQScore;
use CirrusSearch\BuildDocument\QualityScore;

/**
 * test suggest scoring functions.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */
class SuggestScoringTest extends \MediaWikiTestCase {
	/**
	 * 2^31 - 1, used as a "very large" boost and as the upper bound of random
	 * incoming link counts.
	 *
	 * Spelled out as a literal on purpose: in PHP `^` is bitwise XOR and binds
	 * looser than `-`, so the expression `2^31-1` evaluates to 2 XOR 30 = 28.
	 */
	const MAX_INT32 = 2147483647;

	/**
	 * scoreNorm() and scoreNormL2() must stay within [0, 1] for random
	 * value/norm pairs and for the boundary values 0 and norm.
	 */
	public function testQualityScoreNormFunctions() {
		$qs = new QualityScore();
		$qs->setMaxDocs( 10000 );

		for ( $i = 0; $i < 1000; $i++ ) {
			$value = mt_rand( 0, 1000000 );
			$norm = mt_rand( 1, 1000000 );
			$this->assertScoreInUnitRange( $qs->scoreNorm( $value, $norm ), 'scoreNorm' );
			$this->assertScoreInUnitRange( $qs->scoreNormL2( $value, $norm ), 'scoreNormL2' );
		}

		// Edges
		$this->assertScoreInUnitRange( $qs->scoreNorm( 1, 1 ), 'scoreNorm' );
		$this->assertScoreInUnitRange( $qs->scoreNorm( 0, 1 ), 'scoreNorm' );
		$this->assertScoreInUnitRange( $qs->scoreNormL2( 1, 1 ), 'scoreNormL2' );
		$this->assertScoreInUnitRange( $qs->scoreNormL2( 0, 1 ), 'scoreNormL2' );
	}

	/**
	 * boost() must keep the score within [0, 1], move it in the direction of
	 * the boost, and never change the relative order of two scores that
	 * receive the same boost.
	 */
	public function testQualityScoreBoostFunction() {
		$qs = new QualityScore();

		for ( $i = 0; $i < 1000; $i++ ) {
			// Draw the score strictly inside (0, 1): the edge cases below show
			// that a score of 1 cannot be raised and a score of 0 cannot be
			// lowered, so the strict comparisons would fail on those values.
			$score = (float)mt_rand( 1, mt_getrandmax() - 1 ) / (float)mt_getrandmax();
			$boost = (float)mt_rand( 0, 10000 ) / mt_rand( 1, 10000 );
			$res = $qs->boost( $score, $boost );

			// The range check applies to the boosted result, not to the input.
			$this->assertScoreInUnitRange( $res, 'boost' );

			if ( $boost > 1 ) {
				$this->assertGreaterThan( $score, $res,
					"With a boost ($boost) greater than 1 the boosted score must be greater than the original." );
			} elseif ( $boost < 1 ) {
				$this->assertLessThan( $score, $res,
					"With a boost ($boost) less than 1 the boosted score must be less than the original." );
			} else {
				$this->assertEquals( $score, $res,
					"When boost is 1 the score remains unchanged." );
			}
		}

		// The same boost value must keep original score ordering
		$lowScore = 0.1;
		$highScore = 0.5;
		for ( $boost = 1; $boost < 1000; $boost++ ) {
			$this->assertGreaterThan(
				$qs->boost( $lowScore, $boost ),
				$qs->boost( $highScore, $boost ),
				"A boost cannot 'overboost' a score"
			);

			$inverse = 1.0 / (float)$boost;
			$this->assertGreaterThan(
				$qs->boost( $lowScore, $inverse ),
				$qs->boost( $highScore, $inverse ),
				"A boost cannot 'overboost' a score"
			);
		}

		// Edges
		$this->assertEquals( 1, $qs->boost( 1, 1 ),
			"When boost is 1 the score remains unchanged." );
		$this->assertEquals( 0.5, $qs->boost( 1, 0 ),
			"When boost is 0 the score is divided by 2." );
		$this->assertEquals( 1, $qs->boost( 1, self::MAX_INT32 ),
			"When score is 1 and boost is very high the score is still 1." );
		$this->assertEquals( 0, $qs->boost( 0, 0 ),
			"When score is 0 and boost is 0 the score is still 0." );
	}

	/**
	 * boostTemplates() must raise the score for a boosted template, lower it
	 * for a penalised one, and leave it unchanged for neutral or mixed docs.
	 */
	public function testQualityScoreBoostTemplates() {
		$qs = new QualityScore( [ 'Good' => 2, 'Bad' => 0.5 ] );
		$score = 0.5;

		$this->assertGreaterThan(
			$score,
			$qs->boostTemplates( [ 'template' => [ 'Good' ] ], $score ),
			"A good doc gets a better score"
		);
		$this->assertLessThan(
			$score,
			$qs->boostTemplates( [ 'template' => [ 'Bad' ] ], $score ),
			"A bad doc gets a lower score"
		);
		$this->assertEquals(
			$score,
			$qs->boostTemplates( [ 'template' => [ 'Good', 'Bad' ] ], $score ),
			"A mixed doc gets the same score"
		);
		$this->assertEquals(
			$score,
			$qs->boostTemplates( [ 'template' => [ 'Neutral' ] ], $score ),
			"A neutral doc gets the same score"
		);
	}

	/**
	 * Pages that only differ by their templates must be ranked accordingly,
	 * and values above a norm must not yield a higher score than the norm.
	 */
	public function testQualityScoreRanking() {
		$maxDocs = 10000000;
		$qs = new QualityScore( [ 'Good' => 2, 'Bad' => 0.5 ] );
		$qs->setMaxDocs( $maxDocs );

		$veryGoodArticle = $this->buildPage( 120340, '230000', [ 'Good' ] );
		$goodArticle = $this->buildPage( 120340, '230000', [] );
		$goodButBadArticle = $this->buildPage( 120340, '230000', [ 'Bad' ] );

		$this->assertLessThan(
			$qs->score( $veryGoodArticle ),
			$qs->score( $goodArticle ),
			"Same values but a boosted template give a better score"
		);
		$this->assertLessThan(
			$qs->score( $goodArticle ),
			$qs->score( $goodButBadArticle ),
			"Same values but without a negative boosted template give a better score"
		);

		$incomingLinksNorm = $maxDocs * QualityScore::INCOMING_LINKS_MAX_DOCS_FACTOR;

		$page1 = $this->buildPage( $incomingLinksNorm, '230000', [ 'Good' ] );
		$page2 = $this->buildPage( $incomingLinksNorm + 1, '230000', [ 'Good' ] );
		$this->assertEquals(
			$qs->score( $page1 ),
			$qs->score( $page2 ),
			"Having more incoming links than the norm give the same score"
		);

		$page1 = $this->buildPage( $incomingLinksNorm, QualityScore::PAGE_SIZE_NORM, [ 'Good' ] );
		$page2 = $this->buildPage( $incomingLinksNorm, QualityScore::PAGE_SIZE_NORM + 1, [ 'Good' ] );
		$this->assertEquals(
			$qs->score( $page1 ),
			$qs->score( $page2 ),
			"Having more text_bytes than the norm give the same score"
		);
	}

	/**
	 * score() must stay within ]0, SCORE_RANGE] for random non-empty pages and
	 * must hit the documented extremes on the edge cases.
	 */
	public function testQualityScoreWithRandomValues() {
		$maxDocs = 10000000;
		$qs = new QualityScore( [ 'Good' => 2, 'Bad' => 0.5 ] );
		$qs->setMaxDocs( $maxDocs );

		for ( $i = 0; $i < 1000; $i++ ) {
			$page = [
				'incoming_links' => mt_rand( 0, self::MAX_INT32 ),
				'external_link' => array_fill( 0, mt_rand( 1, 2000 ), null ),
				'text_bytes' => mt_rand( 1, 400000 ),
				'heading' => array_fill( 0, mt_rand( 1, 1000 ), null ),
				'redirect' => array_fill( 0, mt_rand( 1, 1000 ), null ),
				'template' => mt_rand( 0, 1 ) == 1 ? [ 'Good' ] : [ 'Bad' ],
			];
			$score = $qs->score( $page );

			$this->assertGreaterThan( 0, $score, "Score is always greater than 0" );
			// The upper bound is inclusive: the edge case below asserts that
			// SCORE_RANGE itself is reached once every value is at its norm,
			// which random pages can now do as well.
			$this->assertLessThanOrEqual( QualityScore::SCORE_RANGE, $score,
				"Score is never greater than " . QualityScore::SCORE_RANGE );
		}

		// Edges
		$page = $this->buildNormPage( $maxDocs * QualityScore::INCOMING_LINKS_MAX_DOCS_FACTOR );
		$this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( $page ),
			"Highest score is " . QualityScore::SCORE_RANGE );

		$page = [
			'incoming_links' => 0,
			'external_link' => [],
			'text_bytes' => 0,
			'heading' => [],
			'redirect' => [],
			'template' => [],
		];
		$this->assertEquals( 0, $qs->score( $page ), "Lowest score is 0" );

		$page = [];
		$this->assertEquals( 0, $qs->score( $page ), "Score of a broken article is 0" );

		// A very small wiki
		$qs = new QualityScore();
		$qs->setMaxDocs( 1 );
		$this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( $this->buildNormPage( 1 ) ),
			"With very small wiki the highest score is also " . QualityScore::SCORE_RANGE );

		// The scoring function should not fail with 0 page
		// (setMaxDocs() is deliberately not called on this instance)
		$qs = new QualityScore();
		$this->assertEquals( QualityScore::SCORE_RANGE, $qs->score( $this->buildNormPage( 1 ) ),
			"With a zero page wiki the highest score is also " . QualityScore::SCORE_RANGE );
	}

	/**
	 * Feeds randomly incomplete pages to every scorer and checks that each
	 * one still returns an integer within [0, SCORE_RANGE].
	 */
	public function testRobustness() {
		$templates = [ 'Good' => 2, 'Bad' => 0.5 ];
		// array_merge() rather than the union operator: `+` keeps the
		// left-hand entries for keys 0 and 1, which would silently drop the
		// unknown templates 'Foo' and 'Bar'.
		$allTemplates = array_merge( array_keys( $templates ), [ 'Foo', 'Bar' ] );

		for ( $i = 0; $i < 5000; $i++ ) {
			$scorers = [
				new PQScore( $templates ),
				new QualityScore( $templates ),
				new IncomingLinksScoringMethod(),
			];

			// Random non-empty prefix of the template list, in reverse order
			$tmpl = [];
			for ( $j = mt_rand( 0, count( $allTemplates ) - 1 ); $j >= 0; $j-- ) {
				$tmpl[] = $allTemplates[$j];
			}

			$page = [];
			$page['incoming_links'] = mt_rand( 0, 1 ) ? mt_rand( 0, 200 ) : null;
			$page['external_link'] = $this->randomArray( 200 );
			$page['text_bytes'] = mt_rand( 0, 1 ) ? (string)mt_rand( 0, 230000 ) : null;
			$page['heading'] = $this->randomArray( 30 );
			$page['redirect'] = $this->randomArray( 100 );
			$page['popularity_score'] = mt_rand( 0, 1 ) ? 1 / mt_rand( 1, 1800000 ) : null;
			// 'template' (singular) is the key used by every other document
			// in this test class.
			$page['template'] = mt_rand( 0, 1 ) ? $tmpl : null;

			$maxDocs = mt_rand( 0, 100 );
			$pagedebug = print_r( $page, true );

			foreach ( $scorers as $scorer ) {
				$scorer->setMaxDocs( $maxDocs );
				$score = $scorer->score( $page );
				$scorerClass = get_class( $scorer );

				$this->assertTrue( is_int( $score ),
					"Score is always an integer for " . $scorerClass . " with these values $pagedebug" );
				$this->assertTrue( $score >= 0,
					"Score is always positive " . $scorerClass . " with these values $pagedebug" );
				$this->assertTrue( $score <= QualityScore::SCORE_RANGE,
					"Score is always lower than QualityScore::SCORE_RANGE " . $scorerClass .
					" with these values $pagedebug" );
			}
		}
	}

	/**
	 * Asserts that a normalised score lies within [0, 1].
	 *
	 * @param float|int $score value returned by the function under test
	 * @param string $function name of the function, used in the failure message
	 */
	private function assertScoreInUnitRange( $score, $function ) {
		$this->assertLessThanOrEqual( 1, $score,
			"$function cannot produce a score greater than 1" );
		$this->assertGreaterThanOrEqual( 0, $score,
			"$function cannot produce a score lower than 0" );
	}

	/**
	 * Builds a page document with 200 external links, 30 headings and
	 * 100 redirects.
	 *
	 * @param int|float $incomingLinks value of the incoming_links field
	 * @param int|string $textBytes value of the text_bytes field
	 * @param string[] $templates value of the template field
	 * @return array
	 */
	private function buildPage( $incomingLinks, $textBytes, array $templates ) {
		return [
			'incoming_links' => $incomingLinks,
			'external_link' => array_fill( 0, 200, null ),
			'text_bytes' => $textBytes,
			'heading' => array_fill( 0, 30, null ),
			'redirect' => array_fill( 0, 100, null ),
			'template' => $templates,
		];
	}

	/**
	 * Builds a template-less page document whose external links, size,
	 * headings and redirects are exactly at their QualityScore norms.
	 *
	 * @param int|float $incomingLinks value of the incoming_links field
	 * @return array
	 */
	private function buildNormPage( $incomingLinks ) {
		return [
			'incoming_links' => $incomingLinks,
			'external_link' => array_fill( 0, QualityScore::EXTERNAL_LINKS_NORM, null ),
			'text_bytes' => QualityScore::PAGE_SIZE_NORM,
			'heading' => array_fill( 0, QualityScore::HEADING_NORM, null ),
			'redirect' => array_fill( 0, QualityScore::REDIRECT_NORM, null ),
			'template' => [],
		];
	}

	/**
	 * @param int $max max element in the array
	 * @return array|null randomly null or an array of size [0, $max]
	 */
	private function randomArray( $max ) {
		if ( !mt_rand( 0, 1 ) ) {
			return null;
		}

		$size = mt_rand( 0, $max );
		// Handle the empty case explicitly instead of asking array_fill()
		// for zero elements.
		return $size === 0 ? [] : array_fill( 0, $size, null );
	}
}