%PDF- %PDF-
| Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/LanguageDetector/ |
| Current File : //www/varak.net/wiki.varak.net/extensions/CirrusSearch/includes/LanguageDetector/TextCat.php |
<?php
namespace CirrusSearch\LanguageDetector;
use CirrusSearch;
use MediaWiki\Logger\LoggerFactory;
/**
* Try to detect language with TextCat text categorizer
*/
class TextCat implements Detector {
/**
* Detect language
*
* @param CirrusSearch $cirrus Searching class
* @param string $text Text to detect language
* @return string|null Preferred language, or null if none found
*/
public function detect( CirrusSearch $cirrus, $text ) {
$config = $cirrus->getConfig();
if( empty( $config ) ) {
// Should not happen
return null;
}
$dir = $config->getElement('CirrusSearchTextcatModel');
if( !$dir ) {
return null;
}
if( !is_dir( $dir ) ) {
LoggerFactory::getInstance( 'CirrusSearch' )->warning(
"Bad directory for TextCat model: {dir}",
[ "dir" => $dir ]
);
}
$textcat = new \TextCat( $dir );
$languages = $textcat->classify( $text, $config->getElement( 'CirrusSearchTextcatLanguages' ) );
if( !empty( $languages ) ) {
// For now, just return the best option
// TODO: thing what else we could do
reset( $languages );
return key( $languages );
}
}
}