%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/vendor/wikimedia/textcat/
Upload File :
Create Path :
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/vendor/wikimedia/textcat/felis.php

<?php
/**
 * Generate ngrams data from text files.
 * Run: php felis.php INPUTDIR OUTPUTDIR
 * INPUTDIR should contain text files, e.g. english.txt
 * OUTPUTDIR will contain the generated ngram files, e.g. english.lm
 */

// Language model generation failing?
// Raise your memory limit (see below) or set $minFreq > 0 in TextCat.php.
// ini_set('memory_limit', '2000000000');

require_once __DIR__.'/TextCat.php';
// TODO: add option to control model ngram count
// Ngram count handed to TextCat::createLM() for every input file.
$maxNgrams = 4000;

if ( $argc !== 3 ) {
	// Report usage errors on stderr with a non-zero status so calling scripts can
	// detect them; die() with a string prints to stdout and exits with status 0.
	fwrite( STDERR, "Use $argv[0] INPUTDIR OUTPUTDIR\n" );
	exit( 1 );
}

$inputDir = $argv[1];
$outputDir = $argv[2];

// DirectoryIterator throws on a missing or non-directory path; fail with a clear message instead.
if ( !is_dir( $inputDir ) ) {
	fwrite( STDERR, "Input directory not found: $inputDir\n" );
	exit( 1 );
}

// is_dir() rather than file_exists(): a regular file at this path must not be accepted.
// The trailing is_dir() tolerates the directory being created concurrently.
if ( !is_dir( $outputDir ) && !mkdir( $outputDir, 0755, true ) && !is_dir( $outputDir ) ) {
	fwrite( STDERR, "Cannot create output directory: $outputDir\n" );
	exit( 1 );
}

$cat = new TextCat( $outputDir );

// Set when at least one input file could not be read, so the exit status reflects it.
$hadErrors = false;

foreach ( new DirectoryIterator( $inputDir ) as $file ) {
	// Skip ".", ".." and subdirectories; only regular files are used as input.
	if ( !$file->isFile() ) {
		continue;
	}

	$text = file_get_contents( $file->getPathname() );
	if ( $text === false ) {
		// Do not feed a boolean into createLM(); report the file and keep going.
		fwrite( STDERR, "Cannot read input file: " . $file->getPathname() . "\n" );
		$hadErrors = true;
		continue;
	}

	$ngrams = $cat->createLM( $text, $maxNgrams );
	// english.txt -> OUTPUTDIR/english.lm (only a trailing ".txt" is stripped from the name).
	$cat->writeLanguageFile( $ngrams, $outputDir . "/" . $file->getBasename( ".txt" ) . ".lm" );
}

exit( $hadErrors ? 1 : 0 );

Zerion Mini Shell 1.0