%PDF- %PDF-
Direktori : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/profiles/ |
Current File : /www/varak.net/wiki.varak.net/extensions/CirrusSearch/profiles/SaneitizeProfiles.php |
<?php
/**
 * CirrusSearch - List of sanitization profiles.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */

/**
 * List of sanitization profiles suited for a 2 hour refresh frequency.
 * Allows saneitizeJobs to automatically select its settings according to
 * wiki size.
 * The first profile that satisfies max(page_id)-min(page_id) < max_wiki_size
 * will be chosen (the array is sorted before applying profile selection).
 *
 * Every profile carries the same set of keys; they are documented once on
 * the 'XS' profile below.
 */
$wgCirrusSearchSanitizationProfiles = [
	// Loop in 9 days for 11k ids, 0.00014 jobs/sec, with 18% ids wrong
	// it's 0.0025 updates/sec per cluster
	'XS' => [
		// Upper bound (exclusive) on max(page_id)-min(page_id) for this
		// profile to be selected.
		'max_wiki_size' => 12000,
		// Size of the chunk sent per CheckerJob
		'jobs_chunk_size' => 10,
		// Number of articles processed in batch by a checker job.
		// Number of batches is jobs_chunk_size/checker_batch_size.
		// A higher value will increase throughput but will also
		// consume more memory on the jobrunners.
		'checker_batch_size' => 10,
		// Max number of checker jobs.
		// NOTE(review): presumably the cap on checker jobs scheduled at
		// once -- confirm against the code that consumes this profile.
		'max_checker_jobs' => 10,
		// Max number of update jobs, the checker jobs will hold until the
		// number of pending update jobs decreases below this limit.
		// This value depends on the number of jobrunners available
		// and the max write throughput you want to put on elastic.
		'update_jobs_max_pressure' => 50,
		// Max time in seconds a checker job is allowed to run,
		// the job will reschedule itself at a later time with
		// a new offset if this timeout is reached.
		'checker_job_max_time' => 60,
		// Minimum time to wait between loops in seconds
		// Default: 2 weeks
		// Useful to not restart a loop too frequently on small wikis
		'min_loop_duration' => 2 * 7 * 24 * 3600,
	],
	// Loop in 16 days for 99k ids, 0.006 jobs/sec, with 18% ids wrong
	// it's 0.0125 updates/sec per cluster
	'S' => [
		'max_wiki_size' => 100000,
		'jobs_chunk_size' => 10,
		'checker_batch_size' => 10,
		'max_checker_jobs' => 50,
		'update_jobs_max_pressure' => 100,
		'checker_job_max_time' => 60,
		'min_loop_duration' => 2 * 7 * 24 * 3600,
	],
	// Loop in 15 days for 920k ids, 0.06 jobs/sec, with 18% ids wrong
	// it's 0.125 updates/sec per cluster
	'M' => [
		'max_wiki_size' => 1000000,
		'jobs_chunk_size' => 10,
		'checker_batch_size' => 10,
		'max_checker_jobs' => 500,
		'update_jobs_max_pressure' => 250,
		'checker_job_max_time' => 60,
		'min_loop_duration' => 2 * 7 * 24 * 3600,
	],
	// Loop in 17 days for 10m ids, 0.13 jobs/sec, with 18% ids wrong
	// it's 1.25 updates/sec per cluster
	'L' => [
		'max_wiki_size' => 12000000,
		'jobs_chunk_size' => 50,
		'checker_batch_size' => 10,
		'max_checker_jobs' => 1000,
		'update_jobs_max_pressure' => 500,
		'checker_job_max_time' => 60,
		'min_loop_duration' => 2 * 7 * 24 * 3600,
	],
	// Loop in 15 days for 27m ids, 0.20 jobs/sec, with 18% ids wrong
	// it's 3.75 updates/sec per cluster
	'XL' => [
		'max_wiki_size' => 30000000,
		'jobs_chunk_size' => 100,
		'checker_batch_size' => 10,
		'max_checker_jobs' => 1500,
		'update_jobs_max_pressure' => 750,
		'checker_job_max_time' => 60,
		'min_loop_duration' => 2 * 7 * 24 * 3600,
	],
	// Loop in 16 days for 50m ids, 0.34 jobs/sec, with 18% ids wrong
	// it's 6 updates/sec per cluster
	'XXL' => [
		// Catch-all: any wiki too large for the profiles above.
		'max_wiki_size' => PHP_INT_MAX,
		'jobs_chunk_size' => 100,
		'checker_batch_size' => 10, // 10*10 =~ 5sec on terbium
		'max_checker_jobs' => 2500,
		'update_jobs_max_pressure' => 1000,
		'checker_job_max_time' => 60,
		'min_loop_duration' => 2 * 7 * 24 * 3600,
	],
];