%PDF- %PDF-
Direktori : /www/varak.net/nextcloud.varak.net/apps_old/apps/bookmarks/vendor/rowbot/idna/bin/ |
Current File : //www/varak.net/nextcloud.varak.net/apps_old/apps/bookmarks/vendor/rowbot/idna/bin/RegexBuilder.php |
<?php

declare(strict_types=1);

namespace Rowbot\Idna\Bin;

use RuntimeException;

use function array_filter;
use function assert;
use function file_put_contents;
use function in_array;
use function is_array;
use function sprintf;

use const DIRECTORY_SEPARATOR as DS;

/**
 * Generates the Rowbot\Idna\Resource\Regex class, whose constants hold the regular expressions
 * built from the derived property data files read through parseProperties().
 */
class RegexBuilder extends Builder
{
    /**
     * Builds every regular expression and writes them, as class constants, to "Regex.php" inside
     * the given directory.
     *
     * @param string $output Directory the generated "Regex.php" file is written to.
     *
     * @throws RuntimeException When the generated file cannot be written.
     */
    public static function buildRegexClass(string $output): void
    {
        $bidiData = self::parseProperties('extracted/DerivedBidiClass.txt');
        $rtlLabel = sprintf('/[%s]/u', self::buildClassFromProperties($bidiData, ['R', 'AL', 'AN']));

        // Step 1. The first character must be a character with Bidi property L, R, or AL. If it has the R
        // or AL property, it is an RTL label; if it has the L property, it is an LTR label.
        //
        // Because any code point not explicitly listed in DerivedBidiClass.txt is considered to have the
        // 'L' property, we negate a character class matching all code points explicitly listed in
        // DerivedBidiClass.txt minus the ones explicitly marked as 'L'.
        $bidiStep1Ltr = sprintf('/^[^%s]/u', self::buildClassFromProperties($bidiData, ['L'], true));
        $bidiStep1Rtl = sprintf('/^[%s]/u', self::buildClassFromProperties($bidiData, ['R', 'AL']));

        // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES, CS, ET, ON,
        // BN, or NSM are allowed.
        $bidiStep2 = sprintf(
            '/[^%s]/u',
            self::buildClassFromProperties(
                $bidiData,
                ['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM']
            )
        );

        // Step 3. In an RTL label, the end of the label must be a character with Bidi property R, AL, EN,
        // or AN, followed by zero or more characters with Bidi property NSM.
        $bidiStep3 = sprintf(
            '/[%s][%s]*$/u',
            self::buildClassFromProperties($bidiData, ['R', 'AL', 'EN', 'AN']),
            self::buildClassFromProperties($bidiData, ['NSM'])
        );

        // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
        $bidiStep4EN = sprintf('/[%s]/u', self::buildClassFromProperties($bidiData, ['EN']));
        $bidiStep4AN = sprintf('/[%s]/u', self::buildClassFromProperties($bidiData, ['AN']));

        // Step 5. In an LTR label, only characters with the Bidi properties L, EN, ES, CS, ET, ON, BN, or
        // NSM are allowed.
        //
        // Because any code point not explicitly listed in DerivedBidiClass.txt is considered to have the
        // 'L' property, we create a character class matching all code points explicitly listed in
        // DerivedBidiClass.txt minus the ones explicitly marked as 'L', 'EN', 'ES', 'CS', 'ET', 'ON',
        // 'BN', or 'NSM'.
        $bidiStep5 = sprintf(
            '/[%s]/u',
            self::buildClassFromProperties(
                $bidiData,
                ['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'],
                true
            )
        );

        // Step 6. In an LTR label, the end of the label must be a character with Bidi property L or EN,
        // followed by zero or more characters with Bidi property NSM.
        //
        // Again, because any code point not explicitly listed in DerivedBidiClass.txt is considered to
        // have the 'L' property, we negate a character class matching all code points explicitly listed in
        // DerivedBidiClass.txt to match characters with the 'L' and 'EN' property.
        $bidiStep6 = sprintf(
            '/[^%s][%s]*$/u',
            self::buildClassFromProperties($bidiData, ['L', 'EN'], true),
            self::buildClassFromProperties($bidiData, ['NSM'])
        );

        $combiningMarks = self::buildCombiningMarksRegex();
        $zwnj = self::buildJoiningTypesRegex();

        // The generated patterns end up inside single-quoted literals, so their "\x{...}" escapes
        // reach PCRE untouched.
        $s = <<<RegexClass
<?php

// This file was auto generated by running 'php bin/generateDataFiles.php'

declare(strict_types=1);

namespace Rowbot\Idna\Resource;

final class Regex
{
    public const COMBINING_MARK = '{$combiningMarks}';

    public const RTL_LABEL = '{$rtlLabel}';

    public const BIDI_STEP_1_LTR = '{$bidiStep1Ltr}';
    public const BIDI_STEP_1_RTL = '{$bidiStep1Rtl}';
    public const BIDI_STEP_2 = '{$bidiStep2}';
    public const BIDI_STEP_3 = '{$bidiStep3}';
    public const BIDI_STEP_4_AN = '{$bidiStep4AN}';
    public const BIDI_STEP_4_EN = '{$bidiStep4EN}';
    public const BIDI_STEP_5 = '{$bidiStep5}';
    public const BIDI_STEP_6 = '{$bidiStep6}';

    public const ZWNJ = '{$zwnj}';

    /**
     * @codeCoverageIgnore
     */
    private function __construct()
    {
    }
}

RegexClass;

        $path = $output . DS . 'Regex.php';

        // file_put_contents() signals failure by returning false; surface it instead of silently
        // leaving a missing or stale generated file behind.
        if (file_put_contents($path, $s) === false) {
            throw new RuntimeException(sprintf('Unable to write generated regex class to "%s".', $path));
        }
    }

    /**
     * Builds the regular expression used for the ZWNJ joining-type context check.
     */
    private static function buildJoiningTypesRegex(): string
    {
        $joiningTypes = self::parseProperties('extracted/DerivedJoiningType.txt');

        // ((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*(Joining_Type:{R,D}))
        // We use a capturing group around the first portion of the regex so we can count the byte length
        // of the match and increment preg_match's offset accordingly.
        return sprintf(
            '/([%1$s%2$s][%3$s]*\x{200C}[%3$s]*)[%4$s%2$s]/u',
            self::buildClassFromProperties($joiningTypes, ['L']),
            self::buildClassFromProperties($joiningTypes, ['D']),
            self::buildClassFromProperties($joiningTypes, ['T']),
            self::buildClassFromProperties($joiningTypes, ['R'])
        );
    }

    /**
     * Builds the regular expression matching a leading code point whose general category is
     * Mc, Me, or Mn.
     */
    private static function buildCombiningMarksRegex(): string
    {
        $generalCategories = self::parseProperties('extracted/DerivedGeneralCategory.txt');

        return sprintf('/^[%s]/u', self::buildClassFromProperties($generalCategories, ['Mc', 'Me', 'Mn']));
    }

    /**
     * Builds a character class body from the entries whose property value (index 1) is listed in
     * $properties, or, when $exclude is true, from the entries whose property value is not listed.
     *
     * @param array<int, array<int, array<int, int>|string>> $data
     * @param array<int, string>                             $properties
     */
    private static function buildClassFromProperties(array $data, array $properties, bool $exclude = false): string
    {
        $selected = array_filter($data, static function (array $entry) use ($properties, $exclude): bool {
            return in_array($entry[1], $properties, true) !== $exclude;
        });

        return self::buildCharacterClass($selected);
    }

    /**
     * Concatenates the code point pair at index 0 of each entry into the body of a regex character
     * class, emitting "\x{A}-\x{B}" when the pair's two values differ and a single "\x{A}" otherwise.
     *
     * @param array<int, array<int, array<int, int>|string>> $data
     */
    private static function buildCharacterClass(array $data): string
    {
        $out = '';

        foreach ($data as $codePoints) {
            assert(is_array($codePoints[0]));
            [$start, $end] = $codePoints[0];

            $out .= $start !== $end
                ? sprintf('\x{%04X}-\x{%04X}', $start, $end)
                : sprintf('\x{%04X}', $start);
        }

        return $out;
    }
}