%PDF- %PDF-
| Direktori : /www/varak.net/nextcloud.varak.net/apps_old/apps/bookmarks/vendor/rowbot/idna/bin/ |
| Current File : /www/varak.net/nextcloud.varak.net/apps_old/apps/bookmarks/vendor/rowbot/idna/bin/RegexBuilder.php |
<?php
declare(strict_types=1);
namespace Rowbot\Idna\Bin;
use function array_filter;
use function assert;
use function file_put_contents;
use function in_array;
use function is_array;
use function sprintf;
use const DIRECTORY_SEPARATOR as DS;
class RegexBuilder extends Builder
{
/**
 * Generates the Regex resource class from the parsed Unicode property files.
 *
 * Builds the patterns for the Bidi rule steps (from extracted/DerivedBidiClass.txt), the leading
 * combining mark check and the ZWNJ joining context check, then writes them as constants of a
 * generated Rowbot\Idna\Resource\Regex class to "<output>/Regex.php".
 *
 * @param string $output Directory in which Regex.php is created.
 *
 * @throws \RuntimeException If Regex.php cannot be written.
 */
public static function buildRegexClass(string $output): void
{
    $bidiData = self::parseProperties('extracted/DerivedBidiClass.txt');

    // Returns the body of a character class covering every parsed entry whose Bidi class is one of
    // $classes or, when $exclude is true, every parsed entry whose Bidi class is NOT one of them.
    $charClass = static function (array $classes, bool $exclude = false) use ($bidiData): string {
        return self::buildCharacterClass(array_filter(
            $bidiData,
            static function (array $data) use ($classes, $exclude): bool {
                return in_array($data[1], $classes, true) !== $exclude;
            }
        ));
    };

    // NSM is the trailing class for both the RTL (step 3) and LTR (step 6) end-of-label patterns.
    $nsm = $charClass(['NSM']);

    // Matches any character that makes a label an RTL label (Bidi property R, AL, or AN).
    $rtlLabel = sprintf('/[%s]/u', $charClass(['R', 'AL', 'AN']));

    // Step 1. The first character must be a character with Bidi property L, R, or AL. If it has the R
    // or AL property, it is an RTL label; if it has the L property, it is an LTR label.
    //
    // Because any code point not explicitly listed in DerivedBidiClass.txt is considered to have the
    // 'L' property, we negate a character class matching all code points explicitly listed in
    // DerivedBidiClass.txt minus the ones explicitly marked as 'L'.
    $bidiStep1Ltr = sprintf('/^[^%s]/u', $charClass(['L'], true));
    $bidiStep1Rtl = sprintf('/^[%s]/u', $charClass(['R', 'AL']));

    // Step 2. In an RTL label, only characters with the Bidi properties R, AL, AN, EN, ES, CS, ET, ON,
    // BN, or NSM are allowed. The pattern matches any character outside of that set.
    $bidiStep2 = sprintf(
        '/[^%s]/u',
        $charClass(['R', 'AL', 'AN', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'])
    );

    // Step 3. In an RTL label, the end of the label must be a character with Bidi property R, AL, EN,
    // or AN, followed by zero or more characters with Bidi property NSM.
    $bidiStep3 = sprintf('/[%s][%s]*$/u', $charClass(['R', 'AL', 'EN', 'AN']), $nsm);

    // Step 4. In an RTL label, if an EN is present, no AN may be present, and vice versa.
    $bidiStep4EN = sprintf('/[%s]/u', $charClass(['EN']));
    $bidiStep4AN = sprintf('/[%s]/u', $charClass(['AN']));

    // Step 5. In an LTR label, only characters with the Bidi properties L, EN, ES, CS, ET, ON, BN, or
    // NSM are allowed.
    //
    // Because any code point not explicitly listed in DerivedBidiClass.txt is considered to have the
    // 'L' property, we create a character class matching all code points explicitly listed in
    // DerivedBidiClass.txt minus the ones explicitly marked as 'L', 'EN', 'ES', 'CS', 'ET', 'ON',
    // 'BN', or 'NSM'.
    $bidiStep5 = sprintf(
        '/[%s]/u',
        $charClass(['L', 'EN', 'ES', 'CS', 'ET', 'ON', 'BN', 'NSM'], true)
    );

    // Step 6. In an LTR label, the end of the label must be a character with Bidi property L or EN,
    // followed by zero or more characters with Bidi property NSM.
    //
    // Again, because any code point not explicitly listed in DerivedBidiClass.txt is considered to
    // have the 'L' property, we negate a character class matching all code points explicitly listed in
    // DerivedBidiClass.txt minus the ones marked as 'L' or 'EN', which leaves exactly the characters
    // with the 'L' or 'EN' property.
    $bidiStep6 = sprintf('/[^%s][%s]*$/u', $charClass(['L', 'EN'], true), $nsm);

    $combiningMarks = self::buildCombiningMarksRegex();
    $zwnj = self::buildJoiningTypesRegex();

    // The heredoc body is the literal content of the generated file, so it must stay flush left.
    $s = <<<RegexClass
<?php
// This file was auto generated by running 'php bin/generateDataFiles.php'
declare(strict_types=1);
namespace Rowbot\Idna\Resource;
final class Regex
{
public const COMBINING_MARK = '{$combiningMarks}';
public const RTL_LABEL = '{$rtlLabel}';
public const BIDI_STEP_1_LTR = '{$bidiStep1Ltr}';
public const BIDI_STEP_1_RTL = '{$bidiStep1Rtl}';
public const BIDI_STEP_2 = '{$bidiStep2}';
public const BIDI_STEP_3 = '{$bidiStep3}';
public const BIDI_STEP_4_AN = '{$bidiStep4AN}';
public const BIDI_STEP_4_EN = '{$bidiStep4EN}';
public const BIDI_STEP_5 = '{$bidiStep5}';
public const BIDI_STEP_6 = '{$bidiStep6}';
public const ZWNJ = '{$zwnj}';
/**
* @codeCoverageIgnore
*/
private function __construct()
{
}
}
RegexClass;

    $path = $output . DS . 'Regex.php';

    // file_put_contents() returns false on failure; surface that instead of finishing "successfully"
    // without a generated file.
    if (file_put_contents($path, $s) === false) {
        throw new \RuntimeException(sprintf('Unable to write generated regex class to "%s".', $path));
    }
}
/**
 * Builds the pattern that checks the joining context around U+200C (ZERO WIDTH NON-JOINER).
 *
 * The pattern has the shape:
 * ((Joining_Type:{L,D})(Joining_Type:T)*\u200C(Joining_Type:T)*)(Joining_Type:{R,D})
 *
 * Everything up to, but excluding, the final right/dual joining character sits in a capturing
 * group so that the byte length of that part of the match can be counted and preg_match's offset
 * advanced accordingly.
 *
 * @return string The complete pattern, including delimiters and the "u" modifier.
 */
private static function buildJoiningTypesRegex(): string
{
    $entries = self::parseProperties('extracted/DerivedJoiningType.txt');
    $classByType = [];

    // One character class body per joining type referenced by the pattern.
    foreach (['L', 'D', 'T', 'R'] as $type) {
        $matching = array_filter($entries, static function (array $entry) use ($type): bool {
            return $entry[1] === $type;
        });
        $classByType[$type] = self::buildCharacterClass($matching);
    }

    return sprintf(
        '/([%1$s%2$s][%3$s]*\x{200C}[%3$s]*)[%4$s%2$s]/u',
        $classByType['L'],
        $classByType['D'],
        $classByType['T'],
        $classByType['R']
    );
}
/**
 * Builds the pattern that matches a string starting with a combining mark, i.e. a code point
 * whose General_Category in extracted/DerivedGeneralCategory.txt is Mc, Me, or Mn.
 *
 * @return string The complete pattern, including delimiters and the "u" modifier.
 */
private static function buildCombiningMarksRegex(): string
{
    $entries = self::parseProperties('extracted/DerivedGeneralCategory.txt');

    $marks = array_filter($entries, static function (array $entry): bool {
        $category = $entry[1];

        return $category === 'Mc' || $category === 'Me' || $category === 'Mn';
    });

    $characterClass = self::buildCharacterClass($marks);

    return sprintf('/^[%s]/u', $characterClass);
}
/**
 * Renders parsed property entries as the body of a PCRE character class (without the brackets).
 *
 * Index 0 of each entry holds a [first, last] code point pair. A pair whose two values are equal
 * is emitted as a single "\x{HHHH}" escape; any other pair is emitted as a "\x{HHHH}-\x{HHHH}"
 * range. Code points are written as upper-case hexadecimal, zero-padded to at least four digits.
 *
 * @param array<int, array<int, array<int, int>|string>> $data
 *
 * @return string The concatenated escapes, or an empty string when $data is empty.
 */
private static function buildCharacterClass(array $data): string
{
    $characterClass = '';

    foreach ($data as $entry) {
        assert(is_array($entry[0]));
        $range = $entry[0];

        $characterClass .= $range[0] === $range[1]
            ? sprintf('\x{%04X}', $range[0])
            : sprintf('\x{%04X}-\x{%04X}', ...$range);
    }

    return $characterClass;
}
}