[wp-trac] [WordPress Trac] #63913: WordPress assumes that the UTF-8 PCRE flag is available.
WordPress Trac
noreply at wordpress.org
Wed Sep 3 15:05:22 UTC 2025
#63913: WordPress assumes that the UTF-8 PCRE flag is available.
-------------------------+-----------------------------
Reporter: dmsnell | Owner: (none)
Type: enhancement | Status: new
Priority: low | Milestone: Future Release
Component: Charset | Version: trunk
Severity: normal | Resolution:
Keywords: has-patch | Focuses:
-------------------------+-----------------------------
Comment (by tusharbharti):
If anyone is interested in the scanner, here it is. It uses
https://github.com/nikic/PHP-Parser
{{{#!php
<?php
require 'vendor/autoload.php';
use PhpParser\Error;
use PhpParser\Node;
use PhpParser\NodeTraverser;
use PhpParser\NodeVisitorAbstract;
use PhpParser\ParserFactory;
/**
 * AST visitor that reports string literals which look like PCRE patterns
 * carrying the `u` (UTF-8) modifier.
 *
 * Each hit is printed to stdout as "<file>:<line>: <literal>".
 */
class RegexVisitor extends NodeVisitorAbstract {
    /**
     * Bracket-style opening delimiters and the closing delimiter each one requires.
     */
    private const BRACKET_PAIRS = ['(' => ')', '[' => ']', '{' => '}', '<' => '>'];

    /** Path of the file being traversed; used as the prefix of every reported hit. */
    private string $file;

    /**
     * @param string $file Path printed in front of each reported literal.
     */
    public function __construct(string $file) {
        $this->file = $file;
    }

    /**
     * Inspects one AST node and prints it when it is a string literal shaped
     * like "<delimiter>body<delimiter>modifiers" whose modifiers contain `u`.
     *
     * @param Node $node Node currently being entered by the traverser.
     */
    public function enterNode(Node $node): void {
        $value = $this->literalValue($node);
        if ($value === null) {
            return;
        }

        $modifiers = $this->patternModifiers($value);
        if ($modifiers !== null && strpos($modifiers, 'u') !== false) {
            printf(
                "%s:%d: %s\n",
                $this->file,
                $node->getStartLine(),
                $value
            );
        }
    }

    /**
     * Returns the static text of a string literal node, or null for any other node.
     *
     * Covers any string literal: single-quoted, double-quoted, HEREDOC, NOWDOC
     * and encapsed (interpolated) strings. Interpolated expressions contribute
     * an empty string, so only the literal parts are kept.
     *
     * NOTE(review): PHP-Parser 5 renamed Encapsed/EncapsedStringPart to
     * InterpolatedString/InterpolatedStringPart and is documented to keep the
     * old names as aliases; confirm against the installed version.
     */
    private function literalValue(Node $node): ?string {
        if ($node instanceof Node\Scalar\String_) {
            return $node->value;
        }

        if ($node instanceof Node\Scalar\Encapsed) {
            return implode('', array_map(static function ($part) {
                return $part instanceof Node\Scalar\EncapsedStringPart
                    ? $part->value
                    : '';
            }, $node->parts));
        }

        return null;
    }

    /**
     * Returns the modifier string when $value is shaped like a delimited PCRE
     * pattern, or null when it is not.
     *
     * The body is scanned by hand instead of with a single regex so that:
     *  - bracket-style delimiters close on their matching bracket, with nesting;
     *  - a backslash always escapes the following byte, so an escaped delimiter
     *    can never be mistaken for the closing one;
     *  - literals that span several lines are still recognised.
     */
    private function patternModifiers(string $value): ?string {
        if ($value === '') {
            return null;
        }

        // A delimiter may not be alphanumeric, whitespace or a backslash.
        $open = $value[0];
        if (preg_match('/^[a-zA-Z0-9\s\\\\]/', $open) === 1) {
            return null;
        }

        $close = self::BRACKET_PAIRS[$open] ?? $open;
        $isBracketStyle = $close !== $open;
        $depth = 1;
        $length = strlen($value);

        for ($i = 1; $i < $length; $i++) {
            $char = $value[$i];

            if ($char === '\\') {
                // Skip the escaped byte, whatever it is.
                $i++;
                continue;
            }

            if ($char === $close) {
                if (--$depth === 0) {
                    // Everything after the closing delimiter must be modifiers.
                    $tail = substr($value, $i + 1);

                    return preg_match('/^([imsxADSUXJunr]+)$/', $tail, $m) === 1
                        ? $m[1]
                        : null;
                }
            } elseif ($isBracketStyle && $char === $open) {
                $depth++;
            }
        }

        return null;
    }
}
// Walk every PHP file below $root, parse it, and let RegexVisitor report
// the string literals it flags.
$root = __DIR__ . '/src'; // adjust path to WP root
$parser = (new ParserFactory)->createForNewestSupportedVersion();

$directory = new RecursiveDirectoryIterator($root);
$rii = new RecursiveIteratorIterator($directory);

foreach ($rii as $file) {
    $path = $file->getPathname();

    // Only regular entries with a .php extension are scanned.
    $isPhpSource = !$file->isDir()
        && pathinfo($path, PATHINFO_EXTENSION) === 'php';
    if (!$isPhpSource) {
        continue;
    }

    $code = file_get_contents($path);

    try {
        $ast = $parser->parse($code);
    } catch (Error $e) {
        // A file that does not parse is reported on stderr and skipped.
        fwrite(STDERR, "Parse error in {$file}: {$e->getMessage()}\n");
        continue;
    }

    // A fresh traverser and visitor per file, so each report carries that file's path.
    $visitor = new RegexVisitor($path);
    $traverser = new NodeTraverser();
    $traverser->addVisitor($visitor);
    $traverser->traverse($ast);
}
}}}
Regex version
{{{#!php
<?php
// Line-based scanner: prints every source line below $root that contains a
// quoted literal shaped like a PCRE pattern with the `u` modifier.
$root = __DIR__ . '/src';
$iterator = new RecursiveIteratorIterator(
    new RecursiveDirectoryIterator($root)
);

// quote, delimiter, body, same delimiter, modifiers, same quote.
// The body may contain escaped characters but neither the bare delimiter nor
// the bare opening quote. Because the closing delimiter must equal the opening
// one, bracket-style delimiters such as {...} are not recognised here, and a
// pattern split across several source lines is missed by the per-line scan.
$pattern = '/(?P<quote>[\'"])(?P<delim>[^a-zA-Z0-9\s\\\\])
    (?P<regex>(?:\\\\.|(?!\2|\1).)*)
    (?P=delim)(?P<modifiers>[imsxADSUXJunr]+)(?P=quote)/x';

foreach ($iterator as $file) {
    if ($file->isDir()) {
        continue;
    }

    // Pass the path as a string instead of relying on SplFileInfo's string cast.
    $path = $file->getPathname();
    if (pathinfo($path, PATHINFO_EXTENSION) !== 'php') {
        continue;
    }

    $lines = file($path);
    if ($lines === false) {
        // file() returns false on failure; report it and keep scanning.
        fwrite(STDERR, "Unable to read {$path}\n");
        continue;
    }

    foreach ($lines as $lineNumber => $line) {
        if (!preg_match_all($pattern, $line, $matches, PREG_SET_ORDER)) {
            continue;
        }

        foreach ($matches as $match) {
            if (strpos($match['modifiers'], 'u') !== false) {
                printf(
                    "%s:%d: %s\n",
                    $path,
                    $lineNumber + 1,
                    trim($line)
                );
                // The whole line is printed, so one report per line is enough.
                break;
            }
        }
    }
}
}}}
--
Ticket URL: <https://core.trac.wordpress.org/ticket/63913#comment:5>
WordPress Trac <https://core.trac.wordpress.org/>
WordPress publishing platform
More information about the wp-trac
mailing list