[wp-trac] [WordPress Trac] #63913: WordPress assumes that the UTF-8 PCRE flag is available.

WordPress Trac noreply at wordpress.org
Wed Sep 3 15:05:22 UTC 2025


#63913: WordPress assumes that the UTF-8 PCRE flag is available.
-------------------------+-----------------------------
 Reporter:  dmsnell      |       Owner:  (none)
     Type:  enhancement  |      Status:  new
 Priority:  low          |   Milestone:  Future Release
Component:  Charset      |     Version:  trunk
 Severity:  normal       |  Resolution:
 Keywords:  has-patch    |     Focuses:
-------------------------+-----------------------------

Comment (by tusharbharti):

 If anyone is interested in the scanner, it uses
 https://github.com/nikic/PHP-Parser
 {{{#!php
 <?php
 require 'vendor/autoload.php';

 use PhpParser\Error;
 use PhpParser\Node;
 use PhpParser\NodeTraverser;
 use PhpParser\NodeVisitorAbstract;
 use PhpParser\ParserFactory;

 /**
  * Node visitor that reports string literals which look like PCRE patterns
  * carrying the `u` (UTF-8) modifier.
  *
  * Detection is heuristic: a literal counts as a pattern when its text has
  * the shape "<delimiter><body><same delimiter><modifiers>". Patterns that
  * use paired delimiters such as {...} or (...), and patterns assembled by
  * concatenation, are not recognised.
  */
 class RegexVisitor extends NodeVisitorAbstract {
     /**
      * Shape of a delimited pattern literal: delimiter, body, delimiter,
      * modifiers.
      *
      * - The body is made of escape pairs (backslash plus any character) or
      *   single characters that are neither the delimiter nor a backslash,
      *   so every backslash is consumed in exactly one way.
      * - The `s` flag and the negated class let the body span several lines.
      * - The modifier set includes `n` (PHP 8.2) and `r` (PHP 8.4).
      */
     private const PATTERN_SHAPE = '/^(?P<delim>[^a-zA-Z0-9\s\\\\])
                   (?P<body>(?:\\\\.|(?!\1)[^\\\\])*)
                   \1(?P<modifiers>[imsxADSUXJunr]+)$/xs';

     /** Path printed in front of every reported match. */
     private string $file;

     /**
      * @param string $file Path of the file whose AST is being traversed.
      */
     public function __construct(string $file) {
         $this->file = $file;
     }

     /**
      * Prints "file:line: literal" when the node is a string literal that
      * looks like a pattern with the `u` modifier.
      *
      * @param Node $node Node currently being entered by the traverser.
      */
     public function enterNode(Node $node): void {
         $value = $this->literalText($node);
         if ($value === null) {
             return;
         }

         // Anything other than 1 means "no match" or a PCRE failure.
         if (preg_match(self::PATTERN_SHAPE, $value, $m) !== 1) {
             return;
         }

         if (strpos($m['modifiers'], 'u') === false) {
             return;
         }

         printf("%s:%d: %s\n", $this->file, $node->getStartLine(), $value);
     }

     /**
      * Returns the literal text of a string node, or null for any other node.
      *
      * For interpolated strings only the literal parts are joined;
      * interpolated expressions contribute nothing.
      */
     private function literalText(Node $node): ?string {
         if ($node instanceof Node\Scalar\String_) {
             return $node->value;
         }

         if ($node instanceof Node\Scalar\Encapsed) {
             $text = '';
             foreach ($node->parts as $part) {
                 if ($part instanceof Node\Scalar\EncapsedStringPart) {
                     $text .= $part->value;
                 }
             }
             return $text;
         }

         return null;
     }
 }

 // Walk every .php file under $root, parse it, and let RegexVisitor report
 // string literals that look like patterns with the `u` modifier.
 $root = __DIR__ . '/src'; // adjust path to WP root
 $parser = (new ParserFactory)->createForNewestSupportedVersion();

 $rii = new RecursiveIteratorIterator(
     new RecursiveDirectoryIterator($root)
 );

 foreach ($rii as $file) {
     if ($file->isDir() || $file->getExtension() !== 'php') {
         continue;
     }

     $path = $file->getPathname();

     // Report unreadable files instead of silently parsing an empty string.
     $code = file_get_contents($path);
     if ($code === false) {
         fwrite(STDERR, "Could not read {$path}\n");
         continue;
     }

     try {
         $ast = $parser->parse($code);
     } catch (Error $e) {
         fwrite(STDERR, "Parse error in {$path}: {$e->getMessage()}\n");
         continue;
     }

     // Nothing to traverse if the parser produced no statement list.
     if ($ast === null) {
         continue;
     }

     // One visitor per file so each report carries the right path.
     $traverser = new NodeTraverser();
     $traverser->addVisitor(new RegexVisitor($path));
     $traverser->traverse($ast);
 }


 }}}

 Regex-only version (no PHP-Parser dependency; scans the source line by line):
 {{{#!php
 <?php

 // Scan every .php file under $root line by line and print each line that
 // contains a quoted literal shaped like a pattern with the `u` modifier.
 $root = __DIR__ . '/src';
 $iterator = new RecursiveIteratorIterator(
     new RecursiveDirectoryIterator($root)
 );

 // Quote, delimiter, body, same delimiter, modifiers, same quote.
 // The modifier set includes `n` (PHP 8.2) and `r` (PHP 8.4).
 $pattern = '/(?P<quote>[\'"])(?P<delim>[^a-zA-Z0-9\s\\\\])
             (?P<regex>(?:\\\\.|(?!\2|\1).)*)
             (?P=delim)(?P<modifiers>[imsxADSUXJunr]+)(?P=quote)/x';

 foreach ($iterator as $file) {
     if ($file->isDir() || $file->getExtension() !== 'php') {
         continue;
     }

     $path = $file->getPathname();

     $lines = file($path);
     if ($lines === false) {
         fwrite(STDERR, "Could not read {$path}\n");
         continue;
     }

     foreach ($lines as $index => $line) {
         if (!preg_match_all($pattern, $line, $matches, PREG_SET_ORDER)) {
             continue;
         }

         foreach ($matches as $match) {
             if (strpos($match['modifiers'], 'u') !== false) {
                 printf("%s:%d: %s\n", $path, $index + 1, trim($line));
                 // The whole line is printed, so report it only once even
                 // when it holds several matching literals.
                 break;
             }
         }
     }
 }


 }}}

-- 
Ticket URL: <https://core.trac.wordpress.org/ticket/63913#comment:5>
WordPress Trac <https://core.trac.wordpress.org/>
WordPress publishing platform


More information about the wp-trac mailing list