<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" /><style type="text/css"><!--
/* Commit summary (#msg): metadata list, log message and path lists. */
#msg dl { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }
#msg dl a { font-weight: bold}
/* Link states are ordered link, visited, active. The three rules have equal
   specificity, so the last matching one wins; :active has to follow :visited
   or a visited link never shows its active colour. */
#msg dl a:link    { color:#fc3; }
#msg dl a:visited { color:#cc6; }
#msg dl a:active  { color:#ff0; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fc0 solid; padding: 6px; }
#msg ul, pre { overflow: auto; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
/* Diff section (#patch): one bordered box per file, with a heading bar and
   the diff text in a scrollable grey area. */
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}
/* Each diff fragment is a block-level box so its background spans the full width. */
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
/* Added lines get a green tint, removed lines a red tint; the default
   underline and strike-through decorations are switched off. */
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<title>[13358] trunk/wp-includes: Whitelist entities.</title>
</head>
<body>

<div id="msg">
  <!-- Changeset metadata: revision link, author and commit date. -->
  <dl>
    <dt>Revision</dt>
    <dd><a href="http://trac.wordpress.org/changeset/13358">13358</a></dd>
    <dt>Author</dt>
    <dd>ryan</dd>
    <dt>Date</dt>
    <dd>2010-02-24 05:37:20 +0000 (Wed, 24 Feb 2010)</dd>
  </dl>

  <!-- Commit log message; kept on one line because <pre> preserves whitespace. -->
  <h3>Log Message</h3>
  <pre>Whitelist entities. Props miqrogroove. see <a href="http://trac.wordpress.org/ticket/12284">#12284</a></pre>

  <!-- In-page links to the per-file diff anchors further down. -->
  <h3>Modified Paths</h3>
  <ul>
    <li><a href="#trunkwpincludesformattingphp">trunk/wp-includes/formatting.php</a></li>
    <li><a href="#trunkwpincludesksesphp">trunk/wp-includes/kses.php</a></li>
  </ul>

</div>
<div id="patch">
<h3>Diff</h3>
<a id="trunkwpincludesformattingphp"></a>
<div class="modfile"><h4>Modified: trunk/wp-includes/formatting.php (13357 =&gt; 13358)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/wp-includes/formatting.php        2010-02-24 05:30:34 UTC (rev 13357)
+++ trunk/wp-includes/formatting.php        2010-02-24 05:37:20 UTC (rev 13358)
</span><span class="lines">@@ -334,6 +334,12 @@
</span><span class="cx">         // Handle double encoding ourselves
</span><span class="cx">         if ( !$double_encode ) {
</span><span class="cx">                 $string = wp_specialchars_decode( $string, $_quote_style );
</span><ins>+
+                /* Critical */
+                // The previous line decodes &amp;amp;phrase; into &amp;phrase;  We must guarantee that &amp;phrase; is valid before proceeding.
+                $string = wp_kses_normalize_entities($string);
+
+                // Now proceed with custom double-encoding silliness
</ins><span class="cx">                 $string = preg_replace( '/&amp;(#?x?[0-9a-z]+);/i', '|wp_entity|$1|/wp_entity|', $string );
</span><span class="cx">         }
</span><span class="cx"> 
</span></span></pre></div>
<a id="trunkwpincludesksesphp"></a>
<div class="modfile"><h4>Modified: trunk/wp-includes/kses.php (13357 =&gt; 13358)</h4>
<pre class="diff"><span>
<span class="info">--- trunk/wp-includes/kses.php        2010-02-24 05:30:34 UTC (rev 13357)
+++ trunk/wp-includes/kses.php        2010-02-24 05:37:20 UTC (rev 13358)
</span><span class="lines">@@ -333,6 +333,255 @@
</span><span class="cx">                 //        'u' =&gt; array(),
</span><span class="cx">                 //        'ul' =&gt; array(),
</span><span class="cx">         );
</span><ins>+
+        $allowedentitynames = array(
+                'nbsp',
+                'iexcl',
+                'cent',
+                'pound',
+                'curren',
+                'yen',
+                'brvbar',
+                'sect',
+                'uml',
+                'copy',
+                'ordf',
+                'laquo',
+                'not',
+                'shy',
+                'reg',
+                'macr',
+                'deg',
+                'plusmn',
+                'acute',
+                'micro',
+                'para',
+                'middot',
+                'cedil',
+                'ordm',
+                'raquo',
+                'iquest',
+                'Agrave',
+                'Aacute',
+                'Acirc',
+                'Atilde',
+                'Auml',
+                'Aring',
+                'AElig',
+                'Ccedil',
+                'Egrave',
+                'Eacute',
+                'Ecirc',
+                'Euml',
+                'Igrave',
+                'Iacute',
+                'Icirc',
+                'Iuml',
+                'ETH',
+                'Ntilde',
+                'Ograve',
+                'Oacute',
+                'Ocirc',
+                'Otilde',
+                'Ouml',
+                'times',
+                'Oslash',
+                'Ugrave',
+                'Uacute',
+                'Ucirc',
+                'Uuml',
+                'Yacute',
+                'THORN',
+                'szlig',
+                'agrave',
+                'aacute',
+                'acirc',
+                'atilde',
+                'auml',
+                'aring',
+                'aelig',
+                'ccedil',
+                'egrave',
+                'eacute',
+                'ecirc',
+                'euml',
+                'igrave',
+                'iacute',
+                'icirc',
+                'iuml',
+                'eth',
+                'ntilde',
+                'ograve',
+                'oacute',
+                'ocirc',
+                'otilde',
+                'ouml',
+                'divide',
+                'oslash',
+                'ugrave',
+                'uacute',
+                'ucirc',
+                'uuml',
+                'yacute',
+                'thorn',
+                'yuml',
+                'quot',
+                'amp',
+                'lt',
+                'gt',
+                'apos',
+                'OElig',
+                'oelig',
+                'Scaron',
+                'scaron',
+                'Yuml',
+                'circ',
+                'tilde',
+                'ensp',
+                'emsp',
+                'thinsp',
+                'zwnj',
+                'zwj',
+                'lrm',
+                'rlm',
+                'ndash',
+                'mdash',
+                'lsquo',
+                'rsquo',
+                'sbquo',
+                'ldquo',
+                'rdquo',
+                'bdquo',
+                'dagger',
+                'Dagger',
+                'permil',
+                'lsaquo',
+                'rsaquo',
+                'euro',
+                'fnof',
+                'Alpha',
+                'Beta',
+                'Gamma',
+                'Delta',
+                'Epsilon',
+                'Zeta',
+                'Eta',
+                'Theta',
+                'Iota',
+                'Kappa',
+                'Lambda',
+                'Mu',
+                'Nu',
+                'Xi',
+                'Omicron',
+                'Pi',
+                'Rho',
+                'Sigma',
+                'Tau',
+                'Upsilon',
+                'Phi',
+                'Chi',
+                'Psi',
+                'Omega',
+                'alpha',
+                'beta',
+                'gamma',
+                'delta',
+                'epsilon',
+                'zeta',
+                'eta',
+                'theta',
+                'iota',
+                'kappa',
+                'lambda',
+                'mu',
+                'nu',
+                'xi',
+                'omicron',
+                'pi',
+                'rho',
+                'sigmaf',
+                'sigma',
+                'tau',
+                'upsilon',
+                'phi',
+                'chi',
+                'psi',
+                'omega',
+                'thetasym',
+                'upsih',
+                'piv',
+                'bull',
+                'hellip',
+                'prime',
+                'Prime',
+                'oline',
+                'frasl',
+                'weierp',
+                'image',
+                'real',
+                'trade',
+                'alefsym',
+                'larr',
+                'uarr',
+                'rarr',
+                'darr',
+                'harr',
+                'crarr',
+                'lArr',
+                'uArr',
+                'rArr',
+                'dArr',
+                'hArr',
+                'forall',
+                'part',
+                'exist',
+                'empty',
+                'nabla',
+                'isin',
+                'notin',
+                'ni',
+                'prod',
+                'sum',
+                'minus',
+                'lowast',
+                'radic',
+                'prop',
+                'infin',
+                'ang',
+                'and',
+                'or',
+                'cap',
+                'cup',
+                'int',
+                'sim',
+                'cong',
+                'asymp',
+                'ne',
+                'equiv',
+                'le',
+                'ge',
+                'sub',
+                'sup',
+                'nsub',
+                'sube',
+                'supe',
+                'oplus',
+                'otimes',
+                'perp',
+                'sdot',
+                'lceil',
+                'rceil',
+                'lfloor',
+                'rfloor',
+                'lang',
+                'rang',
+                'loz',
+                'spades',
+                'clubs',
+                'hearts',
+                'diams'
+        );
</ins><span class="cx"> }
</span><span class="cx"> 
</span><span class="cx"> /**
</span><span class="lines">@@ -952,7 +1201,7 @@
</span><span class="cx"> 
</span><span class="cx">         # Change back the allowed entities in our entity whitelist
</span><span class="cx"> 
</span><del>-        $string = preg_replace('/&amp;amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&amp;\\1;', $string);
</del><ins>+        $string = preg_replace_callback('/&amp;amp;([A-Za-z]{2,8});/', 'wp_kses_named_entities', $string);
</ins><span class="cx">         $string = preg_replace_callback('/&amp;amp;#0*([0-9]{1,5});/', 'wp_kses_normalize_entities2', $string);
</span><span class="cx">         $string = preg_replace_callback('/&amp;amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', 'wp_kses_normalize_entities3', $string);
</span><span class="cx"> 
</span><span class="lines">@@ -962,6 +1211,27 @@
</span><span class="cx"> /**
</span><span class="cx">  * Callback for wp_kses_normalize_entities() regular expression.
</span><span class="cx">  *
</span><ins>+ * This function only accepts valid named entity references, which are finite,
+ * case-sensitive, and highly scrutinized by HTML and XML validators.
+ *
+ * @since 3.0.0
+ *
+ * @param array $matches preg_replace_callback() matches array
+ * @return string Correctly encoded entity
+ */
+function wp_kses_named_entities($matches) {
+        global $allowedentitynames;
+
+        if ( empty($matches[1]) )
+                return '';
+
+        $i = $matches[1];
+        return ( ( ! in_array($i, $allowedentitynames) ) ? &quot;&amp;amp;$i;&quot; : &quot;&amp;$i;&quot; );
+}
+
+/**
+ * Callback for wp_kses_normalize_entities() regular expression.
+ *
</ins><span class="cx">  * This function helps wp_kses_normalize_entities() to only accept 16 bit values
</span><span class="cx">  * and nothing more for &amp;#number; entities.
</span><span class="cx">  *
</span><span class="lines">@@ -972,7 +1242,7 @@
</span><span class="cx">  * @return string Correctly encoded entity
</span><span class="cx">  */
</span><span class="cx"> function wp_kses_normalize_entities2($matches) {
</span><del>-        if ( ! isset($matches[1]) || empty($matches[1]) )
</del><ins>+        if ( empty($matches[1]) )
</ins><span class="cx">                 return '';
</span><span class="cx"> 
</span><span class="cx">         $i = $matches[1];
</span><span class="lines">@@ -991,7 +1261,7 @@
</span><span class="cx">  * @return string Correctly encoded entity
</span><span class="cx">  */
</span><span class="cx"> function wp_kses_normalize_entities3($matches) {
</span><del>-        if ( ! isset($matches[2]) || empty($matches[2]) )
</del><ins>+        if ( empty($matches[2]) )
</ins><span class="cx">                 return '';
</span><span class="cx"> 
</span><span class="cx">         $hexchars = $matches[2];
</span></span></pre>
</div>
</div>

</body>
</html>