<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />
<title>[20493] branches/3.3: Don't attempt to make links inside attributes clickable.</title>
</head>
<body>
<style type="text/css"><!--
#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }
#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }
#msg dt:after { content:':';}
#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; }
#msg dl a { font-weight: bold}
#msg dl a:link { color:#fc3; }
#msg dl a:active { color:#ff0; }
#msg dl a:visited { color:#cc6; }
h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }
#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }
#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }
#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }
#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }
#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }
#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }
#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }
#logmsg > ul, #logmsg > ol { margin-left: 0; margin: 0 0 1em 0; }
#logmsg pre { background: #eee; padding: 1em; }
#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}
#logmsg dl { margin: 0; }
#logmsg dt { font-weight: bold; }
#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }
#logmsg dd:before { content:'\00bb';}
#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }
#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }
#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }
#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }
#logmsg table th.Corner { text-align: left; }
#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }
#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }
#patch { width: 100%; }
#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}
#patch .propset h4, #patch .binary h4 {margin:0;}
#patch pre {padding:0;line-height:1.2em;margin:0;}
#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}
#patch .propset .diff, #patch .binary .diff {padding:10px 0;}
#patch span {display:block;padding:0 10px;}
#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}
#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}
#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}
#patch .lines, .info {color:#888;background:#fff;}
--></style>
<div id="msg">
<dl class="meta">
<dt>Revision</dt> <dd><a href="http://core.trac.wordpress.org/changeset/20493">20493</a></dd>
<dt>Author</dt> <dd>ryan</dd>
<dt>Date</dt> <dd>2012-04-17 20:02:49 +0000 (Tue, 17 Apr 2012)</dd>
</dl>
<h3>Log Message</h3>
<pre>Don't attempt to make links inside attributes clickable.</pre>
<h3>Modified Paths</h3>
<ul>
<li><a href="#branches33wpincludesformattingphp">branches/3.3/wp-includes/formatting.php</a></li>
</ul>
<h3>Property Changed</h3>
<ul>
<li><a href="#branches33">branches/3.3/</a></li>
<li><a href="#branches33wpincludescapabilitiesphp">branches/3.3/wp-includes/capabilities.php</a></li>
</ul>
</div>
<div id="patch">
<h3>Diff</h3>
<a id="branches33"></a>
<div class="propset"><h4>Property changes: branches/3.3</h4>
<pre class="diff"><span>
</span></pre></div>
<a id="svnmergeinfo"></a>
<div class="modfile"><h4>Modified: svn:mergeinfo</h4></div>
<span class="cx">/trunk:18512,19638-19641,19647,19649,19653,19655,19657,19662,19665,20425,20463,20467
</span><span class="cx"> + /branches/3.1:18031
</span><span class="cx">/trunk:18512,19638-19641,19647,19649,19653,19655,19657,19662,19665,20425,20443,20463,20467
</span><a id="branches33wpincludescapabilitiesphp"></a>
<div class="propset"><h4>Property changes: branches/3.3/wp-includes/capabilities.php</h4>
<pre class="diff"><span>
</span></pre></div>
<a id="branches33wpincludescapabilitiesphpsvnmergeinfo"></a>
<div class="modfile"><h4>Modified: svn:mergeinfo</h4></div>
<span class="cx">/trunk/wp-includes/capabilities.php:18512,19596,19638-19641,19647,19649,19653,19655,19657,19662,19665,20425,20463,20467
</span><span class="cx"> + /branches/3.1/wp-includes/capabilities.php:18031
</span><span class="cx">/trunk/wp-includes/capabilities.php:18512,19596,19638-19641,19647,19649,19653,19655,19657,19662,19665,20425,20443,20463,20467
</span><a id="branches33wpincludesformattingphp"></a>
<div class="modfile"><h4>Modified: branches/3.3/wp-includes/formatting.php (20492 => 20493)</h4>
<pre class="diff"><span>
<span class="info">--- branches/3.3/wp-includes/formatting.php        2012-04-17 16:49:17 UTC (rev 20492)
+++ branches/3.3/wp-includes/formatting.php        2012-04-17 20:02:49 UTC (rev 20493)
</span><span class="lines">@@ -1350,9 +1350,17 @@
</span><span class="cx"> */
</span><span class="cx"> function _make_url_clickable_cb($matches) {
</span><span class="cx">         $url = $matches[2];
</span><del>-        $suffix = '';
</del><span class="cx">
</span><del>-        /** Include parentheses in the URL only if paired **/
</del><ins>+        if ( ')' == $matches[3] &amp;&amp; strpos( $url, '(' ) ) {
+                // If the trailing character is a closing parethesis, and the URL has an opening parenthesis in it, add the closing parenthesis to the URL.
+                // Then we can let the parenthesis balancer do its thing below.
+                $url .= $matches[3];
+                $suffix = '';
+        } else {
+                $suffix = $matches[3];
+        }
+
+        // Include parentheses in the URL only if paired
</ins><span class="cx">         while ( substr_count( $url, '(' ) &lt; substr_count( $url, ')' ) ) {
</span><span class="cx">                 $suffix = strrchr( $url, ')' ) . $suffix;
</span><span class="cx">                 $url = substr( $url, 0, strrpos( $url, ')' ) );
</span><span class="lines">@@ -1418,26 +1426,120 @@
</span><span class="cx"> *
</span><span class="cx"> * @since 0.71
</span><span class="cx"> *
</span><del>- * @param string $ret Content to convert URIs.
</del><ins>+ * @param string $text Content to convert URIs.
</ins><span class="cx"> * @return string Content with converted URIs.
</span><span class="cx"> */
</span><del>-function make_clickable($ret) {
-        $ret = ' ' . $ret;
-        // in testing, using arrays here was found to be faster
-        $save = @ini_set('pcre.recursion_limit', 10000);
-        $retval = preg_replace_callback('#(?&lt;!=[\'"])(?&lt;=[*\')+.,;:!&amp;$\s&gt;])(\()?([\w]+?://(?:[\w\\x80-\\xff\#%~/?@\[\]-]{1,2000}|[\'*(+.,;:!=&amp;$](?![\b\)]|(\))?([\s]|$))|(?(1)\)(?![\s&lt;.,;:]|$)|\)))+)#is', '_make_url_clickable_cb', $ret);
-        if (null !== $retval )
-                $ret = $retval;
-        @ini_set('pcre.recursion_limit', $save);
-        $ret = preg_replace_callback('#([\s&gt;])((www|ftp)\.[\w\\x80-\\xff\#$%&amp;~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret);
-        $ret = preg_replace_callback('#([\s&gt;])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret);
-        // this one is not in an array because we need it to run last, for cleanup of accidental links within links
-        $ret = preg_replace("#(&lt;a( [^&gt;]+?&gt;|&gt;))&lt;a [^&gt;]+?&gt;([^&gt;]+?)&lt;/a&gt;&lt;/a&gt;#i", "$1$3&lt;/a&gt;", $ret);
-        $ret = trim($ret);
-        return $ret;
</del><ins>+function make_clickable( $text ) {
+        $r = '';
+        $textarr = preg_split( '/(&lt;[^&lt;&gt;]+&gt;)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // split out HTML tags
+        foreach ( $textarr as $piece ) {
+                if ( empty( $piece ) || ( $piece[0] == '&lt;' &amp;&amp; ! preg_match('|^&lt;\s*[\w]{1,20}+://|', $piece) ) ) {
+                        $r .= $piece;
+                        continue;
+                }
+
+                // Long strings might contain expensive edge cases ...
+                if ( 10000 &lt; strlen( $piece ) ) {
+                        // ... break it up
+                        foreach ( _split_str_by_whitespace( $piece, 2100 ) as $chunk ) { // 2100: Extra room for scheme and leading and trailing paretheses
+                                if ( 2101 &lt; strlen( $chunk ) ) {
+                                        $r .= $chunk; // Too big, no whitespace: bail.
+                                } else {
+                                        $r .= make_clickable( $chunk );
+                                }
+                        }
+                } else {
+                        $ret = " $piece "; // Pad with whitespace to simplify the regexes
+
+                        $url_clickable = '~
+                                ([\\s(&lt;.,;:!?]) # 1: Leading whitespace, or punctuation
+                                ( # 2: URL
+                                        [\\w]{1,20}+:// # Scheme and hier-part prefix
+                                        (?=\S{1,2000}\s) # Limit to URLs less than about 2000 characters long
+                                        [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&amp;$-]*+ # Non-punctuation URL character
+                                        (?: # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character
+                                                [\'.,;:!?)] # Punctuation URL character
+                                                [\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&amp;$-]++ # Non-punctuation URL character
+                                        )*
+                                )
+                                (\)?) # 3: Trailing closing parenthesis (for parethesis balancing post processing)
+                        ~xS'; // The regex is a non-anchored pattern and does not have a single fixed starting character.
+                         // Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times.
+
+                        $ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret );
+
+                        $ret = preg_replace_callback( '#([\s&gt;])((www|ftp)\.[\w\\x80-\\xff\#$%&amp;~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret );
+                        $ret = preg_replace_callback( '#([\s&gt;])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret );
+
+                        $ret = substr( $ret, 1, -1 ); // Remove our whitespace padding.
+                        $r .= $ret;
+                }
+        }
+
+        // Cleanup of accidental links within links
+        $r = preg_replace( '#(&lt;a( [^&gt;]+?&gt;|&gt;))&lt;a [^&gt;]+?&gt;([^&gt;]+?)&lt;/a&gt;&lt;/a&gt;#i', "$1$3&lt;/a&gt;", $r );
+        return $r;
</ins><span class="cx"> }
</span><span class="cx">
</span><span class="cx"> /**
</span><ins>+ * Breaks a string into chunks by splitting at whitespace characters.
+ * The length of each returned chunk is as close to the specified length goal as possible,
+ * with the caveat that each chunk includes its trailing delimiter.
+ * Chunks longer than the goal are guaranteed to not have any inner whitespace.
+ *
+ * Joining the returned chunks with empty delimiters reconstructs the input string losslessly.
+ *
+ * Input string must have no null characters (or eventual transformations on output chunks must not care about null characters)
+ *
+ * &lt;code&gt;
+ * _split_str_by_whitespace( "1234 67890 1234 67890a cd 1234 890 123456789 1234567890a 45678 1 3 5 7 90 ", 10 ) ==
+ * array (
+ * 0 =&gt; '1234 67890 ', // 11 characters: Perfect split
+ * 1 =&gt; '1234 ', // 5 characters: '1234 67890a' was too long
+ * 2 =&gt; '67890a cd ', // 10 characters: '67890a cd 1234' was too long
+ * 3 =&gt; '1234 890 ', // 11 characters: Perfect split
+ * 4 =&gt; '123456789 ', // 10 characters: '123456789 1234567890a' was too long
+ * 5 =&gt; '1234567890a ', // 12 characters: Too long, but no inner whitespace on which to split
+ * 6 =&gt; ' 45678 ', // 11 characters: Perfect split
+ * 7 =&gt; '1 3 5 7 9', // 9 characters: End of $string
+ * );
+ * &lt;/code&gt;
+ *
+ * @since 3.4.0
+ * @access private
+ *
+ * @param string $string The string to split
+ * @param int $goal The desired chunk length.
+ * @return array Numeric array of chunks.
+ */
+function _split_str_by_whitespace( $string, $goal ) {
+        $chunks = array();
+
+        $string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" );
+
+        while ( $goal &lt; strlen( $string_nullspace ) ) {
+                $pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" );
+
+                if ( false === $pos ) {
+                        $pos = strpos( $string_nullspace, "\000", $goal + 1 );
+                        if ( false === $pos ) {
+                                break;
+                        }
+                }
+
+                $chunks[] = substr( $string, 0, $pos + 1 );
+                $string = substr( $string, $pos + 1 );
+                $string_nullspace = substr( $string_nullspace, $pos + 1 );
+        }
+
+        if ( $string ) {
+                $chunks[] = $string;
+        }
+
+        return $chunks;
+}
+
+/**
</ins><span class="cx"> * Adds rel nofollow string to all HTML A elements in content.
</span><span class="cx"> *
</span><span class="cx"> * @since 1.5.0
</span></span></pre>
</div>
</div>
</body>
</html>