<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"

"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">

<head><meta http-equiv="content-type" content="text/html; charset=utf-8" />

<title>[23804] trunk/wp-includes: Extract chats as structured data.</title>

</head>

<body>

<style type="text/css"><!--

#msg dl.meta { border: 1px #006 solid; background: #369; padding: 6px; color: #fff; }

#msg dl.meta dt { float: left; width: 6em; font-weight: bold; }

#msg dt:after { content:':';}

#msg dl, #msg dt, #msg ul, #msg li, #header, #footer, #logmsg { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt;  }

#msg dl a { font-weight: bold}

#msg dl a:link    { color:#fc3; }

#msg dl a:active  { color:#ff0; }

#msg dl a:visited { color:#cc6; }

h3 { font-family: verdana,arial,helvetica,sans-serif; font-size: 10pt; font-weight: bold; }

#msg pre { overflow: auto; background: #ffc; border: 1px #fa0 solid; padding: 6px; }

#logmsg { background: #ffc; border: 1px #fa0 solid; padding: 1em 1em 0 1em; }

#logmsg p, #logmsg pre, #logmsg blockquote { margin: 0 0 1em 0; }

#logmsg p, #logmsg li, #logmsg dt, #logmsg dd { line-height: 14pt; }

#logmsg h1, #logmsg h2, #logmsg h3, #logmsg h4, #logmsg h5, #logmsg h6 { margin: .5em 0; }

#logmsg h1:first-child, #logmsg h2:first-child, #logmsg h3:first-child, #logmsg h4:first-child, #logmsg h5:first-child, #logmsg h6:first-child { margin-top: 0; }

#logmsg ul, #logmsg ol { padding: 0; list-style-position: inside; margin: 0 0 0 1em; }

#logmsg > ul, #logmsg > ol { margin-left: 0; margin: 0 0 1em 0; }

#logmsg pre { background: #eee; padding: 1em; }

#logmsg blockquote { border: 1px solid #fa0; border-left-width: 10px; padding: 1em 1em 0 1em; background: white;}

#logmsg dl { margin: 0; }

#logmsg dt { font-weight: bold; }

#logmsg dd { margin: 0; padding: 0 0 0.5em 0; }

#logmsg dd:before { content:'\00bb';}

#logmsg table { border-spacing: 0px; border-collapse: collapse; border-top: 4px solid #fa0; border-bottom: 1px solid #fa0; background: #fff; }

#logmsg table th { text-align: left; font-weight: normal; padding: 0.2em 0.5em; border-top: 1px dotted #fa0; }

#logmsg table td { text-align: right; border-top: 1px dotted #fa0; padding: 0.2em 0.5em; }

#logmsg table thead th { text-align: center; border-bottom: 1px solid #fa0; }

#logmsg table th.Corner { text-align: left; }

#logmsg hr { border: none 0; border-top: 2px dashed #fa0; height: 1px; }

#header, #footer { color: #fff; background: #636; border: 1px #300 solid; padding: 6px; }

#patch { width: 100%; }

#patch h4 {font-family: verdana,arial,helvetica,sans-serif;font-size:10pt;padding:8px;background:#369;color:#fff;margin:0;}

#patch .propset h4, #patch .binary h4 {margin:0;}

#patch pre {padding:0;line-height:1.2em;margin:0;}

#patch .diff {width:100%;background:#eee;padding: 0 0 10px 0;overflow:auto;}

#patch .propset .diff, #patch .binary .diff  {padding:10px 0;}

#patch span {display:block;padding:0 10px;}

#patch .modfile, #patch .addfile, #patch .delfile, #patch .propset, #patch .binary, #patch .copfile {border:1px solid #ccc;margin:10px 0;}

#patch ins {background:#dfd;text-decoration:none;display:block;padding:0 10px;}

#patch del {background:#fdd;text-decoration:none;display:block;padding:0 10px;}

#patch .lines, .info {color:#888;background:#fff;}

--></style>

<div id="msg">

<dl class="meta">

<dt>Revision</dt> <dd><a href="http://core.trac.wordpress.org/changeset/23804">23804</a></dd>

<dt>Author</dt> <dd>markjaquith</dd>

<dt>Date</dt> <dd>2013-03-27 08:31:12 +0000 (Wed, 27 Mar 2013)</dd>

</dl>

<h3>Log Message</h3>

<pre>Extract chats as structured data.

* add_chat_detection_format() — to add a chat regex pattern

* get_content_chat() — to grab a chat from content

* get_the_chat() — grab the chat from the current (or passed) post

* the_chat() — output the chat in formatted HTML

* paginate_content() — puts the &lt;!--nextpage--&gt; splitting stuff into a function

* get_paged_content() — grabs a page of raw content, needed to paginate chats properly

see <a href="http://core.trac.wordpress.org/ticket/23625">#23625</a>. props wonderboymusic, lancewillett.</pre>

<h3>Modified Paths</h3>

<ul>

<li><a href="#trunkwpincludespostformatsphp">trunk/wp-includes/post-formats.php</a></li>

<li><a href="#trunkwpincludesqueryphp">trunk/wp-includes/query.php</a></li>

</ul>

</div>

<div id="patch">

<h3>Diff</h3>

<a id="trunkwpincludespostformatsphp"></a>

<div class="modfile"><h4>Modified: trunk/wp-includes/post-formats.php (23803 => 23804)</h4>

<pre class="diff"><span>

<span class="info">--- trunk/wp-includes/post-formats.php        2013-03-27 05:11:41 UTC (rev 23803)

+++ trunk/wp-includes/post-formats.php        2013-03-27 08:31:12 UTC (rev 23804)

</span><span class="lines">@@ -392,6 +392,219 @@

</span><span class="cx"> }

</span><span class="cx"> 

</span><span class="cx"> /**

</span><ins>+ * Add chat detection support to the `get_content_chat()` chat parser

+ *

+ * @since 3.6.0

+ *

+ * @global array $_wp_chat_parsers

+ * @param string $name Unique identifier for chat format. Example: IRC

+ * @param string $newline_regex RegEx to match the start of a new line, typically when a new &quot;username:&quot; appears

+ *        The parser will handle up to 3 matched expressions

+ *        $matches[0] = the string before the user's message starts

+ *        $matches[1] = the time of the message, if present

+ *        $matches[2] = the author/username

+ *        OR

+ *        $matches[0] = the string before the user's message starts

+ *        $matches[1] = the author/username

+ * @param string $delimiter_regex RegEx to determine where to split the username syntax from the chat message

+ */

+function add_chat_detection_format( $name, $newline_regex, $delimiter_regex ) {

+        global $_wp_chat_parsers;

+

+        if ( empty( $_wp_chat_parsers ) )

+                $_wp_chat_parsers = array();

+

+        $_wp_chat_parsers = array( $name =&gt; array( $newline_regex, $delimiter_regex ) ) + $_wp_chat_parsers;

+}

+add_chat_detection_format( 'IM', '#^([^:]+):#', '#[:]#' );

+add_chat_detection_format( 'Skype', '#^(\[.+?\])\s([^:]+):#', '#[:]#' );

+

+/**

+ * Deliberately interpret passed content as a chat transcript that is optionally

+ * followed by commentary

+ *

+ * If the content does not contain username syntax, assume that it does not contain

+ * chat logs and return

+ *

+ * @since 3.6.0

+ *

+ * Example:

+ *

+ * One stanza of chat:

+ * Scott: Hey, let's chat!

+ * Helen: No.

+ *

+ * $stanzas = array(

+ *     array(

+ *         array(

+ *             'time' =&gt; '',

+ *             'author' =&gt; 'Scott',

+ *             'messsage' =&gt; &quot;Hey, let's chat!&quot;

+ *         ),

+ *         array(

+ *             'time' =&gt; '',

+ *             'author' =&gt; 'Helen',

+ *             'message' =&gt; 'No.'

+ *         )

+ *     )

+ * )

+ * @param string $content A string which might contain chat data.

+ * @param boolean $remove Whether to remove the found data from the passed content.

+ * @return array A chat log as structured data

+ */

+function get_content_chat( &amp;$content, $remove = false ) {

+        global $_wp_chat_parsers;

+

+        $trimmed = trim( $content );

+        if ( empty( $trimmed ) )

+                return array();

+

+        $has_match = false;

+        $matched_parser = false;

+        foreach ( $_wp_chat_parsers as $parser ) {

+                @list( $newline_regex ) = $parser;

+                if ( preg_match( $newline_regex, $trimmed ) ) {

+                        $has_match = true;

+                        $matched_parser = $parser;

+                        break;

+                }

+        }

+

+        if ( false === $matched_parser )

+                return array();

+

+        @list( $newline_regex, $delimiter_regex ) = $parser;

+

+        $last_index = 0;

+        $stanzas = array();

+        $lines = explode( &quot;\n&quot;, make_clickable( $trimmed ) );

+

+        $author = $time = '';

+        $data = array();

+        $stanza = array();

+

+        foreach ( $lines as $index =&gt; $line ) {

+                $line = trim( $line );

+

+                if ( empty( $line ) ) {

+                        if ( ! empty( $author ) ) {

+                                $stanza[] = array(

+                                        'time' =&gt; $time,

+                                        'author' =&gt; $author,

+                                        'message' =&gt; join( ' ', $data )

+                                );

+                        }

+

+                        $stanzas[] = $stanza;

+                        $last_index = $index;

+                        $stanza = array();

+                        $author = $time = '';

+                        $data = array();

+                        if ( ! empty( $lines[$index + 1] ) &amp;&amp; ! preg_match( $delimiter_regex, $lines[$index + 1] ) )

+                                break;

+                }

+

+                $matches = array();

+                $matched = preg_match( $newline_regex, $line, $matches );

+                $author_match = empty( $matches[2] ) ? $matches[1] : $matches[2];

+                // assume username syntax if no whitespace is present

+                $no_ws = $matched &amp;&amp; ! preg_match( '#\s#', $author_match );

+                // allow script-like stanzas

+                $has_ws = $matched &amp;&amp; preg_match( '#\s#', $author_match ) &amp;&amp; empty( $lines[$index + 1] ) &amp;&amp; empty( $lines[$index - 1] );

+                if ( $matched &amp;&amp; ( ! empty( $matches[2] ) || ( $no_ws || $has_ws ) ) ) {

+                        if ( ! empty( $author ) ) {

+                                $stanza[] = array(

+                                        'time' =&gt; $time,

+                                        'author' =&gt; $author,

+                                        'message' =&gt; join( ' ', $data )

+                                );

+                                $data = array();

+                        }

+

+                        $time = empty( $matches[2] ) ? '' : $matches[1];

+                        $author = $author_match;

+                        $data[] = trim( str_replace( $matches[0], '', $line ) );

+                } elseif ( preg_match( '#\S#', $line ) ) {

+                        $data[] = $line;

+                }

+        }

+

+        if ( ! empty( $author ) ) {

+                $stanza[] = array(

+                        'time' =&gt; $time,

+                        'author' =&gt; $author,

+                        'message' =&gt; trim( join( ' ', $data ) )

+                );

+        }

+

+        if ( ! empty( $stanza ) )

+                $stanzas[] = $stanza;

+

+        if ( $remove )

+                $content = trim( join( &quot;\n&quot;, array_slice( $lines, $last_index ) ) );

+

+        return $stanzas;

+}

+

+/**

+ * Retrieve structured chat data from the current or passed post

+ *

+ * @since 3.6.0

+ *

+ * @param int $id Optional. Post ID

+ * @return array

+ */

+function get_the_chat( $id = 0 ) {

+        $post = empty( $id ) ? clone get_post() : get_post( $id );

+        if ( empty( $post ) )

+                return array();

+

+        $data = get_content_chat( get_paged_content( $post-&gt;post_content ) );

+        if ( empty( $data ) )

+                return array();

+

+        return $data;

+}

+

+/**

+ * Output HTML for a given chat's structured data. Themes can use this as a

+ * template tag in place of the_content() for Chat post format templates.

+ *

+ * @since 3.6.0

+ *

+ * @uses get_the_chat()

+ *

+ * @print HTML

+ */

+function the_chat() {

+        $output = '&lt;dl class=&quot;chat-transcript&quot;&gt;';

+

+        $stanzas = get_the_chat();

+

+        foreach ( $stanzas as $stanza ) {

+                foreach ( $stanza as $row ) {

+                        $time = '';

+                        if ( ! empty( $row['time'] ) )

+                                $time = sprintf( '&lt;time&gt;%s&lt;/time&gt;', esc_html( $row['time'] ) );

+

+                        $output .= sprintf(

+                                '&lt;dt class=&quot;chat-author chat-author-%1$s vcard&quot;&gt;%2$s &lt;cite class=&quot;fn&quot;&gt;%3$s&lt;/cite&gt;: &lt;/dt&gt;

+                                        &lt;dd class=&quot;chat-text&quot;&gt;%4$s&lt;/dd&gt;

+                                ',

+                                esc_attr( strtolower( $row['author'] ) ), // Slug.

+                                $time,

+                                esc_html( $row['author'] ),

+                                esc_html( $row['message'] )

+                        );

+                }

+        }

+

+        $output .= '&lt;/dl&gt;&lt;!-- .chat-transcript --&gt;';

+

+        echo $output;

+}

+

+/**

</ins><span class="cx">  * Extract a URL from passed content, if possible

</span><span class="cx">  * Checks for a URL on the first line of the content or the first encountered href attribute.

</span><span class="cx">  *

</span></span></pre></div>

<a id="trunkwpincludesqueryphp"></a>

<div class="modfile"><h4>Modified: trunk/wp-includes/query.php (23803 => 23804)</h4>

<pre class="diff"><span>

<span class="info">--- trunk/wp-includes/query.php        2013-03-27 05:11:41 UTC (rev 23803)

+++ trunk/wp-includes/query.php        2013-03-27 08:31:12 UTC (rev 23804)

</span><span class="lines">@@ -3621,8 +3621,53 @@

</span><span class="cx">                 exit;

</span><span class="cx">         endif;

</span><span class="cx"> }

</span><ins>+/**

+ * Split the passed content by &lt;!--nextpage--&gt;

+ *

+ * @since 3.6.0

+ *

+ * @param string $content Content to split

+ * @return array Paged content

+ */

+function paginate_content( $content ) {

+        $content = str_replace( &quot;\n&lt;!--nextpage--&gt;\n&quot;, '&lt;!--nextpage--&gt;', $content );

+        $content = str_replace( &quot;\n&lt;!--nextpage--&gt;&quot;, '&lt;!--nextpage--&gt;', $content );

+        $content = str_replace( &quot;&lt;!--nextpage--&gt;\n&quot;, '&lt;!--nextpage--&gt;', $content );

+        return explode( '&lt;!--nextpage--&gt;', $content);

+}

</ins><span class="cx"> 

</span><span class="cx"> /**

</span><ins>+ * Return content offset by $page

+ *

+ * @since 3.6.0

+ *

+ * @param string $content

+ * @return string

+ */

+function get_paged_content( $content = null, $paged = null ) {

+        global $page;

+        if ( empty( $page ) )

+                $page = 1;

+

+        if ( empty( $paged ) )

+                $paged = $page;

+

+        if ( empty( $content ) ) {

+                $post = get_post();

+                if ( empty( $post ) )

+                        return;

+

+                $content = $post-&gt;post_content;

+        }

+

+        $pages = paginate_content( $content );

+        if ( isset( $pages[$paged - 1] ) )

+                return $pages[$paged - 1];

+

+        return reset( $pages );

+}

+

+/**

</ins><span class="cx">  * Set up global post data.

</span><span class="cx">  *

</span><span class="cx">  * @since 1.5.0

</span></span></pre>

</div>

</div>

</body>

</html>