Moodle  2.2.1
http://www.collinsharper.com
C:/xampp/htdocs/moodle/lib/markdown.php
Go to the documentation of this file.
00001 <?php
00002 #
00003 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
00004 #
00005 # PHP Markdown & Extra
00006 # Copyright (c) 2004-2009 Michel Fortin  
00007 # <http://michelf.com/projects/php-markdown/>
00008 #
00009 # Original Markdown
00010 # Copyright (c) 2004-2006 John Gruber  
00011 # <http://daringfireball.net/projects/markdown/>
00012 #
00013 
00014 
define( 'MARKDOWN_VERSION',  "1.0.1n" ); # Sat 10 Oct 2009
define( 'MARKDOWNEXTRA_VERSION',  "1.2.4" ); # Sat 10 Oct 2009


#
# Global default settings:
#
# Every setting below is only a default: it is defined here solely when the
# including application has not already defined a constant of that name.
# An explicit defined() guard is used instead of the "@" error-suppression
# operator, so the expected "already defined" case raises no error at all.
#

# Change to ">" for HTML output
defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX') || define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");

# Define the width of a tab for code blocks.
defined('MARKDOWN_TAB_WIDTH') || define( 'MARKDOWN_TAB_WIDTH',     4 );

# Optional title attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_TITLE') || define( 'MARKDOWN_FN_LINK_TITLE',         "" );
defined('MARKDOWN_FN_BACKLINK_TITLE') || define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );

# Optional class attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_CLASS') || define( 'MARKDOWN_FN_LINK_CLASS',         "" );
defined('MARKDOWN_FN_BACKLINK_CLASS') || define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );


#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
defined('MARKDOWN_WP_POSTS') || define( 'MARKDOWN_WP_POSTS',      true );
defined('MARKDOWN_WP_COMMENTS') || define( 'MARKDOWN_WP_COMMENTS',   true );



### Standard Function Interface ###

# Name of the parser class instantiated by the Markdown() function.
defined('MARKDOWN_PARSER_CLASS') || define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
00051 
function Markdown($text) {
#
# Convenience entry point: converts $text with a parser object that is
# created on the first call and reused by every later call. The class of
# that object is named by the MARKDOWN_PARSER_CLASS constant.
#
        static $shared_parser = null;

        # Build the shared parser lazily, the first time it is needed.
        if ($shared_parser === null) {
                $class_name = MARKDOWN_PARSER_CLASS;
                $shared_parser = new $class_name;
        }

        # Hand the text to the parser and pass its result straight back.
        $html = $shared_parser->transform($text);
        return $html;
}
00066 
00067 
00068 ### WordPress Plugin Interface ###
00069 
00070 /*
00071 Plugin Name: Markdown Extra
00072 Plugin URI: http://michelf.com/projects/php-markdown/
00073 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>
00074 Version: 1.2.4
00075 Author: Michel Fortin
00076 Author URI: http://michelf.com/
00077 */
00078 
if (isset($wp_version)) {
        # Background on how these hooks fit into WordPress' text flow:
        # <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>

        # Posts and excerpts:
        # - WordPress' paragraph generator (wpautop) is unhooked.
        # - Markdown runs on content and excerpts instead.
        # - Displayed excerpts get a paragraph wrapper; the excerpt feed has
        #   paragraph tags stripped again.
        if (MARKDOWN_WP_POSTS) {
                remove_filter('the_content',     'wpautop');
                remove_filter('the_content_rss', 'wpautop');
                remove_filter('the_excerpt',     'wpautop');
                add_filter('the_content',     'mdwp_MarkdownPost', 6);
                add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
                add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
                add_filter('get_the_excerpt', 'trim', 7);
                add_filter('the_excerpt',     'mdwp_add_p');
                add_filter('the_excerpt_rss', 'mdwp_strip_p');

                # Tag balancing is moved from save time to display time.
                remove_filter('content_save_pre',  'balanceTags', 50);
                remove_filter('excerpt_save_pre',  'balanceTags', 50);
                add_filter('the_content',         'balanceTags', 50);
                add_filter('get_the_excerpt', 'balanceTags', 9);
        }

        # Post filter: transforms $text with a parser kept across calls.
        # Footnote ids are left unprefixed on single posts, pages and feeds;
        # elsewhere they are prefixed with the current post ID and a dot.
        function mdwp_MarkdownPost($text) {
                static $parser;
                if (!$parser) {
                        $class_name = MARKDOWN_PARSER_CLASS;
                        $parser = new $class_name;
                }

                $standalone = is_single() || is_page() || is_feed();
                $parser->fn_id_prefix = $standalone ? "" : get_the_ID() . ".";

                return $parser->transform($text);
        }

        # Comments:
        # - WordPress' paragraph generator and auto-linker are unhooked.
        # - Markdown runs on comment text and comment excerpts.
        # - Selected tags are swapped for placeholders at priority 8 and
        #   swapped back at priority 12 of 'pre_comment_content'.
        if (MARKDOWN_WP_COMMENTS) {
                remove_filter('comment_text', 'wpautop', 30);
                remove_filter('comment_text', 'make_clickable');
                add_filter('pre_comment_content', 'Markdown', 6);
                add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
                add_filter('pre_comment_content', 'mdwp_show_tags', 12);
                add_filter('get_comment_text',    'Markdown', 6);
                add_filter('get_comment_excerpt', 'Markdown', 6);
                add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);

                # Parallel lists: each tag and the placeholder standing in
                # for it. The placeholder list is stored ROT13-encoded.
                global $mdwp_hidden_tags, $mdwp_placeholders;
                $mdwp_hidden_tags = explode(' ',
                        '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
                $mdwp_placeholders = explode(' ', str_rot13(
                        'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
                        'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
        }

        # Wraps $text in paragraph tags, splitting on blank lines, unless it
        # is empty or already starts with one of the listed block tags.
        function mdwp_add_p($text) {
                if (preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
                        return $text;
                }
                $wrapped = '<p>'.$text.'</p>';
                return preg_replace('{\n{2,}}', "</p>\n\n<p>", $wrapped);
        }

        # Removes every opening and closing <p> tag from $t.
        function mdwp_strip_p($t) {
                $stripped = preg_replace('{</?p>}i', '', $t);
                return $stripped;
        }

        # Replaces each hidden tag in $text with its placeholder.
        function mdwp_hide_tags($text) {
                global $mdwp_hidden_tags, $mdwp_placeholders;
                $masked = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
                return $masked;
        }

        # Replaces each placeholder in $text with its original tag.
        function mdwp_show_tags($text) {
                global $mdwp_hidden_tags, $mdwp_placeholders;
                $restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
                return $restored;
        }
}
00161 
00162 
00163 ### bBlog Plugin Info ###
00164 
function identify_modifier_markdown() {
#
# Returns the descriptive record for this modifier (see the "bBlog Plugin
# Info" section heading). The version entry is taken from
# MARKDOWNEXTRA_VERSION.
#
        $info = array();
        $info['name']        = 'markdown';
        $info['type']        = 'modifier';
        $info['nicename']    = 'PHP Markdown Extra';
        $info['description'] = 'A text-to-HTML conversion tool for web writers';
        $info['authors']     = 'Michel Fortin and John Gruber';
        $info['licence']     = 'GPL';
        $info['version']     = MARKDOWNEXTRA_VERSION;
        $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>';

        return $info;
}
00177 
00178 
00179 ### Smarty Modifier Interface ###
00180 
function smarty_modifier_markdown($text) {
        # Thin adapter: forwards $text to Markdown() and returns its result.
        $html = Markdown($text);
        return $html;
}
00184 
00185 
00186 ### Textile Compatibility Mode ###
00187 
00188 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
00189 
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") === 0) {
        # Optional companion: PHP SmartyPants, looked up by relative name.
        # A missing file is tolerated.
        @include_once 'smartypants.php';

        # Stand-in for the Textile class whose methods run Markdown instead.
        class Textile {
                # Runs Markdown on $text only when both $lite and $encode
                # are empty, then SmartyPants if that function is available.
                function TextileThis($text, $lite='', $encode='') {
                        $full_mode = ($lite == '' && $encode == '');
                        if ($full_mode) {
                                $text = Markdown($text);
                        }
                        if (function_exists('SmartyPants')) {
                                $text = SmartyPants($text);
                        }
                        return $text;
                }

                # Restricted mode is not implemented: behaves like
                # TextileThis() and ignores $noimage.
                function TextileRestricted($text, $lite='', $noimage='') {
                        $result = $this->TextileThis($text, $lite);
                        return $result;
                }

                # Present for compatibility with TextPattern 4.0.3; returns
                # $text untouched.
                function blockLite($text) {
                        return $text;
                }
        }
}
00208 
00209 
00210 
00211 #
00212 # Markdown Parser Class
00213 #
00214 
00215 class Markdown_Parser {
00216 
00217         # Regex to match balanced [brackets].
        # Needed to insert a maximum bracket depth while converting to PHP.
00219         var $nested_brackets_depth = 6;
00220         var $nested_brackets_re;
00221         
00222         var $nested_url_parenthesis_depth = 4;
00223         var $nested_url_parenthesis_re;
00224 
00225         # Table of hash values for escaped characters:
00226         var $escape_chars = '\`*_{}[]()>#+-.!';
00227         var $escape_chars_re;
00228 
00229         # Change to ">" for HTML output.
00230         var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
00231         var $tab_width = MARKDOWN_TAB_WIDTH;
00232         
00233         # Change to `true` to disallow markup or entities.
00234         var $no_markup = false;
00235         var $no_entities = false;
00236         
00237         # Predefined urls and titles for reference links and images.
00238         var $predef_urls = array();
00239         var $predef_titles = array();
00240 
00241 
00242         function Markdown_Parser() {
00243         #
00244         # Constructor function. Initialize appropriate member variables.
00245         #
00246                 $this->_initDetab();
00247                 $this->prepareItalicsAndBold();
00248         
00249                 $this->nested_brackets_re = 
00250                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
00251                         str_repeat('\])*', $this->nested_brackets_depth);
00252         
00253                 $this->nested_url_parenthesis_re = 
00254                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
00255                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
00256                 
00257                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
00258                 
00259                 # Sort document, block, and span gamut in ascendent priority order.
00260                 asort($this->document_gamut);
00261                 asort($this->block_gamut);
00262                 asort($this->span_gamut);
00263         }
00264 
00265 
00266         # Internal hashes used during transformation.
00267         var $urls = array();
00268         var $titles = array();
00269         var $html_hashes = array();
00270         
00271         # Status flag to avoid invalid nesting.
00272         var $in_anchor = false;
00273         
00274         
00275         function setup() {
00276         #
00277         # Called before the transformation process starts to setup parser 
00278         # states.
00279         #
00280                 # Clear global hashes.
00281                 $this->urls = $this->predef_urls;
00282                 $this->titles = $this->predef_titles;
00283                 $this->html_hashes = array();
00284                 
00285                 $in_anchor = false;
00286         }
00287         
00288         function teardown() {
00289         #
00290         # Called after the transformation process to clear any variable 
00291         # which may be taking up memory unnecessarly.
00292         #
00293                 $this->urls = array();
00294                 $this->titles = array();
00295                 $this->html_hashes = array();
00296         }
00297 
00298 
00299         function transform($text) {
00300         #
00301         # Main function. Performs some preprocessing on the input text
00302         # and pass it through the document gamut.
00303         #
00304                 $this->setup();
00305         
00306                 # Remove UTF-8 BOM and marker character in input, if present.
00307                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
00308 
00309                 # Standardize line endings:
00310                 #   DOS to Unix and Mac to Unix
00311                 $text = preg_replace('{\r\n?}', "\n", $text);
00312 
00313                 # Make sure $text ends with a couple of newlines:
00314                 $text .= "\n\n";
00315 
00316                 # Convert all tabs to spaces.
00317                 $text = $this->detab($text);
00318 
00319                 # Turn block-level HTML blocks into hash entries
00320                 $text = $this->hashHTMLBlocks($text);
00321 
00322                 # Strip any lines consisting only of spaces and tabs.
00323                 # This makes subsequent regexen easier to write, because we can
00324                 # match consecutive blank lines with /\n+/ instead of something
00325                 # contorted like /[ ]*\n+/ .
00326                 $text = preg_replace('/^[ ]+$/m', '', $text);
00327 
00328                 # Run document gamut methods.
00329                 foreach ($this->document_gamut as $method => $priority) {
00330                         $text = $this->$method($text);
00331                 }
00332                 
00333                 $this->teardown();
00334 
00335                 return $text . "\n";
00336         }
00337         
00338         var $document_gamut = array(
00339                 # Strip link definitions, store in hashes.
00340                 "stripLinkDefinitions" => 20,
00341                 
00342                 "runBasicBlockGamut"   => 30,
00343                 );
00344 
00345 
00346         function stripLinkDefinitions($text) {
00347         #
00348         # Strips link definitions from text, stores the URLs and titles in
00349         # hash references.
00350         #
00351                 $less_than_tab = $this->tab_width - 1;
00352 
00353                 # Link defs are in the form: ^[id]: url "optional title"
00354                 $text = preg_replace_callback('{
00355                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
00356                                                           [ ]*
00357                                                           \n?                           # maybe *one* newline
00358                                                           [ ]*
00359                                                         (?:
00360                                                           <(.+?)>                       # url = $2
00361                                                         |
00362                                                           (\S+?)                        # url = $3
00363                                                         )
00364                                                           [ ]*
00365                                                           \n?                           # maybe one newline
00366                                                           [ ]*
00367                                                         (?:
00368                                                                 (?<=\s)                 # lookbehind for whitespace
00369                                                                 ["(]
00370                                                                 (.*?)                   # title = $4
00371                                                                 [")]
00372                                                                 [ ]*
00373                                                         )?      # title is optional
00374                                                         (?:\n+|\Z)
00375                         }xm',
00376                         array(&$this, '_stripLinkDefinitions_callback'),
00377                         $text);
00378                 return $text;
00379         }
00380         function _stripLinkDefinitions_callback($matches) {
00381                 $link_id = strtolower($matches[1]);
00382                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
00383                 $this->urls[$link_id] = $url;
00384                 $this->titles[$link_id] =& $matches[4];
00385                 return ''; # String that will replace the block
00386         }
00387 
00388 
00389         function hashHTMLBlocks($text) {
00390                 if ($this->no_markup)  return $text;
00391 
00392                 $less_than_tab = $this->tab_width - 1;
00393 
00394                 # Hashify HTML blocks:
00395                 # We only want to do this for block-level HTML tags, such as headers,
00396                 # lists, and tables. That's because we still want to wrap <p>s around
00397                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
00398                 # phrase emphasis, and spans. The list of tags we're looking for is
00399                 # hard-coded:
00400                 #
00401                 # *  List "a" is made of tags which can be both inline or block-level.
00402                 #    These will be treated block-level when the start tag is alone on 
00403                 #    its line, otherwise they're not matched here and will be taken as 
00404                 #    inline later.
00405                 # *  List "b" is made of tags which are always block-level;
00406                 #
00407                 $block_tags_a_re = 'ins|del';
00408                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
00409                                                    'script|noscript|form|fieldset|iframe|math';
00410 
00411                 # Regular expression for the content of a block tag.
00412                 $nested_tags_level = 4;
00413                 $attr = '
00414                         (?>                             # optional tag attributes
00415                           \s                    # starts with whitespace
00416                           (?>
00417                                 [^>"/]+         # text outside quotes
00418                           |
00419                                 /+(?!>)         # slash not followed by ">"
00420                           |
00421                                 "[^"]*"         # text inside double quotes (tolerate ">")
00422                           |
00423                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
00424                           )*
00425                         )?      
00426                         ';
00427                 $content =
00428                         str_repeat('
00429                                 (?>
00430                                   [^<]+                 # content without tag
00431                                 |
00432                                   <\2                   # nested opening tag
00433                                         '.$attr.'       # attributes
00434                                         (?>
00435                                           />
00436                                         |
00437                                           >', $nested_tags_level).      # end of opening tag
00438                                           '.*?'.                                        # last level nested tag content
00439                         str_repeat('
00440                                           </\2\s*>      # closing nested tag
00441                                         )
00442                                   |                             
00443                                         <(?!/\2\s*>     # other tags with a different name
00444                                   )
00445                                 )*',
00446                                 $nested_tags_level);
00447                 $content2 = str_replace('\2', '\3', $content);
00448 
00449                 # First, look for nested blocks, e.g.:
00450                 #       <div>
00451                 #               <div>
00452                 #               tags for inner block must be indented.
00453                 #               </div>
00454                 #       </div>
00455                 #
00456                 # The outermost tags must start at the left margin for this to match, and
00457                 # the inner nested divs must be indented.
00458                 # We need to do this before the next, more liberal match, because the next
00459                 # match will start at the first `<div>` and stop at the first `</div>`.
00460                 $text = preg_replace_callback('{(?>
00461                         (?>
00462                                 (?<=\n\n)               # Starting after a blank line
00463                                 |                               # or
00464                                 \A\n?                   # the beginning of the doc
00465                         )
00466                         (                                               # save in $1
00467 
00468                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
00469                           # in between.
00470                                         
00471                                                 [ ]{0,'.$less_than_tab.'}
00472                                                 <('.$block_tags_b_re.')# start tag = $2
00473                                                 '.$attr.'>                      # attributes followed by > and \n
00474                                                 '.$content.'            # content, support nesting
00475                                                 </\2>                           # the matching end tag
00476                                                 [ ]*                            # trailing spaces/tabs
00477                                                 (?=\n+|\Z)      # followed by a newline or end of document
00478 
00479                         | # Special version for tags of group a.
00480 
00481                                                 [ ]{0,'.$less_than_tab.'}
00482                                                 <('.$block_tags_a_re.')# start tag = $3
00483                                                 '.$attr.'>[ ]*\n        # attributes followed by >
00484                                                 '.$content2.'           # content, support nesting
00485                                                 </\3>                           # the matching end tag
00486                                                 [ ]*                            # trailing spaces/tabs
00487                                                 (?=\n+|\Z)      # followed by a newline or end of document
00488                                         
00489                         | # Special case just for <hr />. It was easier to make a special 
00490                           # case than to make the other regex more complicated.
00491                         
00492                                                 [ ]{0,'.$less_than_tab.'}
00493                                                 <(hr)                           # start tag = $2
00494                                                 '.$attr.'                       # attributes
00495                                                 /?>                                     # the matching end tag
00496                                                 [ ]*
00497                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
00498                         
00499                         | # Special case for standalone HTML comments:
00500                         
00501                                         [ ]{0,'.$less_than_tab.'}
00502                                         (?s:
00503                                                 <!-- .*? -->
00504                                         )
00505                                         [ ]*
00506                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
00507                         
00508                         | # PHP and ASP-style processor instructions (<? and <%)
00509                         
00510                                         [ ]{0,'.$less_than_tab.'}
00511                                         (?s:
00512                                                 <([?%])                 # $2
00513                                                 .*?
00514                                                 \2>
00515                                         )
00516                                         [ ]*
00517                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
00518                                         
00519                         )
00520                         )}Sxmi',
00521                         array(&$this, '_hashHTMLBlocks_callback'),
00522                         $text);
00523 
00524                 return $text;
00525         }
00526         function _hashHTMLBlocks_callback($matches) {
00527                 $text = $matches[1];
00528                 $key  = $this->hashBlock($text);
00529                 return "\n\n$key\n\n";
00530         }
00531         
00532         
00533         function hashPart($text, $boundary = 'X') {
00534         #
00535         # Called whenever a tag must be hashed when a function insert an atomic 
00536         # element in the text stream. Passing $text to through this function gives
00537         # a unique text-token which will be reverted back when calling unhash.
00538         #
00539         # The $boundary argument specify what character should be used to surround
00540         # the token. By convension, "B" is used for block elements that needs not
00541         # to be wrapped into paragraph tags at the end, ":" is used for elements
00542         # that are word separators and "X" is used in the general case.
00543         #
00544                 # Swap back any tag hash found in $text so we do not have to `unhash`
00545                 # multiple times at the end.
00546                 $text = $this->unhash($text);
00547                 
00548                 # Then hash the block.
00549                 static $i = 0;
00550                 $key = "$boundary\x1A" . ++$i . $boundary;
00551                 $this->html_hashes[$key] = $text;
00552                 return $key; # String that will replace the tag.
00553         }
00554 
00555 
00556         function hashBlock($text) {
00557         #
00558         # Shortcut function for hashPart with block-level boundaries.
00559         #
00560                 return $this->hashPart($text, 'B');
00561         }
00562 
00563 
00564         var $block_gamut = array(
00565         #
00566         # These are all the transformations that form block-level
00567         # tags like paragraphs, headers, and list items.
00568         #
00569                 "doHeaders"         => 10,
00570                 "doHorizontalRules" => 20,
00571                 
00572                 "doLists"           => 40,
00573                 "doCodeBlocks"      => 50,
00574                 "doBlockQuotes"     => 60,
00575                 );
00576 
00577         function runBlockGamut($text) {
00578         #
00579         # Run block gamut tranformations.
00580         #
00581                 # We need to escape raw HTML in Markdown source before doing anything 
00582                 # else. This need to be done for each block, and not only at the 
00583                 # begining in the Markdown function since hashed blocks can be part of
00584                 # list items and could have been indented. Indented blocks would have 
00585                 # been seen as a code block in a previous pass of hashHTMLBlocks.
00586                 $text = $this->hashHTMLBlocks($text);
00587                 
00588                 return $this->runBasicBlockGamut($text);
00589         }
00590         
00591         function runBasicBlockGamut($text) {
00592         #
00593         # Run block gamut tranformations, without hashing HTML blocks. This is 
00594         # useful when HTML blocks are known to be already hashed, like in the first
00595         # whole-document pass.
00596         #
00597                 foreach ($this->block_gamut as $method => $priority) {
00598                         $text = $this->$method($text);
00599                 }
00600                 
00601                 # Finally form paragraph and restore hashed blocks.
00602                 $text = $this->formParagraphs($text);
00603 
00604                 return $text;
00605         }
00606         
00607         
00608         function doHorizontalRules($text) {
00609                 # Do Horizontal Rules:
00610                 return preg_replace(
00611                         '{
00612                                 ^[ ]{0,3}       # Leading space
00613                                 ([-*_])         # $1: First marker
00614                                 (?>                     # Repeated marker group
00615                                         [ ]{0,2}        # Zero, one, or two spaces.
00616                                         \1                      # Marker character
00617                                 ){2,}           # Group repeated at least twice
00618                                 [ ]*            # Tailing spaces
00619                                 $                       # End of line.
00620                         }mx',
00621                         "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
00622                         $text);
00623         }
00624 
00625 
00626         var $span_gamut = array(
00627         # Table of span-level transformations: method name => numeric priority.
00628         # These are all the transformations that occur *within* block-level
00629         # tags like paragraphs, headers, and list items.
00630         # NOTE(review): runSpanGamut() iterates in stored order and ignores the numbers; presumably the table is sorted by priority elsewhere -- confirm.
00631                 # Process character escapes, code spans, and inline HTML
00632                 # in one shot.
00633                 "parseSpan"           => -30,
00634 
00635                 # Process anchor and image tags. Images must come first,
00636                 # because ![foo][f] looks like an anchor.
00637                 "doImages"            =>  10,
00638                 "doAnchors"           =>  20,
00639                 
00640                 # Make links out of things like `<http://example.com/>`
00641                 # Must come after doAnchors, because you can use < and >
00642                 # delimiters in inline links like [this](<url>).
00643                 "doAutoLinks"         =>  30,
00644                 "encodeAmpsAndAngles" =>  40,
00645 
00646                 "doItalicsAndBold"    =>  50,
00647                 "doHardBreaks"        =>  60,
00648                 );
00649 
00650         function runSpanGamut($text) {
00651         #
00652         # Pipe $text through every method named by the keys of
00653         # $this->span_gamut, in the table's iteration order; return the result.
00654                 foreach (array_keys($this->span_gamut) as $transform) {
00655                         $text = $this->{$transform}($text);
00656                 }
00657 
00658                 return $text;
00659         }
00660         
00661         
00662         function doHardBreaks($text) {
00663                 # Two or more spaces right before a newline mark a hard line break.
00664                 $break_handler = array(&$this, '_doHardBreaks_callback');
00665                 return preg_replace_callback('/ {2,}\n/', $break_handler, $text);
00666         }
00667         function _doHardBreaks_callback($matches) {
00668                 return $this->hashPart('<br' . $this->empty_element_suffix . "\n"); # hashed <br> tag plus the newline; $matches is not used
00669         }
00670 
00671 
00672         function doAnchors($text) {
00673         #
00674         # Turn Markdown link shortcuts into XHTML <a> tags.
00675         # Runs three passes in order (reference links, inline links, bare [text] shortcuts) and returns the rewritten text.
00676                 if ($this->in_anchor) return $text; # already inside a link (the callbacks run the span gamut on link text): leave text untouched
00677                 $this->in_anchor = true; # cleared again just before returning
00678                 
00679                 #
00680                 # First, handle reference-style links: [link text] [id]
00681                 #
00682                 $text = preg_replace_callback('{
00683                         (                                       # wrap whole match in $1
00684                           \[
00685                                 ('.$this->nested_brackets_re.') # link text = $2
00686                           \]
00687 
00688                           [ ]?                          # one optional space
00689                           (?:\n[ ]*)?           # one optional newline followed by spaces
00690 
00691                           \[
00692                                 (.*?)           # id = $3
00693                           \]
00694                         )
00695                         }xs',
00696                         array(&$this, '_doAnchors_reference_callback'), $text);
00697 
00698                 #
00699                 # Next, inline-style links: [link text](url "optional title")
00700                 #
00701                 $text = preg_replace_callback('{
00702                         (                               # wrap whole match in $1
00703                           \[
00704                                 ('.$this->nested_brackets_re.') # link text = $2
00705                           \]
00706                           \(                    # literal paren
00707                                 [ \n]*
00708                                 (?:
00709                                         <(.+?)> # href = $3
00710                                 |
00711                                         ('.$this->nested_url_parenthesis_re.')  # href = $4
00712                                 )
00713                                 [ \n]*
00714                                 (                       # $5
00715                                   ([\'"])       # quote char = $6
00716                                   (.*?)         # Title = $7
00717                                   \6            # matching quote
00718                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
00719                                 )?                      # title is optional
00720                           \)
00721                         )
00722                         }xs',
00723                         array(&$this, '_doAnchors_inline_callback'), $text);
00724 
00725                 #
00726                 # Last, handle reference-style shortcuts: [link text]
00727                 # These must come last in case you've also got [link text][1]
00728                 # or [link text](/foo)
00729                 # The same callback as the first pass is reused; this pattern has no id group ($3).
00730                 $text = preg_replace_callback('{
00731                         (                                       # wrap whole match in $1
00732                           \[
00733                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
00734                           \]
00735                         )
00736                         }xs',
00737                         array(&$this, '_doAnchors_reference_callback'), $text);
00738 
00739                 $this->in_anchor = false;
00740                 return $text;
00741         }
00742         function _doAnchors_reference_callback($matches) { # builds the <a> tag for [text][id], [text][] and [text]; returns the match unchanged when the id has no URL
00743                 $whole_match =  $matches[1];
00744                 $link_text   =  $matches[2];
00745                 $link_id     =& $matches[3]; # bound by reference: group 3 does not exist for the bare [text] pattern
00746 
00747                 if ($link_id == "") {
00748                         # for shortcut links like [this][] or [this].
00749                         $link_id = $link_text;
00750                 }
00751                 
00752                 # lower-case and turn embedded newlines into spaces
00753                 $link_id = strtolower($link_id);
00754                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
00755 
00756                 if (isset($this->urls[$link_id])) {
00757                         $url = $this->urls[$link_id];
00758                         $url = $this->encodeAttribute($url);
00759                         
00760                         $result = "<a href=\"$url\"";
00761                         if ( isset( $this->titles[$link_id] ) ) {
00762                                 $title = $this->titles[$link_id];
00763                                 $title = $this->encodeAttribute($title);
00764                                 $result .=  " title=\"$title\"";
00765                         }
00766                 
00767                         $link_text = $this->runSpanGamut($link_text); # the link text may itself contain span-level markup
00768                         $result .= ">$link_text</a>";
00769                         $result = $this->hashPart($result);
00770                 }
00771                 else {
00772                         $result = $whole_match; # no URL registered for this id: emit the original text
00773                 }
00774                 return $result;
00775         }
00776         function _doAnchors_inline_callback($matches) { # builds the hashed <a> tag for an inline link [text](url "title")
00777                 $whole_match    =  $matches[1];
00778                 $link_text              =  $this->runSpanGamut($matches[2]);
00779                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3]; # $3 = <url> form, $4 = bare url form
00780                 $title                  =& $matches[7]; # bound by reference so a missing title group reads as unset
00781 
00782                 $url = $this->encodeAttribute($url);
00783 
00784                 $result = "<a href=\"$url\"";
00785                 if (isset($title)) {
00786                         $title = $this->encodeAttribute($title);
00787                         $result .=  " title=\"$title\"";
00788                 }
00789                 
00790                 $link_text = $this->runSpanGamut($link_text); # NOTE(review): second runSpanGamut() pass over text already processed above -- looks redundant, confirm before removing
00791                 $result .= ">$link_text</a>";
00792 
00793                 return $this->hashPart($result);
00794         }
00795 
00796 
00797         function doImages($text) {
00798         #
00799         # Turn Markdown image shortcuts into <img> tags.
00800         # Two passes: reference-style images first, then inline images. Returns the rewritten text.
00801                 #
00802                 # First, handle reference-style labeled images: ![alt text][id]
00803                 #
00804                 $text = preg_replace_callback('{
00805                         (                               # wrap whole match in $1
00806                           !\[
00807                                 ('.$this->nested_brackets_re.')         # alt text = $2
00808                           \]
00809 
00810                           [ ]?                          # one optional space
00811                           (?:\n[ ]*)?           # one optional newline followed by spaces
00812 
00813                           \[
00814                                 (.*?)           # id = $3
00815                           \]
00816 
00817                         )
00818                         }xs', 
00819                         array(&$this, '_doImages_reference_callback'), $text);
00820 
00821                 #
00822                 # Next, handle inline images:  ![alt text](url "optional title")
00823                 # Don't forget: encode * and _
00824                 #
00825                 $text = preg_replace_callback('{
00826                         (                               # wrap whole match in $1
00827                           !\[
00828                                 ('.$this->nested_brackets_re.')         # alt text = $2
00829                           \]
00830                           \s?                   # One optional whitespace character
00831                           \(                    # literal paren
00832                                 [ \n]*
00833                                 (?:
00834                                         <(\S*)> # src url = $3
00835                                 |
00836                                         ('.$this->nested_url_parenthesis_re.')  # src url = $4
00837                                 )
00838                                 [ \n]*
00839                                 (                       # $5
00840                                   ([\'"])       # quote char = $6
00841                                   (.*?)         # title = $7
00842                                   \6            # matching quote
00843                                   [ \n]*
00844                                 )?                      # title is optional
00845                           \)
00846                         )
00847                         }xs',
00848                         array(&$this, '_doImages_inline_callback'), $text);
00849 
00850                 return $text;
00851         }
00852         function _doImages_reference_callback($matches) {
00853                 # Callback for reference-style images ![alt][id]:
00854                 #   $matches[1] = whole match, $matches[2] = alt text, $matches[3] = id.
00855                 # Returns a hashed <img> tag, or the whole match when the id is unknown.
00856 
00857                 # An empty id (as in ![this][]) falls back to the alt text.
00858                 $ref_id = strtolower($matches[3]);
00859                 if ($ref_id == "") {
00860                         $ref_id = strtolower($matches[2]);
00861                 }
00862 
00863                 $alt = $this->encodeAttribute($matches[2]);
00864 
00865                 # If there's no such link ID, leave intact:
00866                 if (!isset($this->urls[$ref_id])) {
00867                         return $matches[1];
00868                 }
00869 
00870                 $src = $this->encodeAttribute($this->urls[$ref_id]);
00871                 $tag = '<img src="' . $src . '" alt="' . $alt . '"';
00872 
00873                 # The title attribute is emitted only when one is registered for the id.
00874                 if (isset($this->titles[$ref_id])) {
00875                         $tag .= ' title="' . $this->encodeAttribute($this->titles[$ref_id]) . '"';
00876                 }
00877 
00878                 return $this->hashPart($tag . $this->empty_element_suffix);
00879         }
00880         function _doImages_inline_callback($matches) {
00881                 # Callback for inline images ![alt](url "title"):
00882                 #   $matches[2] = alt text, $matches[3] = <url> form,
00883                 #   $matches[4] = bare url form, $matches[7] = optional title.
00884                 # Returns the hashed <img> tag.
00885 
00886                 $alt = $this->encodeAttribute($matches[2]);
00887                 $src = $this->encodeAttribute($matches[3] == '' ? $matches[4] : $matches[3]);
00888                 $tag = '<img src="' . $src . '" alt="' . $alt . '"';
00889 
00890                 # Group 7 is only set when a title was written.
00891                 if (isset($matches[7])) {
00892                         $tag .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
00893                 }
00894 
00895                 return $this->hashPart($tag . $this->empty_element_suffix);
00896         }
00897 
00898 
00899         function doHeaders($text) { # converts Setext and atx headers in $text into hashed <h1>..<h6> blocks and returns the text
00900                 # Setext-style headers:
00901                 #         Header 1
00902                 #         ========
00903                 #
00904                 #         Header 2
00905                 #         --------
00906                 #
00907                 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
00908                         array(&$this, '_doHeaders_callback_setext'), $text);
00909 
00910                 # atx-style headers:
00911                 #       # Header 1
00912                 #       ## Header 2
00913                 #       ## Header 2 with closing hashes ##
00914                 #       ...
00915                 #       ###### Header 6
00916                 #
00917                 $text = preg_replace_callback('{
00918                                 ^(\#{1,6})      # $1 = string of #\'s
00919                                 [ ]*
00920                                 (.+?)           # $2 = Header text
00921                                 [ ]*
00922                                 \#*                     # optional closing #\'s (not counted)
00923                                 \n+
00924                         }xm',
00925                         array(&$this, '_doHeaders_callback_atx'), $text);
00926 
00927                 return $text;
00928         }
00929         function _doHeaders_callback_setext($matches) { # $matches[1] = header text, $matches[2] = underline of "=" or "-"; returns the hashed header block
00930                 # Terrible hack to check we haven't found an empty list item: a single "-" under a line that is itself a "-" bullet is returned untouched.
00931                 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
00932                         return $matches[0];
00933                 # "=" underline gives <h1>, "-" gives <h2>. Use [0], not {0}: curly-brace string offsets are a fatal error on PHP 8.
00934                 $level = $matches[2][0] == '=' ? 1 : 2;
00935                 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
00936                 return "\n" . $this->hashBlock($block) . "\n\n";
00937         }
00938         function _doHeaders_callback_atx($matches) {
00939                 $tag = 'h' . strlen($matches[1]); # heading level = number of leading "#" characters
00940                 $html = '<' . $tag . '>' . $this->runSpanGamut($matches[2]) . '</' . $tag . '>';
00941                 return "\n" . $this->hashBlock($html) . "\n\n";
00942         }
00943 
00944 
00945         function doLists($text) {
00946         #
00947         # Form HTML ordered (numbered) and unordered (bulleted) lists.
00948         # Each whole list found in $text is handed to _doLists_callback(); the rewritten text is returned.
00949                 $less_than_tab = $this->tab_width - 1;
00950 
00951                 # Re-usable patterns to match list item bullets and number markers:
00952                 $marker_ul_re  = '[*+-]';
00953                 $marker_ol_re  = '\d+[.]';
00954                 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; # NOTE: not referenced again in this method
00955 
00956                 $markers_relist = array( # each marker kind => the other kind, which ends a list of the first kind
00957                         $marker_ul_re => $marker_ol_re,
00958                         $marker_ol_re => $marker_ul_re,
00959                         );
00960 
00961                 foreach ($markers_relist as $marker_re => $other_marker_re) {
00962                         # Re-usable pattern to match any entire ul or ol list:
00963                         $whole_list_re = '
00964                                 (                                                               # $1 = whole list
00965                                   (                                                             # $2
00966                                         ([ ]{0,'.$less_than_tab.'})     # $3 = number of spaces
00967                                         ('.$marker_re.')                        # $4 = first list item marker
00968                                         [ ]+
00969                                   )
00970                                   (?s:.+?)
00971                                   (                                                             # $5
00972                                           \z
00973                                         |
00974                                           \n{2,}
00975                                           (?=\S)
00976                                           (?!                                           # Negative lookahead for another list item marker
00977                                                 [ ]*
00978                                                 '.$marker_re.'[ ]+
00979                                           )
00980                                         |
00981                                           (?=                                           # Lookahead for another kind of list
00982                                             \n
00983                                                 \3                                              # Must have the same indentation
00984                                                 '.$other_marker_re.'[ ]+
00985                                           )
00986                                   )
00987                                 )
00988                         '; // mx
00989                         
00990                         # We use a different prefix before nested lists than top-level lists.
00991                         # See extended comment in processListItems().
00992                 
00993                         if ($this->list_level) {
00994                                 $text = preg_replace_callback('{
00995                                                 ^
00996                                                 '.$whole_list_re.'
00997                                         }mx',
00998                                         array(&$this, '_doLists_callback'), $text);
00999                         }
01000                         else {
01001                                 $text = preg_replace_callback('{
01002                                                 (?:(?<=\n)\n|\A\n?) # Must eat the newline
01003                                                 '.$whole_list_re.'
01004                                         }mx',
01005                                         array(&$this, '_doLists_callback'), $text);
01006                         }
01007                 }
01008 
01009                 return $text;
01010         }
01011         function _doLists_callback($matches) {
01012                 # Callback for doLists(): turns one matched list into a hashed
01013                 # <ul>/<ol> block. $matches[1] is the whole list text and
01014                 # $matches[4] is the marker of its first item.
01015                 $marker_ul_re  = '[*+-]';
01016                 $marker_ol_re  = '\d+[.]';
01017                 
01018                 # The first marker decides the list type and which marker pattern its items use.
01019                 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";
01020                 $marker_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
01021                 
01022                 # The list text is passed on with one extra trailing newline appended.
01023                 $result = $this->processListItems($matches[1] . "\n", $marker_re);
01024                 
01025                 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
01026                 return "\n". $result ."\n\n";
01027         }
01028 
01029         var $list_level = 0; # list nesting depth: incremented and decremented by processListItems(), tested by doLists()
01030 
01031         function processListItems($list_str, $marker_any_re) {
01032         #
01033         #       Process the contents of a single ordered or unordered list, splitting it
01034         #       into individual list items.
01035         #       $marker_any_re is the regex fragment matching this list's item markers; returns $list_str with each matched item replaced by an <li> element.
01036                 # The $this->list_level counter keeps track of when we're inside a list.
01037                 # Each time we enter a list, we increment it; when we leave a list,
01038                 # we decrement. If it's zero, we're not in a list anymore.
01039                 #
01040                 # We do this because when we're not inside a list, we want to treat
01041                 # something like this:
01042                 #
01043                 #               I recommend upgrading to version
01044                 #               8. Oops, now this line is treated
01045                 #               as a sub-list.
01046                 #
01047                 # As a single paragraph, despite the fact that the second line starts
01048                 # with a digit-period-space sequence.
01049                 #
01050                 # Whereas when we're inside a list (or sub-list), that line will be
01051                 # treated as the start of a sub-list. What a kludge, huh? This is
01052                 # an aspect of Markdown's syntax that's hard to parse perfectly
01053                 # without resorting to mind-reading. Perhaps the solution is to
01054                 # change the syntax rules such that sub-lists must start with a
01055                 # starting cardinal number; e.g. "1." or "a.".
01056                 
01057                 $this->list_level++;
01058 
01059                 # trim trailing blank lines:
01060                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
01061 
01062                 $list_str = preg_replace_callback('{
01063                         (\n)?                                                   # leading line = $1
01064                         (^[ ]*)                                                 # leading whitespace = $2
01065                         ('.$marker_any_re.'                             # list marker and space = $3
01066                                 (?:[ ]+|(?=\n)) # space only required if item is not empty
01067                         )
01068                         ((?s:.*?))                                              # list item text   = $4
01069                         (?:(\n+(?=\n))|\n)                              # tailing blank line = $5
01070                         (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
01071                         }xm',
01072                         array(&$this, '_processListItems_callback'), $list_str);
01073 
01074                 $this->list_level--; # leaving this list: restore the previous nesting depth
01075                 return $list_str;
01076         }
01077         function _processListItems_callback($matches) { # renders one matched list item as "<li>...</li>\n"
01078                 $item = $matches[4];
01079                 $leading_line =& $matches[1];
01080                 $leading_space =& $matches[2];
01081                 $marker_space = $matches[3];
01082                 $tailing_blank_line =& $matches[5];
01083 
01084                 if ($leading_line || $tailing_blank_line || 
01085                         preg_match('/\n{2,}/', $item))
01086                 { # item is preceded by, followed by, or contains a blank line: process its content as block-level text
01087                         # Replace marker with the appropriate whitespace indentation
01088                         $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
01089                         $item = $this->runBlockGamut($this->outdent($item)."\n");
01090                 }
01091                 else { # otherwise: look for nested lists, then process the rest as span-level text
01092                         # Recursion for sub-lists:
01093                         $item = $this->doLists($this->outdent($item));
01094                         $item = preg_replace('/\n+$/', '', $item);
01095                         $item = $this->runSpanGamut($item);
01096                 }
01097 
01098                 return "<li>" . $item . "</li>\n";
01099         }
01100 
01101 
01102         function doCodeBlocks($text) {
01103         #
01104         #       Process Markdown `<pre><code>` blocks.
01105         #       NOTE(review): the pattern below only matches leading spaces; presumably tabs were expanded to spaces earlier -- confirm.
01106                 $text = preg_replace_callback('{
01107                                 (?:\n\n|\A\n?)
01108                                 (                   # $1 = the code block -- one or more lines, starting with a space/tab
01109                                   (?>
01110                                         [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
01111                                         .*\n+
01112                                   )+
01113                                 )
01114                                 ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
01115                         }xm',
01116                         array(&$this, '_doCodeBlocks_callback'), $text);
01117 
01118                 return $text; # each matched block has been replaced by the callback's output
01119         }
01120         function _doCodeBlocks_callback($matches) {
01121                 # Callback for doCodeBlocks(): $matches[1] holds the indented code lines.
01122 
01123                 # Outdent the block, then escape HTML special characters (quotes are left alone).
01124                 $escaped = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);
01125 
01126                 # Drop newlines at both ends; exactly one is re-added before </code>.
01127                 $body = preg_replace('/\A\n+|\n+\z/', '', $escaped);
01128 
01129                 $html = '<pre><code>' . $body . "\n" . '</code></pre>';
01130                 return "\n\n".$this->hashBlock($html)."\n\n";
01131         }
01132 
01133 
01134         function makeCodeSpan($code) {
01135         #
01136         # Wrap $code in <code> tags: surrounding whitespace is trimmed, HTML
01137         # special characters (not quotes) are escaped, and the result is hashed.
01138                 $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
01139                 return $this->hashPart('<code>' . $escaped . '</code>');
01140         }
01141 
01142 
01143         var $em_relist = array( # key '' = pattern for an opening marker (followed by non-space); other keys = pattern closing that marker (preceded by non-space)
01144                 ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![.,:;]\s)',
01145                 '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
01146                 '_' => '(?<=\S|^)(?<!_)_(?!_)',
01147                 );
01148         var $strong_relist = array( # same layout as $em_relist, for the two-character markers
01149                 ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![.,:;]\s)',
01150                 '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
01151                 '__' => '(?<=\S|^)(?<!_)__(?!_)',
01152                 );
01153         var $em_strong_relist = array( # same layout, for the three-character markers
01154                 ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![.,:;]\s)',
01155                 '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
01156                 '___' => '(?<=\S|^)(?<!_)___(?!_)',
01157                 );
01158         var $em_strong_prepared_relist; # filled by prepareItalicsAndBold(): "$em$strong" context key => complete delimited regex
01159         
01160         function prepareItalicsAndBold() {
01161         #
01162         # Build $this->em_strong_prepared_relist: one delimited token-matching
01163         # regex for every combination of $em_relist and $strong_relist keys.
01164         #
01165                 foreach ($this->em_relist as $em => $em_re) {
01166                         foreach ($this->strong_relist as $strong => $strong_re) {
01167                                 # Key naming the current emphasis context.
01168                                 $context = "$em$strong";
01169 
01170                                 # Alternatives, in order: the combined em+strong token
01171                                 # (when one exists for this context), then em, then strong.
01172                                 $alternatives = array($em_re, $strong_re);
01173                                 if (isset($this->em_strong_relist[$context])) {
01174                                         array_unshift($alternatives, $this->em_strong_relist[$context]);
01175                                 }
01176                                 
01177                                 $this->em_strong_prepared_relist[$context] = '{(' . implode('|', $alternatives) . ')}';
01178                         }
01179                 }
01180         }
01181         
01182         function doItalicsAndBold($text) {
01183                 $token_stack = array('');
01184                 $text_stack = array('');
01185                 $em = '';
01186                 $strong = '';
01187                 $tree_char_em = false;
01188                 
01189                 while (1) {
01190                         #
01191                         # Get prepared regular expression for seraching emphasis tokens
01192                         # in current context.
01193                         #
01194                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
01195                         
01196                         #
01197                         # Each loop iteration search for the next emphasis token. 
01198                         # Each token is then passed to handleSpanToken.
01199                         #
01200                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
01201                         $text_stack[0] .= $parts[0];
01202                         $token =& $parts[1];
01203                         $text =& $parts[2];
01204                         
01205                         if (empty($token)) {
01206                                 # Reached end of text span: empty stack without emitting.
01207                                 # any more emphasis.
01208                                 while ($token_stack[0]) {
01209                                         $text_stack[1] .= array_shift($token_stack);
01210                                         $text_stack[0] .= array_shift($text_stack);
01211                                 }
01212                                 break;
01213                         }
01214                         
01215                         $token_len = strlen($token);
01216                         if ($tree_char_em) {
01217                                 # Reached closing marker while inside a three-char emphasis.
01218                                 if ($token_len == 3) {
01219                                         # Three-char closing marker, close em and strong.
01220                                         array_shift($token_stack);
01221                                         $span = array_shift($text_stack);
01222                                         $span = $this->runSpanGamut($span);
01223                                         $span = "<strong><em>$span</em></strong>";
01224                                         $text_stack[0] .= $this->hashPart($span);
01225                                         $em = '';
01226                                         $strong = '';
01227                                 } else {
01228                                         # Other closing marker: close one em or strong and
01229                                         # change current token state to match the other
01230                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
01231                                         $tag = $token_len == 2 ? "strong" : "em";
01232                                         $span = $text_stack[0];
01233                                         $span = $this->runSpanGamut($span);
01234                                         $span = "<$tag>$span</$tag>";
01235                                         $text_stack[0] = $this->hashPart($span);
01236                                         $$tag = ''; # $$tag stands for $em or $strong
01237                                 }
01238                                 $tree_char_em = false;
01239                         } else if ($token_len == 3) {
01240                                 if ($em) {
01241                                         # Reached closing marker for both em and strong.
01242                                         # Closing strong marker:
01243                                         for ($i = 0; $i < 2; ++$i) {
01244                                                 $shifted_token = array_shift($token_stack);
01245                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
01246                                                 $span = array_shift($text_stack);
01247                                                 $span = $this->runSpanGamut($span);
01248                                                 $span = "<$tag>$span</$tag>";
01249                                                 $text_stack[0] .= $this->hashPart($span);
01250                                                 $$tag = ''; # $$tag stands for $em or $strong
01251                                         }
01252                                 } else {
01253                                         # Reached opening three-char emphasis marker. Push on token 
01254                                         # stack; will be handled by the special condition above.
01255                                         $em = $token{0};
01256                                         $strong = "$em$em";
01257                                         array_unshift($token_stack, $token);
01258                                         array_unshift($text_stack, '');
01259                                         $tree_char_em = true;
01260                                 }
01261                         } else if ($token_len == 2) {
01262                                 if ($strong) {
01263                                         # Unwind any dangling emphasis marker:
01264                                         if (strlen($token_stack[0]) == 1) {
01265                                                 $text_stack[1] .= array_shift($token_stack);
01266                                                 $text_stack[0] .= array_shift($text_stack);
01267                                         }
01268                                         # Closing strong marker:
01269                                         array_shift($token_stack);
01270                                         $span = array_shift($text_stack);
01271                                         $span = $this->runSpanGamut($span);
01272                                         $span = "<strong>$span</strong>";
01273                                         $text_stack[0] .= $this->hashPart($span);
01274                                         $strong = '';
01275                                 } else {
01276                                         array_unshift($token_stack, $token);
01277                                         array_unshift($text_stack, '');
01278                                         $strong = $token;
01279                                 }
01280                         } else {
01281                                 # Here $token_len == 1
01282                                 if ($em) {
01283                                         if (strlen($token_stack[0]) == 1) {
01284                                                 # Closing emphasis marker:
01285                                                 array_shift($token_stack);
01286                                                 $span = array_shift($text_stack);
01287                                                 $span = $this->runSpanGamut($span);
01288                                                 $span = "<em>$span</em>";
01289                                                 $text_stack[0] .= $this->hashPart($span);
01290                                                 $em = '';
01291                                         } else {
01292                                                 $text_stack[0] .= $token;
01293                                         }
01294                                 } else {
01295                                         array_unshift($token_stack, $token);
01296                                         array_unshift($text_stack, '');
01297                                         $em = $token;
01298                                 }
01299                         }
01300                 }
01301                 return $text_stack[0];
01302         }
01303 
01304 
01305         function doBlockQuotes($text) {
01306                 $text = preg_replace_callback('/
01307                           (                                                             # Wrap whole match in $1
01308                                 (?>
01309                                   ^[ ]*>[ ]?                    # ">" at the start of a line
01310                                         .+\n                                    # rest of the first line
01311                                   (.+\n)*                                       # subsequent consecutive lines
01312                                   \n*                                           # blanks
01313                                 )+
01314                           )
01315                         /xm',
01316                         array(&$this, '_doBlockQuotes_callback'), $text);
01317 
01318                 return $text;
01319         }
01320         function _doBlockQuotes_callback($matches) {
01321                 $bq = $matches[1];
01322                 # trim one level of quoting - trim whitespace-only lines
01323                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
01324                 $bq = $this->runBlockGamut($bq);                # recurse
01325 
01326                 $bq = preg_replace('/^/m', "  ", $bq);
01327                 # These leading spaces cause problem with <pre> content, 
01328                 # so we need to fix that:
01329                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
01330                         array(&$this, '_doBlockQuotes_callback2'), $bq);
01331 
01332                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
01333         }
01334         function _doBlockQuotes_callback2($matches) {
01335                 $pre = $matches[1];
01336                 $pre = preg_replace('/^  /m', '', $pre);
01337                 return $pre;
01338         }
01339 
01340 
01341         function formParagraphs($text) {
01342         #
01343         #       Params:
01344         #               $text - string to process with html <p> tags
01345         #
01346                 # Strip leading and trailing lines:
01347                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
01348 
01349                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
01350 
01351                 #
01352                 # Wrap <p> tags and unhashify HTML blocks
01353                 #
01354                 foreach ($grafs as $key => $value) {
01355                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
01356                                 # Is a paragraph.
01357                                 $value = $this->runSpanGamut($value);
01358                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
01359                                 $value .= "</p>";
01360                                 $grafs[$key] = $this->unhash($value);
01361                         }
01362                         else {
01363                                 # Is a block.
01364                                 # Modify elements of @grafs in-place...
01365                                 $graf = $value;
01366                                 $block = $this->html_hashes[$graf];
01367                                 $graf = $block;
01368 //                              if (preg_match('{
01369 //                                      \A
01370 //                                      (                                                       # $1 = <div> tag
01371 //                                        <div  \s+
01372 //                                        [^>]*
01373 //                                        \b
01374 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
01375 //                                        1
01376 //                                        \2
01377 //                                        [^>]*
01378 //                                        >
01379 //                                      )
01380 //                                      (                                                       # $3 = contents
01381 //                                      .*
01382 //                                      )
01383 //                                      (</div>)                                        # $4 = closing tag
01384 //                                      \z
01385 //                                      }xs', $block, $matches))
01386 //                              {
01387 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
01388 //
01389 //                                      # We can't call Markdown(), because that resets the hash;
01390 //                                      # that initialization code should be pulled into its own sub, though.
01391 //                                      $div_content = $this->hashHTMLBlocks($div_content);
01392 //                                      
01393 //                                      # Run document gamut methods on the content.
01394 //                                      foreach ($this->document_gamut as $method => $priority) {
01395 //                                              $div_content = $this->$method($div_content);
01396 //                                      }
01397 //
01398 //                                      $div_open = preg_replace(
01399 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
01400 //
01401 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
01402 //                              }
01403                                 $grafs[$key] = $graf;
01404                         }
01405                 }
01406 
01407                 return implode("\n\n", $grafs);
01408         }
01409 
01410 
01411         function encodeAttribute($text) {
01412         #
01413         # Encode text for a double-quoted HTML attribute. This function
01414         # is *not* suitable for attributes enclosed in single quotes.
01415         #
01416                 $text = $this->encodeAmpsAndAngles($text);
01417                 $text = str_replace('"', '&quot;', $text);
01418                 return $text;
01419         }
01420         
01421         
01422         function encodeAmpsAndAngles($text) {
01423         #
01424         # Smart processing for ampersands and angle brackets that need to 
01425         # be encoded. Valid character entities are left alone unless the
01426         # no-entities mode is set.
01427         #
01428                 if ($this->no_entities) {
01429                         $text = str_replace('&', '&amp;', $text);
01430                 } else {
01431                         # Ampersand-encoding based entirely on Nat Irons's Amputator
01432                         # MT plugin: <http://bumppo.net/projects/amputator/>
01433                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
01434                                                                 '&amp;', $text);;
01435                 }
01436                 # Encode remaining <'s
01437                 $text = str_replace('<', '&lt;', $text);
01438 
01439                 return $text;
01440         }
01441 
01442 
01443         function doAutoLinks($text) {
01444                 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 
01445                         array(&$this, '_doAutoLinks_url_callback'), $text);
01446 
01447                 # Email addresses: <address@domain.foo>
01448                 $text = preg_replace_callback('{
01449                         <
01450                         (?:mailto:)?
01451                         (
01452                                 (?:
01453                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
01454                                 |
01455                                         ".*?"
01456                                 )
01457                                 \@
01458                                 (?:
01459                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
01460                                 |
01461                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
01462                                 )
01463                         )
01464                         >
01465                         }xi',
01466                         array(&$this, '_doAutoLinks_email_callback'), $text);
01467 
01468                 return $text;
01469         }
01470         function _doAutoLinks_url_callback($matches) {
01471                 $url = $this->encodeAttribute($matches[1]);
01472                 $link = "<a href=\"$url\">$url</a>";
01473                 return $this->hashPart($link);
01474         }
01475         function _doAutoLinks_email_callback($matches) {
01476                 $address = $matches[1];
01477                 $link = $this->encodeEmailAddress($address);
01478                 return $this->hashPart($link);
01479         }
01480 
01481 
01482         function encodeEmailAddress($addr) {
01483         #
01484         #       Input: an email address, e.g. "foo@example.com"
01485         #
01486         #       Output: the email address as a mailto link, with each character
01487         #               of the address encoded as either a decimal or hex entity, in
01488         #               the hopes of foiling most address harvesting spam bots. E.g.:
01489         #
01490         #         <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
01491         #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
01492         #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
01493         #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
01494         #
01495         #       Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
01496         #   With some optimizations by Milian Wolff.
01497         #
01498                 $addr = "mailto:" . $addr;
01499                 $chars = preg_split('/(?<!^)(?!$)/', $addr);
01500                 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
01501                 
01502                 foreach ($chars as $key => $char) {
01503                         $ord = ord($char);
01504                         # Ignore non-ascii chars.
01505                         if ($ord < 128) {
01506                                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
01507                                 # roughly 10% raw, 45% hex, 45% dec
01508                                 # '@' *must* be encoded. I insist.
01509                                 if ($r > 90 && $char != '@') /* do nothing */;
01510                                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
01511                                 else              $chars[$key] = '&#'.$ord.';';
01512                         }
01513                 }
01514                 
01515                 $addr = implode('', $chars);
01516                 $text = implode('', array_slice($chars, 7)); # text without `mailto:`
01517                 $addr = "<a href=\"$addr\">$text</a>";
01518 
01519                 return $addr;
01520         }
01521 
01522 
01523         function parseSpan($str) {
01524         #
01525         # Take the string $str and parse it into tokens, hashing embeded HTML,
01526         # escaped characters and handling code spans.
01527         #
01528                 $output = '';
01529                 
01530                 $span_re = '{
01531                                 (
01532                                         \\\\'.$this->escape_chars_re.'
01533                                 |
01534                                         (?<![`\\\\])
01535                                         `+                                              # code span marker
01536                         '.( $this->no_markup ? '' : '
01537                                 |
01538                                         <!--    .*?     -->             # comment
01539                                 |
01540                                         <\?.*?\?> | <%.*?%>             # processing instruction
01541                                 |
01542                                         <[/!$]?[-a-zA-Z0-9:_]+  # regular tags
01543                                         (?>
01544                                                 \s
01545                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
01546                                         )?
01547                                         >
01548                         ').'
01549                                 )
01550                                 }xs';
01551 
01552                 while (1) {
01553                         #
01554                         # Each loop iteration seach for either the next tag, the next 
01555                         # openning code span marker, or the next escaped character. 
01556                         # Each token is then passed to handleSpanToken.
01557                         #
01558                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
01559                         
01560                         # Create token from text preceding tag.
01561                         if ($parts[0] != "") {
01562                                 $output .= $parts[0];
01563                         }
01564                         
01565                         # Check if we reach the end.
01566                         if (isset($parts[1])) {
01567                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
01568                                 $str = $parts[2];
01569                         }
01570                         else {
01571                                 break;
01572                         }
01573                 }
01574                 
01575                 return $output;
01576         }
01577         
01578         
01579         function handleSpanToken($token, &$str) {
01580         #
01581         # Handle $token provided by parseSpan by determining its nature and 
01582         # returning the corresponding value that should replace it.
01583         #
01584                 switch ($token{0}) {
01585                         case "\\":
01586                                 return $this->hashPart("&#". ord($token{1}). ";");
01587                         case "`":
01588                                 # Search for end marker in remaining text.
01589                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
01590                                         $str, $matches))
01591                                 {
01592                                         $str = $matches[2];
01593                                         $codespan = $this->makeCodeSpan($matches[1]);
01594                                         return $this->hashPart($codespan);
01595                                 }
01596                                 return $token; // return as text since no ending marker found.
01597                         default:
01598                                 return $this->hashPart($token);
01599                 }
01600         }
01601 
01602 
01603         function outdent($text) {
01604         #
01605         # Remove one level of line-leading tabs or spaces
01606         #
01607                 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
01608         }
01609 
01610 
01611         # String length function for detab. `_initDetab` will create a function to 
01612         # handle UTF-8 if the default function does not exist.
01613         var $utf8_strlen = 'mb_strlen';
01614         
01615         function detab($text) {
01616         #
01617         # Replace tabs with the appropriate amount of space.
01618         #
01619                 # For each line we separate the line in blocks delemited by
01620                 # tab characters. Then we reconstruct every line by adding the 
01621                 # appropriate number of space between each blocks.
01622                 
01623                 $text = preg_replace_callback('/^.*\t.*$/m',
01624                         array(&$this, '_detab_callback'), $text);
01625 
01626                 return $text;
01627         }
01628         function _detab_callback($matches) {
01629                 $line = $matches[0];
01630                 $strlen = $this->utf8_strlen; # strlen function for UTF-8.
01631                 
01632                 # Split in blocks.
01633                 $blocks = explode("\t", $line);
01634                 # Add each blocks to the line.
01635                 $line = $blocks[0];
01636                 unset($blocks[0]); # Do not add first block twice.
01637                 foreach ($blocks as $block) {
01638                         # Calculate amount of space, insert spaces, insert block.
01639                         $amount = $this->tab_width - 
01640                                 $strlen($line, 'UTF-8') % $this->tab_width;
01641                         $line .= str_repeat(" ", $amount) . $block;
01642                 }
01643                 return $line;
01644         }
01645         function _initDetab() {
01646         #
01647         # Check for the availability of the function in the `utf8_strlen` property
01648         # (initially `mb_strlen`). If the function is not available, create a 
01649         # function that will loosely count the number of UTF-8 characters with a
01650         # regular expression.
01651         #
01652                 if (function_exists($this->utf8_strlen)) return;
01653                 $this->utf8_strlen = create_function('$text', 'return preg_match_all(
01654                         "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
01655                         $text, $m);');
01656         }
01657 
01658 
01659         function unhash($text) {
01660         #
01661         # Swap back in all the tags hashed by _HashHTMLBlocks.
01662         #
01663                 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
01664                         array(&$this, '_unhash_callback'), $text);
01665         }
01666         function _unhash_callback($matches) {
01667                 return $this->html_hashes[$matches[0]];
01668         }
01669 
01670 }
01671 
01672 
01673 #
01674 # Markdown Extra Parser Class
01675 #
01676 
01677 class MarkdownExtra_Parser extends Markdown_Parser {
01678 
01679         # Prefix for footnote ids.
01680         var $fn_id_prefix = "";
01681         
01682         # Optional title attribute for footnote links and backlinks.
01683         var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
01684         var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
01685         
01686         # Optional class attribute for footnote links and backlinks.
01687         var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
01688         var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
01689         
01690         # Predefined abbreviations.
01691         var $predef_abbr = array();
01692 
01693 
01694         function MarkdownExtra_Parser() {
01695         #
01696         # Constructor function. Initialize the parser object.
01697         #
01698                 # Add extra escapable characters before parent constructor 
01699                 # initialize the table.
01700                 $this->escape_chars .= ':|';
01701                 
01702                 # Insert extra document, block, and span transformations. 
01703                 # Parent constructor will do the sorting.
01704                 $this->document_gamut += array(
01705                         "doFencedCodeBlocks" => 5,
01706                         "stripFootnotes"     => 15,
01707                         "stripAbbreviations" => 25,
01708                         "appendFootnotes"    => 50,
01709                         );
01710                 $this->block_gamut += array(
01711                         "doFencedCodeBlocks" => 5,
01712                         "doTables"           => 15,
01713                         "doDefLists"         => 45,
01714                         );
01715                 $this->span_gamut += array(
01716                         "doFootnotes"        => 5,
01717                         "doAbbreviations"    => 70,
01718                         );
01719                 
01720                 parent::Markdown_Parser();
01721         }
01722         
01723         
01724         # Extra variables used during extra transformations.
01725         var $footnotes = array();
01726         var $footnotes_ordered = array();
01727         var $abbr_desciptions = array();
01728         var $abbr_word_re = '';
01729         
01730         # Give the current footnote number.
01731         var $footnote_counter = 1;
01732         
01733         
01734         function setup() {
01735         #
01736         # Setting up Extra-specific variables.
01737         #
01738                 parent::setup();
01739                 
01740                 $this->footnotes = array();
01741                 $this->footnotes_ordered = array();
01742                 $this->abbr_desciptions = array();
01743                 $this->abbr_word_re = '';
01744                 $this->footnote_counter = 1;
01745                 
01746                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
01747                         if ($this->abbr_word_re)
01748                                 $this->abbr_word_re .= '|';
01749                         $this->abbr_word_re .= preg_quote($abbr_word);
01750                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
01751                 }
01752         }
01753         
01754         function teardown() {
01755         #
01756         # Clearing Extra-specific variables.
01757         #
01758                 $this->footnotes = array();
01759                 $this->footnotes_ordered = array();
01760                 $this->abbr_desciptions = array();
01761                 $this->abbr_word_re = '';
01762                 
01763                 parent::teardown();
01764         }
01765         
01766         
01767         ### HTML Block Parser ###
01768         
01769         # Tags that are always treated as block tags:
01770         var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
01771         
01772         # Tags treated as block tags only if the opening tag is alone on its line:
01773         var $context_block_tags_re = 'script|noscript|math|ins|del';
01774         
01775         # Tags where markdown="1" default to span mode:
01776         var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
01777         
01778         # Tags which must not have their contents modified, no matter where 
01779         # they appear:
01780         var $clean_tags_re = 'script|math';
01781         
01782         # Tags that do not need to be closed.
01783         var $auto_close_tags_re = 'hr|img';
01784         
01785 
01786         function hashHTMLBlocks($text) {
01787         #
01788         # Hashify HTML Blocks and "clean tags".
01789         #
01790         # We only want to do this for block-level HTML tags, such as headers,
01791         # lists, and tables. That's because we still want to wrap <p>s around
01792         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
01793         # phrase emphasis, and spans. The list of tags we're looking for is
01794         # hard-coded.
01795         #
01796         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
01797         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
01798         # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
01799         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
01800         # These two functions are calling each other. It's recursive!
01801         #
01802                 #
01803                 # Call the HTML-in-Markdown hasher.
01804                 #
01805                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
01806                 
01807                 return $text;
01808         }
01809         function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 
01810                                                                                 $enclosing_tag_re = '', $span = false)
01811         {
01812         #
01813         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
01814         #
01815         # *   $indent is the number of space to be ignored when checking for code 
01816         #     blocks. This is important because if we don't take the indent into 
01817         #     account, something like this (which looks right) won't work as expected:
01818         #
01819         #     <div>
01820         #         <div markdown="1">
01821         #         Hello World.  <-- Is this a Markdown code block or text?
01822         #         </div>  <-- Is this a Markdown code block or a real tag?
01823         #     <div>
01824         #
01825         #     If you don't like this, just don't indent the tag on which
01826         #     you apply the markdown="1" attribute.
01827         #
01828         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
01829         #     tag with that name. Nested tags supported.
01830         #
01831         # *   If $span is true, text inside must treated as span. So any double 
01832         #     newline will be replaced by a single newline so that it does not create 
01833         #     paragraphs.
01834         #
01835         # Returns an array of that form: ( processed text , remaining text )
01836         #
01837                 if ($text === '') return array('', '');
01838 
01839                 # Regex to check for the presense of newlines around a block tag.
01840                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
01841                 $newline_after_re = 
01842                         '{
01843                                 ^                                               # Start of text following the tag.
01844                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
01845                                 [ ]*\n                                  # Must be followed by newline.
01846                         }xs';
01847                 
01848                 # Regex to match any tag.
01849                 $block_tag_re =
01850                         '{
01851                                 (                                       # $2: Capture hole tag.
01852                                         </?                                     # Any opening or closing tag.
01853                                                 (?>                             # Tag name.
01854                                                         '.$this->block_tags_re.'                        |
01855                                                         '.$this->context_block_tags_re.'        |
01856                                                         '.$this->clean_tags_re.'                |
01857                                                         (?!\s)'.$enclosing_tag_re.'
01858                                                 )
01859                                                 (?:
01860                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
01861                                                         (?>
01862                                                                 ".*?"           |       # Double quotes (can contain `>`)
01863                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
01864                                                                 .+?                             # Anything but quotes and `>`.
01865                                                         )*?
01866                                                 )?
01867                                         >                                       # End of tag.
01868                                 |
01869                                         <!--    .*?     -->     # HTML Comment
01870                                 |
01871                                         <\?.*?\?> | <%.*?%>     # Processing instruction
01872                                 |
01873                                         <!\[CDATA\[.*?\]\]>     # CData Block
01874                                 |
01875                                         # Code span marker
01876                                         `+
01877                                 '. ( !$span ? ' # If not in span.
01878                                 |
01879                                         # Indented code block
01880                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
01881                                         [ ]{'.($indent+4).'}[^\n]* \n
01882                                         (?>
01883                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
01884                                         )*
01885                                 |
01886                                         # Fenced code block marker
01887                                         (?> ^ | \n )
01888                                         [ ]{'.($indent).'}~~~+[ ]*\n
01889                                 ' : '' ). ' # End (if not is span).
01890                                 )
01891                         }xs';
01892 
01893                 
01894                 $depth = 0;             # Current depth inside the tag tree.
01895                 $parsed = "";   # Parsed text that will be returned.
01896 
01897                 #
01898                 # Loop through every tag until we find the closing tag of the parent
01899                 # or loop until reaching the end of text if no parent tag specified.
01900                 #
01901                 do {
01902                         #
01903                         # Split the text using the first $tag_match pattern found.
01904                         # Text before  pattern will be first in the array, text after
01905                         # pattern will be at the end, and between will be any catches made 
01906                         # by the pattern.
01907                         #
01908                         $parts = preg_split($block_tag_re, $text, 2, 
01909                                                                 PREG_SPLIT_DELIM_CAPTURE);
01910                         
01911                         # If in Markdown span mode, add a empty-string span-level hash 
01912                         # after each newline to prevent triggering any block element.
01913                         if ($span) {
01914                                 $void = $this->hashPart("", ':');
01915                                 $newline = "$void\n";
01916                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
01917                         }
01918                         
01919                         $parsed .= $parts[0]; # Text before current tag.
01920                         
01921                         # If end of $text has been reached. Stop loop.
01922                         if (count($parts) < 3) {
01923                                 $text = "";
01924                                 break;
01925                         }
01926                         
01927                         $tag  = $parts[1]; # Tag to handle.
01928                         $text = $parts[2]; # Remaining text after current tag.
01929                         $tag_re = preg_quote($tag); # For use in a regular expression.
01930                         
01931                         #
01932                         # Check for: Code span marker
01933                         #
01934                         if ($tag{0} == "`") {
01935                                 # Find corresponding end marker.
01936                                 $tag_re = preg_quote($tag);
01937                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
01938                                         $text, $matches))
01939                                 {
01940                                         # End marker found: pass text unchanged until marker.
01941                                         $parsed .= $tag . $matches[0];
01942                                         $text = substr($text, strlen($matches[0]));
01943                                 }
01944                                 else {
01945                                         # Unmatched marker: just skip it.
01946                                         $parsed .= $tag;
01947                                 }
01948                         }
01949                         #
01950                         # Check for: Indented code block.
01951                         #
01952                         else if ($tag{0} == "\n" || $tag{0} == " ") {
01953                                 # Indented code block: pass it unchanged, will be handled 
01954                                 # later.
01955                                 $parsed .= $tag;
01956                         }
01957                         #
01958                         # Check for: Fenced code block marker.
01959                         #
01960                         else if ($tag{0} == "~") {
01961                                 # Fenced code block marker: find matching end marker.
01962                                 $tag_re = preg_quote(trim($tag));
01963                                 if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text, 
01964                                         $matches)) 
01965                                 {
01966                                         # End marker found: pass text unchanged until marker.
01967                                         $parsed .= $tag . $matches[0];
01968                                         $text = substr($text, strlen($matches[0]));
01969                                 }
01970                                 else {
01971                                         # No end marker: just skip it.
01972                                         $parsed .= $tag;
01973                                 }
01974                         }
01975                         #
01976                         # Check for: Opening Block level tag or
01977                         #            Opening Context Block tag (like ins and del) 
01978                         #               used as a block tag (tag is alone on it's line).
01979                         #
01980                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
01981                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
01982                                         preg_match($newline_before_re, $parsed) &&
01983                                         preg_match($newline_after_re, $text)    )
01984                                 )
01985                         {
01986                                 # Need to parse tag and following text using the HTML parser.
01987                                 list($block_text, $text) = 
01988                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
01989                                 
01990                                 # Make sure it stays outside of any paragraph by adding newlines.
01991                                 $parsed .= "\n\n$block_text\n\n";
01992                         }
01993                         #
01994                         # Check for: Clean tag (like script, math)
01995                         #            HTML Comments, processing instructions.
01996                         #
01997                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
01998                                 $tag{1} == '!' || $tag{1} == '?')
01999                         {
02000                                 # Need to parse tag and following text using the HTML parser.
02001                                 # (don't check for markdown attribute)
02002                                 list($block_text, $text) = 
02003                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
02004                                 
02005                                 $parsed .= $block_text;
02006                         }
02007                         #
02008                         # Check for: Tag with same name as enclosing tag.
02009                         #
02010                         else if ($enclosing_tag_re !== '' &&
02011                                 # Same name as enclosing tag.
02012                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
02013                         {
02014                                 #
02015                                 # Increase/decrease nested tag count.
02016                                 #
02017                                 if ($tag{1} == '/')                                             $depth--;
02018                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
02019 
02020                                 if ($depth < 0) {
02021                                         #
02022                                         # Going out of parent element. Clean up and break so we
02023                                         # return to the calling function.
02024                                         #
02025                                         $text = $tag . $text;
02026                                         break;
02027                                 }
02028                                 
02029                                 $parsed .= $tag;
02030                         }
02031                         else {
02032                                 $parsed .= $tag;
02033                         }
02034                 } while ($depth >= 0);
02035                 
02036                 return array($parsed, $text);
02037         }
02038         function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
02039         #
02040         # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
02041         #
02042         # *   Calls $hash_method to convert any blocks.
02043         # *   Stops when the first opening tag closes.
02044         # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
02045         #     (it is not inside clean tags)
02046         #
02047         # Returns an array of that form: ( processed text , remaining text )
02048         #
02049                 if ($text === '') return array('', '');
02050                 
02051                 # Regex to match `markdown` attribute inside of a tag.
02052                 $markdown_attr_re = '
02053                         {
02054                                 \s*                     # Eat whitespace before the `markdown` attribute
02055                                 markdown
02056                                 \s*=\s*
02057                                 (?>
02058                                         (["\'])         # $1: quote delimiter           
02059                                         (.*?)           # $2: attribute value
02060                                         \1                      # matching delimiter    
02061                                 |
02062                                         ([^\s>]*)       # $3: unquoted attribute value
02063                                 )
02064                                 ()                              # $4: make $3 always defined (avoid warnings)
02065                         }xs';
02066                 
02067                 # Regex to match any tag.
02068                 $tag_re = '{
02069                                 (                                       # $2: Capture hole tag.
02070                                         </?                                     # Any opening or closing tag.
02071                                                 [\w:$]+                 # Tag name.
02072                                                 (?:
02073                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
02074                                                         (?>
02075                                                                 ".*?"           |       # Double quotes (can contain `>`)
02076                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
02077                                                                 .+?                             # Anything but quotes and `>`.
02078                                                         )*?
02079                                                 )?
02080                                         >                                       # End of tag.
02081                                 |
02082                                         <!--    .*?     -->     # HTML Comment
02083                                 |
02084                                         <\?.*?\?> | <%.*?%>     # Processing instruction
02085                                 |
02086                                         <!\[CDATA\[.*?\]\]>     # CData Block
02087                                 )
02088                         }xs';
02089                 
02090                 $original_text = $text;         # Save original text in case of faliure.
02091                 
02092                 $depth          = 0;    # Current depth inside the tag tree.
02093                 $block_text     = "";   # Temporary text holder for current text.
02094                 $parsed         = "";   # Parsed text that will be returned.
02095 
02096                 #
02097                 # Get the name of the starting tag.
02098                 # (This pattern makes $base_tag_name_re safe without quoting.)
02099                 #
02100                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
02101                         $base_tag_name_re = $matches[1];
02102 
02103                 #
02104                 # Loop through every tag until we find the corresponding closing tag.
02105                 #
02106                 do {
02107                         #
02108                         # Split the text using the first $tag_match pattern found.
02109                         # Text before  pattern will be first in the array, text after
02110                         # pattern will be at the end, and between will be any catches made 
02111                         # by the pattern.
02112                         #
02113                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
02114                         
02115                         if (count($parts) < 3) {
02116                                 #
02117                                 # End of $text reached with unbalenced tag(s).
02118                                 # In that case, we return original text unchanged and pass the
02119                                 # first character as filtered to prevent an infinite loop in the 
02120                                 # parent function.
02121                                 #
02122                                 return array($original_text{0}, substr($original_text, 1));
02123                         }
02124                         
02125                         $block_text .= $parts[0]; # Text before current tag.
02126                         $tag         = $parts[1]; # Tag to handle.
02127                         $text        = $parts[2]; # Remaining text after current tag.
02128                         
02129                         #
02130                         # Check for: Auto-close tag (like <hr/>)
02131                         #                        Comments and Processing Instructions.
02132                         #
02133                         if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
02134                                 $tag{1} == '!' || $tag{1} == '?')
02135                         {
02136                                 # Just add the tag to the block as if it was text.
02137                                 $block_text .= $tag;
02138                         }
02139                         else {
02140                                 #
02141                                 # Increase/decrease nested tag count. Only do so if
02142                                 # the tag's name match base tag's.
02143                                 #
02144                                 if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
02145                                         if ($tag{1} == '/')                                             $depth--;
02146                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
02147                                 }
02148                                 
02149                                 #
02150                                 # Check for `markdown="1"` attribute and handle it.
02151                                 #
02152                                 if ($md_attr && 
02153                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
02154                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
02155                                 {
02156                                         # Remove `markdown` attribute from opening tag.
02157                                         $tag = preg_replace($markdown_attr_re, '', $tag);
02158                                         
02159                                         # Check if text inside this tag must be parsed in span mode.
02160                                         $this->mode = $attr_m[2] . $attr_m[3];
02161                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
02162                                                 preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
02163                                         
02164                                         # Calculate indent before tag.
02165                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
02166                                                 $strlen = $this->utf8_strlen;
02167                                                 $indent = $strlen($matches[1], 'UTF-8');
02168                                         } else {
02169                                                 $indent = 0;
02170                                         }
02171                                         
02172                                         # End preceding block with this tag.
02173                                         $block_text .= $tag;
02174                                         $parsed .= $this->$hash_method($block_text);
02175                                         
02176                                         # Get enclosing tag name for the ParseMarkdown function.
02177                                         # (This pattern makes $tag_name_re safe without quoting.)
02178                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
02179                                         $tag_name_re = $matches[1];
02180                                         
02181                                         # Parse the content using the HTML-in-Markdown parser.
02182                                         list ($block_text, $text)
02183                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
02184                                                         $tag_name_re, $span_mode);
02185                                         
02186                                         # Outdent markdown text.
02187                                         if ($indent > 0) {
02188                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
02189                                                                                                         $block_text);
02190                                         }
02191                                         
02192                                         # Append tag content to parsed text.
02193                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
02194                                         else                            $parsed .= "$block_text";
02195                                         
02196                                         # Start over a new block.
02197                                         $block_text = "";
02198                                 }
02199                                 else $block_text .= $tag;
02200                         }
02201                         
02202                 } while ($depth > 0);
02203                 
02204                 #
02205                 # Hash last block text that wasn't processed inside the loop.
02206                 #
02207                 $parsed .= $this->$hash_method($block_text);
02208                 
02209                 return array($parsed, $text);
02210         }
02211 
02212 
02213         function hashClean($text) {
02214         #
02215         # Called whenever a tag must be hashed when a function insert a "clean" tag
02216         # in $text, it pass through this function and is automaticaly escaped, 
02217         # blocking invalid nested overlap.
02218         #
02219                 return $this->hashPart($text, 'C');
02220         }
02221 
02222 
02223         function doHeaders($text) {
02224         #
02225         # Redefined to add id attribute support.
02226         #
02227                 # Setext-style headers:
02228                 #         Header 1  {#header1}
02229                 #         ========
02230                 #  
02231                 #         Header 2  {#header2}
02232                 #         --------
02233                 #
02234                 $text = preg_replace_callback(
02235                         '{
02236                                 (^.+?)                                                          # $1: Header text
02237                                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?        # $2: Id attribute
02238                                 [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
02239                         }mx',
02240                         array(&$this, '_doHeaders_callback_setext'), $text);
02241 
02242                 # atx-style headers:
02243                 #       # Header 1        {#header1}
02244                 #       ## Header 2       {#header2}
02245                 #       ## Header 2 with closing hashes ##  {#header3}
02246                 #       ...
02247                 #       ###### Header 6   {#header2}
02248                 #
02249                 $text = preg_replace_callback('{
02250                                 ^(\#{1,6})      # $1 = string of #\'s
02251                                 [ ]*
02252                                 (.+?)           # $2 = Header text
02253                                 [ ]*
02254                                 \#*                     # optional closing #\'s (not counted)
02255                                 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
02256                                 [ ]*
02257                                 \n+
02258                         }xm',
02259                         array(&$this, '_doHeaders_callback_atx'), $text);
02260 
02261                 return $text;
02262         }
02263         function _doHeaders_attr($attr) {
02264                 if (empty($attr))  return "";
02265                 return " id=\"$attr\"";
02266         }
02267         function _doHeaders_callback_setext($matches) {
02268                 if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
02269                         return $matches[0];
02270                 $level = $matches[3]{0} == '=' ? 1 : 2;
02271                 $attr  = $this->_doHeaders_attr($id =& $matches[2]);
02272                 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
02273                 return "\n" . $this->hashBlock($block) . "\n\n";
02274         }
02275         function _doHeaders_callback_atx($matches) {
02276                 $level = strlen($matches[1]);
02277                 $attr  = $this->_doHeaders_attr($id =& $matches[3]);
02278                 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
02279                 return "\n" . $this->hashBlock($block) . "\n\n";
02280         }
02281 
02282 
02283         function doTables($text) {
02284         #
02285         # Form HTML tables.
02286         #
02287                 $less_than_tab = $this->tab_width - 1;
02288                 #
02289                 # Find tables with leading pipe.
02290                 #
02291                 #       | Header 1 | Header 2
02292                 #       | -------- | --------
02293                 #       | Cell 1   | Cell 2
02294                 #       | Cell 3   | Cell 4
02295                 #
02296                 $text = preg_replace_callback('
02297                         {
02298                                 ^                                                       # Start of a line
02299                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
02300                                 [|]                                                     # Optional leading pipe (present)
02301                                 (.+) \n                                         # $1: Header row (at least one pipe)
02302                                 
02303                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
02304                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
02305                                 
02306                                 (                                                       # $3: Cells
02307                                         (?>
02308                                                 [ ]*                            # Allowed whitespace.
02309                                                 [|] .* \n                       # Row content.
02310                                         )*
02311                                 )
02312                                 (?=\n|\Z)                                       # Stop at final double newline.
02313                         }xm',
02314                         array(&$this, '_doTable_leadingPipe_callback'), $text);
02315                 
02316                 #
02317                 # Find tables without leading pipe.
02318                 #
02319                 #       Header 1 | Header 2
02320                 #       -------- | --------
02321                 #       Cell 1   | Cell 2
02322                 #       Cell 3   | Cell 4
02323                 #
02324                 $text = preg_replace_callback('
02325                         {
02326                                 ^                                                       # Start of a line
02327                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
02328                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
02329                                 
02330                                 [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
02331                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
02332                                 
02333                                 (                                                       # $3: Cells
02334                                         (?>
02335                                                 .* [|] .* \n            # Row content
02336                                         )*
02337                                 )
02338                                 (?=\n|\Z)                                       # Stop at final double newline.
02339                         }xm',
02340                         array(&$this, '_DoTable_callback'), $text);
02341 
02342                 return $text;
02343         }
02344         function _doTable_leadingPipe_callback($matches) {
02345                 $head           = $matches[1];
02346                 $underline      = $matches[2];
02347                 $content        = $matches[3];
02348                 
02349                 # Remove leading pipe for each row.
02350                 $content        = preg_replace('/^ *[|]/m', '', $content);
02351                 
02352                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
02353         }
02354         function _doTable_callback($matches) {
02355                 $head           = $matches[1];
02356                 $underline      = $matches[2];
02357                 $content        = $matches[3];
02358 
02359                 # Remove any tailing pipes for each line.
02360                 $head           = preg_replace('/[|] *$/m', '', $head);
02361                 $underline      = preg_replace('/[|] *$/m', '', $underline);
02362                 $content        = preg_replace('/[|] *$/m', '', $content);
02363                 
02364                 # Reading alignement from header underline.
02365                 $separators     = preg_split('/ *[|] */', $underline);
02366                 foreach ($separators as $n => $s) {
02367                         if (preg_match('/^ *-+: *$/', $s))              $attr[$n] = ' align="right"';
02368                         else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
02369                         else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
02370                         else                                                                    $attr[$n] = '';
02371                 }
02372                 
02373                 # Parsing span elements, including code spans, character escapes, 
02374                 # and inline HTML tags, so that pipes inside those gets ignored.
02375                 $head           = $this->parseSpan($head);
02376                 $headers        = preg_split('/ *[|] */', $head);
02377                 $col_count      = count($headers);
02378                 
02379                 # Write column headers.
02380                 $text = "<table>\n";
02381                 $text .= "<thead>\n";
02382                 $text .= "<tr>\n";
02383                 foreach ($headers as $n => $header)
02384                         $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
02385                 $text .= "</tr>\n";
02386                 $text .= "</thead>\n";
02387                 
02388                 # Split content by row.
02389                 $rows = explode("\n", trim($content, "\n"));
02390                 
02391                 $text .= "<tbody>\n";
02392                 foreach ($rows as $row) {
02393                         # Parsing span elements, including code spans, character escapes, 
02394                         # and inline HTML tags, so that pipes inside those gets ignored.
02395                         $row = $this->parseSpan($row);
02396                         
02397                         # Split row by cell.
02398                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
02399                         $row_cells = array_pad($row_cells, $col_count, '');
02400                         
02401                         $text .= "<tr>\n";
02402                         foreach ($row_cells as $n => $cell)
02403                                 $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
02404                         $text .= "</tr>\n";
02405                 }
02406                 $text .= "</tbody>\n";
02407                 $text .= "</table>";
02408                 
02409                 return $this->hashBlock($text) . "\n";
02410         }
02411 
02412         
function doDefLists($text) {
#
# Locate every definition list in $text and replace it with the value
# returned by _doDefLists_callback().
#
# A list may only start at the very beginning of the text or directly
# after a blank line. It opens with one or more term lines followed
# (optionally after one blank line) by a line whose marker is a colon.
#
    # At most (tab_width - 1) spaces may precede a term or a colon marker.
    $indent_re = '[ ]{0,' . ($this->tab_width - 1) . '}';

    # Pattern for one entire definition list ($1 = the whole list).
    $list_re = '(?>
            (                                   # $1 = whole list
              (                                 # $2
                '.$indent_re.'
                ((?>.*\S.*\n)+)                 # $3 = defined term
                \n?
                '.$indent_re.':[ ]+ # colon starting definition
              )
              (?s:.+?)
              (                                 # $4
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                           # Negative lookahead for another term
                    '.$indent_re.'
                    (?: \S.*\n )+?              # defined term
                    \n?
                    '.$indent_re.':[ ]+ # colon starting definition
                  )
                  (?!                           # Negative lookahead for another definition
                    '.$indent_re.':[ ]+ # colon starting definition
                  )
              )
            )
        )'; // mx

    # Anchor the list pattern at the start of text or after a blank line.
    $anchored_re = '{
            (?>\A\n?|(?<=\n\n))
            '.$list_re.'
        }mx';

    return preg_replace_callback($anchored_re,
        array(&$this, '_doDefLists_callback'), $text);
}
function _doDefLists_callback($matches) {
#
# preg_replace_callback handler for doDefLists().
# $matches[1] is the raw source of one whole definition list.
#
    # Convert the terms and definitions, then drop surrounding whitespace.
    $items = trim($this->processDefListItems($matches[1]));

    # Wrap in <dl> and hand the markup to hashBlock(); two newlines follow
    # the returned token.
    return $this->hashBlock("<dl>\n" . $items . "\n</dl>") . "\n\n";
}
02465 
02466 
function processDefListItems($list_str) {
#
#   Process the contents of a single definition list, splitting it
#   into individual term and definition list items.
#
#   $list_str - raw text of one definition list.
#   Returns the text with terms rewritten by
#   _processDefListItems_callback_dt() and definitions rewritten by
#   _processDefListItems_callback_dd().
#
    # A term or colon marker may be indented by at most this many spaces.
    $less_than_tab = $this->tab_width - 1;

    # trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Process definition terms.
    # The closing lookahead uses $less_than_tab (it used to hard-code 3)
    # so that it accepts the same marker indentation as the definition
    # pattern below when tab_width is not 4.
    $list_str = preg_replace_callback('{
        (?>\A\n?|\n\n+)                     # leading line
        (                                   # definition terms = $1
            [ ]{0,'.$less_than_tab.'}       # leading whitespace
            (?![:][ ]|[ ])                  # negative lookahead for a definition 
                                            #   mark (colon) or more whitespace.
            (?> \S.* \n)+?                  # actual term (not whitespace). 
        )
        (?=\n?[ ]{0,'.$less_than_tab.'}:[ ])    # lookahead for following line feed 
                                            #   with a definition mark.
        }xm',
        array(&$this, '_processDefListItems_callback_dt'), $list_str);

    # Process actual definitions. This runs after the terms have been
    # converted, which is why "<dt>" can serve as a stop marker.
    $list_str = preg_replace_callback('{
        \n(\n+)?                            # leading line = $1
        (                                   # marker space = $2
            [ ]{0,'.$less_than_tab.'}       # whitespace before colon
            [:][ ]+                         # definition mark (colon)
        )
        ((?s:.+?))                          # definition text = $3
        (?= \n+                             # stop at next definition mark,
            (?:                             # next term or end of text
                [ ]{0,'.$less_than_tab.'} [:][ ]    |
                <dt> | \z
            )
        )
        }xm',
        array(&$this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
function _processDefListItems_callback_dt($matches) {
#
# preg_replace_callback handler for the term pattern.
# $matches[1] holds one or more term lines; each line becomes its own
# <dt> element after span-level processing.
#
    $html = '';
    foreach (explode("\n", trim($matches[1])) as $line) {
        $html .= "\n<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
    }
    return $html . "\n";
}
function _processDefListItems_callback_dd($matches) {
#
# preg_replace_callback handler for the definition pattern.
#   $matches[1] - blank line(s) preceding the marker, if any
#   $matches[2] - indentation plus colon marker
#   $matches[3] - definition text
# Returns the definition wrapped in <dd>.
#
    $blank_before = $matches[1];
    $marker       = $matches[2];
    $body         = $matches[3];

    # Span-level case: no blank line before the marker and none inside
    # the definition text.
    if (!($blank_before || preg_match('/\n{2,}/', $body))) {
        $inline = $this->runSpanGamut($this->outdent(rtrim($body)));
        return "\n<dd>" . $inline . "</dd>\n";
    }

    # Block-level case: swap the marker for spaces of equal width so the
    # first line is indented like the rest, then outdent and run the
    # block gamut.
    $padded = str_repeat(' ', strlen($marker)) . $body;
    $blocks = $this->runBlockGamut($this->outdent($padded . "\n\n"));

    return "\n<dd>" . "\n" . $blocks . "\n" . "</dd>\n";
}
02537 
02538 
function doFencedCodeBlocks($text) {
#
# Adding the fenced code block syntax to regular Markdown:
#
# ~~~
# Code block
# ~~~
#
# The opening marker is three or more tildes at the start of a line. The
# block ends at the first following line that repeats exactly the same
# marker. Each match is replaced by the value returned from
# _doFencedCodeBlocks_callback().
#
    # (An unused local holding the tab width was removed here; the
    # pattern below never referenced it.)
    $text = preg_replace_callback('{
            (?:\n|\A)
            # 1: Opening marker
            (
                ~{3,} # Marker: three tilde or more.
            )
            [ ]* \n # Whitespace and newline following marker.
            
            # 2: Content
            (
                (?>
                    (?!\1 [ ]* \n)  # Not a closing marker.
                    .*\n+
                )+
            )
            
            # Closing marker.
            \1 [ ]* \n
        }xm',
        array(&$this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
}
function _doFencedCodeBlocks_callback($matches) {
#
# preg_replace_callback handler for doFencedCodeBlocks().
# $matches[2] is the text between the opening and closing markers.
#
    # Escape &, < and > but leave quotes untouched.
    $escaped = htmlspecialchars($matches[2], ENT_NOQUOTES);

    # Newlines at the very start of the block are rewritten by
    # _doFencedCodeBlocks_newlines().
    $escaped = preg_replace_callback('/^\n+/',
        array(&$this, '_doFencedCodeBlocks_newlines'), $escaped);

    $html = "<pre><code>" . $escaped . "</code></pre>";
    return "\n\n" . $this->hashBlock($html) . "\n\n";
}
function _doFencedCodeBlocks_newlines($matches) {
#
# Return one <br> tag (closed with the configured empty-element suffix)
# for every character of the matched run of newlines.
#
    $br_tag = "<br" . $this->empty_element_suffix;
    $count  = strlen($matches[0]);
    return str_repeat($br_tag, $count);
}
02584 
02585 
#
# Emphasis marker patterns, redefined so that emphasis by underscore
# does not work in the middle of a word: an underscore marker must not be
# preceded (first entry) or followed (last entry) by a letter, digit or
# underscore.
#
# NOTE(review): judging by the lookarounds, the '' entry looks like the
# opening-marker pattern and the other keys the closing pattern for that
# marker -- confirm against the code that consumes these lists.
#

# Single marker: * or _
var $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![.,:;]\s)',
    '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
    '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
);

# Double marker: ** or __
var $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![.,:;]\s)',
    '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
    '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
);

# Triple marker: *** or ___
var $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![.,:;]\s)',
    '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
    '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
);
02605 
02606 
function formParagraphs($text) {
#
#   Split $text into chunks separated by blank lines, run the span gamut
#   on each chunk and wrap it in <p> tags unless it is a hash token.
#
#   Params:
#       $text - string to process with html <p> tags
#   Returns the joined chunks after passing them through unhash().
#
    # Drop newlines at the very start and very end of the text.
    $stripped = preg_replace('/\A\n+|\n+\z/', '', $text);

    $chunks = preg_split('/\n{2,}/', $stripped, -1, PREG_SPLIT_NO_EMPTY);

    $rendered = array();
    foreach ($chunks as $chunk) {
        $html = trim($this->runSpanGamut($chunk));

        # A chunk that starts with a block hash token, or that ends with
        # a clean-tag hash token at the start of a line, is left
        # unwrapped.
        $is_hash = preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $html);

        $rendered[] = $is_hash ? $html : "<p>$html</p>";
    }

    # Join the chunks with one blank line between them, then replace any
    # hash tokens still present.
    return $this->unhash(implode("\n\n", $rendered));
}
02641         
02642         
02643         ### Footnotes
02644         
function stripFootnotes($text) {
#
# Remove footnote definitions from $text. Every match is passed to
# _stripFootnotes_callback() and replaced by that callback's return value.
#
# Footnote definitions are in the form:
#
#   [^id]: footnote text
#
# The text may start on the line after the marker and may continue over
# following lines, as the pattern below describes.
#
    # The marker may be indented by at most (tab_width - 1) spaces.
    $max_indent = $this->tab_width - 1;

    $definition_re = '{
        ^[ ]{0,'.$max_indent.'}\[\^(.+?)\][ ]?:     # note_id = $1
          [ ]*
          \n?                                   # maybe *one* newline
        (                                       # text = $2 (no blank lines allowed)
            (?:
                .+                              # actual text
            |
                \n                              # newlines but 
                (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
                (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
                                 # by non-indented content
            )*
        )
        }xm';

    return preg_replace_callback($definition_re,
        array(&$this, '_stripFootnotes_callback'),
        $text);
}
function _stripFootnotes_callback($matches) {
#
# preg_replace_callback handler for stripFootnotes().
# Stores the outdented footnote text ($matches[2]) in $this->footnotes,
# keyed by the id prefix followed by the note id ($matches[1]).
#
    $key = $this->fn_id_prefix . $matches[1];
    $this->footnotes[$key] = $this->outdent($matches[2]);

    # The definition itself is removed from the text.
    return '';
}
02677 
02678 
function doFootnotes($text) {
#
# Replace each footnote reference [^id] in $text with a placeholder token
# of the form F\x1Afn:id\x1A: . appendFootnotes() later matches these
# tokens. Text is returned untouched while $this->in_anchor is set.
#
    if ($this->in_anchor) {
        return $text;
    }
    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
02689 
02690         
function appendFootnotes($text) {
#
# Append footnote list to text.
#
# First every footnote placeholder token in $text is replaced through
# _appendFootnotes_callback(), which also fills $this->footnotes_ordered.
# If any footnotes were collected, a <div class="footnotes"> block with
# one <li> per footnote is appended. Each item ends with a backlink to
# its reference.
#
# Returns $text, with the footnote block appended when there is one.
#
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
        array(&$this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr". $this->empty_element_suffix ."\n";
        $text .= "<ol>\n\n";

        # Attribute template shared by all backlinks. It may contain the
        # "%%" placeholder, which stands for the footnote number.
        $attr = " rev=\"footnote\"";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAttribute($class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAttribute($title);
            $attr .= " title=\"$title\"";
        }
        $num = 0;

        # The list is consumed from the front rather than iterated,
        # because processing a footnote can add further entries (a
        # footnote may itself contain footnote references).
        while (!empty($this->footnotes_ordered)) {
            $footnote = reset($this->footnotes_ordered);
            $note_id = key($this->footnotes_ordered);
            unset($this->footnotes_ordered[$note_id]);

            $footnote .= "\n"; # Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");
            $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
                array(&$this, '_appendFootnotes_callback'), $footnote);

            # Fill the placeholder into a per-footnote copy. Writing the
            # result back into $attr would consume "%%" on the first pass
            # and give every later backlink the number 1.
            $note_attr = str_replace("%%", (string) ++$num, $attr);
            $note_id = $this->encodeAttribute($note_id);

            # Add backlink to last paragraph; create new paragraph if needed.
            $backlink = "<a href=\"#fnref:$note_id\"$note_attr>&#8617;</a>";
            if (preg_match('{</p>$}', $footnote)) {
                # Strip the closing "</p>" (4 characters) and re-add it
                # after the backlink.
                $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
function _appendFootnotes_callback($matches) {
#
# preg_replace_callback handler for footnote placeholder tokens.
# $matches[1] is the footnote id as written in the reference.
#
# Returns the <sup> footnote marker, or the literal "[^id]" text when no
# unused footnote with that id exists.
#
    $key = $this->fn_id_prefix . $matches[1];

    # No stored footnote under this id (unknown, or already claimed by an
    # earlier marker): put the original reference text back.
    if (!isset($this->footnotes[$key])) {
        return "[^" . $matches[1] . "]";
    }

    # Move the footnote content over to the ordered list.
    $this->footnotes_ordered[$key] = $this->footnotes[$key];
    unset($this->footnotes[$key]);

    $number = $this->footnote_counter++;

    $link_attr = " rel=\"footnote\"";
    if ($this->fn_link_class != "") {
        $link_attr .= " class=\"" . $this->encodeAttribute($this->fn_link_class) . "\"";
    }
    if ($this->fn_link_title != "") {
        $link_attr .= " title=\"" . $this->encodeAttribute($this->fn_link_title) . "\"";
    }

    # "%%" in the configured class/title stands for the footnote number.
    $link_attr = str_replace("%%", $number, $link_attr);
    $key = $this->encodeAttribute($key);

    return "<sup id=\"fnref:$key\">"
        . "<a href=\"#fn:$key\"$link_attr>$number</a>"
        . "</sup>";
}
02782                 
02783         
02784         ### Abbreviations ###
02785         
function stripAbbreviations($text) {
#
# Remove abbreviation definitions from $text. Every match is passed to
# _stripAbbreviations_callback() and replaced by its return value.
#
# Abbreviation definitions are in the form:
#
#   *[word]: description
#
    # The marker may be indented by at most (tab_width - 1) spaces.
    $max_indent = $this->tab_width - 1;

    $definition_re = '{
        ^[ ]{0,'.$max_indent.'}\*\[(.+?)\][ ]?:     # abbr_id = $1
        (.*)                                    # text = $2 (no blank lines allowed)
        }xm';

    return preg_replace_callback($definition_re,
        array(&$this, '_stripAbbreviations_callback'),
        $text);
}
function _stripAbbreviations_callback($matches) {
#
# preg_replace_callback handler for stripAbbreviations().
#   $matches[1] - the abbreviated word
#   $matches[2] - its description
# Adds the regex-quoted word to the alternation in $this->abbr_word_re
# and stores the trimmed description under the word.
#
    $abbr_word = $matches[1];
    $abbr_desc = $matches[2];

    # Compare against the empty string rather than testing truthiness:
    # a pattern consisting only of "0" is falsy in PHP, and the "|"
    # separator would then be left out.
    if ((string) $this->abbr_word_re !== '')
        $this->abbr_word_re .= '|';
    $this->abbr_word_re .= preg_quote($abbr_word);
    $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
    return ''; # String that will replace the block
}
02810         
02811         
function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
#
# Each occurrence that is not adjacent to a word character or to the
# \x1A character is passed to _doAbbreviations_callback().
# $text is returned unchanged when no abbreviation pattern exists.
#
    # Compare against the empty string rather than testing truthiness:
    # a pattern consisting only of "0" is falsy in PHP and would be
    # skipped.
    if ((string) $this->abbr_word_re !== '') {
        // cannot use the /x modifier because abbr_word_re may 
        // contain significant spaces:
        $text = preg_replace_callback('{'.
            '(?<![\w\x1A])'.
            '(?:'.$this->abbr_word_re.')'.
            '(?![\w\x1A])'.
            '}', 
            array(&$this, '_doAbbreviations_callback'), $text);
    }
    return $text;
}
function _doAbbreviations_callback($matches) {
#
# preg_replace_callback handler for doAbbreviations().
# $matches[0] is the matched abbreviation.
#
# Returns the abbreviation wrapped in <abbr> (with a title attribute when
# a description is stored) and passed through hashPart(), or the matched
# text unchanged when no description entry exists for it.
#
    $abbr = $matches[0];
    if (!isset($this->abbr_desciptions[$abbr])) {
        return $matches[0];
    }

    $desc = $this->abbr_desciptions[$abbr];

    # Test for the empty string explicitly: empty() also treats the
    # description "0" as missing and would drop its title attribute.
    if ((string) $desc === '') {
        return $this->hashPart("<abbr>$abbr</abbr>");
    }

    $desc = $this->encodeAttribute($desc);
    return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
}
02842 
02843 }
02844 
02845 
02846 /*
02847 
02848 PHP Markdown Extra
02849 ==================
02850 
02851 Description
02852 -----------
02853 
02854 This is a PHP port of the original Markdown formatter written in Perl 
02855 by John Gruber. This special "Extra" version of PHP Markdown features 
02856 further enhancements to the syntax for making additional constructs 
02857 such as tables and definition lists.
02858 
02859 Markdown is a text-to-HTML filter; it translates an easy-to-read /
02860 easy-to-write structured text format into HTML. Markdown's text format
02861 is most similar to that of plain text email, and supports features such
02862 as headers, *emphasis*, code blocks, blockquotes, and links.
02863 
02864 Markdown's syntax is designed not as a generic markup language, but
02865 specifically to serve as a front-end to (X)HTML. You can use span-level
02866 HTML tags anywhere in a Markdown document, and you can use block level
02867 HTML tags (like <div> and <table>) as well.
02868 
02869 For more information about Markdown's syntax, see:
02870 
02871 <http://daringfireball.net/projects/markdown/>
02872 
02873 
02874 Bugs
02875 ----
02876 
02877 To file bug reports please send email to:
02878 
02879 <michel.fortin@michelf.com>
02880 
02881 Please include with your report: (1) the example input; (2) the output you
02882 expected; (3) the output Markdown actually produced.
02883 
02884 
02885 Version History
02886 --------------- 
02887 
02888 See the readme file for detailed release notes for this version.
02889 
02890 
02891 Copyright and License
02892 ---------------------
02893 
02894 PHP Markdown & Extra  
02895 Copyright (c) 2004-2009 Michel Fortin  
02896 <http://michelf.com/>  
02897 All rights reserved.
02898 
02899 Based on Markdown  
02900 Copyright (c) 2003-2006 John Gruber   
02901 <http://daringfireball.net/>   
02902 All rights reserved.
02903 
02904 Redistribution and use in source and binary forms, with or without
02905 modification, are permitted provided that the following conditions are
02906 met:
02907 
02908 *       Redistributions of source code must retain the above copyright notice,
02909         this list of conditions and the following disclaimer.
02910 
02911 *       Redistributions in binary form must reproduce the above copyright
02912         notice, this list of conditions and the following disclaimer in the
02913         documentation and/or other materials provided with the distribution.
02914 
02915 *       Neither the name "Markdown" nor the names of its contributors may
02916         be used to endorse or promote products derived from this software
02917         without specific prior written permission.
02918 
02919 This software is provided by the copyright holders and contributors "as
02920 is" and any express or implied warranties, including, but not limited
02921 to, the implied warranties of merchantability and fitness for a
02922 particular purpose are disclaimed. In no event shall the copyright owner
02923 or contributors be liable for any direct, indirect, incidental, special,
02924 exemplary, or consequential damages (including, but not limited to,
02925 procurement of substitute goods or services; loss of use, data, or
02926 profits; or business interruption) however caused and on any theory of
02927 liability, whether in contract, strict liability, or tort (including
02928 negligence or otherwise) arising in any way out of the use of this
02929 software, even if advised of the possibility of such damage.
02930 
02931 */
02932 ?>
 All Data Structures Namespaces Files Functions Variables Enumerations