documentation/moodle/markdown_8php_source.html

00001 <?php
00002 #
00003 # Markdown Extra  -  A text-to-HTML conversion tool for web writers
00004 #
00005 # PHP Markdown & Extra
00006 # Copyright (c) 2004-2009 Michel Fortin
00007 # <http://michelf.com/projects/php-markdown/>
00008 #
00009 # Original Markdown
00010 # Copyright (c) 2004-2006 John Gruber
00011 # <http://daringfireball.net/projects/markdown/>
00012 #
00013
00014
00015 define( 'MARKDOWN_VERSION',  "1.0.1n" ); # Sat 10 Oct 2009
00016 define( 'MARKDOWNEXTRA_VERSION',  "1.2.4" ); # Sat 10 Oct 2009
00017
00018
00019 #
00020 # Global default settings:
00021 #
00022
00023 # Change to ">" for HTML output
00024 @define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
00025
00026 # Define the width of a tab for code blocks.
00027 @define( 'MARKDOWN_TAB_WIDTH',     4 );
00028
00029 # Optional title attribute for footnote links and backlinks.
00030 @define( 'MARKDOWN_FN_LINK_TITLE',         "" );
00031 @define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );
00032
00033 # Optional class attribute for footnote links and backlinks.
00034 @define( 'MARKDOWN_FN_LINK_CLASS',         "" );
00035 @define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );
00036
00037
00038 #
00039 # WordPress settings:
00040 #
00041
00042 # Change to false to remove Markdown from posts and/or comments.
00043 @define( 'MARKDOWN_WP_POSTS',      true );
00044 @define( 'MARKDOWN_WP_COMMENTS',   true );
00045
00046
00047
00048 ### Standard Function Interface ###
00049
00050 @define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
00051
00052 function Markdown($text) {
00053 #
00054 # Run $text through a shared parser's transform method and return the result.
00055 #
00056         # The parser is kept in a static, so it is only built on the first call.
00057         static $parser = null;
00058         if ($parser === null) {
00059                 $class_name = MARKDOWN_PARSER_CLASS;
00060                 $parser = new $class_name;
00061         }
00062         # Hand the text to the parser and give back whatever it produces.
00063         $result = $parser->transform($text);
00064         return $result;
00065 }
00066
00067
00068 ### WordPress Plugin Interface ###
00069
00070 /*
00071 Plugin Name: Markdown Extra
00072 Plugin URI: http://michelf.com/projects/php-markdown/
00073 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>
00074 Version: 1.2.4
00075 Author: Michel Fortin
00076 Author URI: http://michelf.com/
00077 */
00078
00079 if (isset($wp_version)) {
00080         # More details about how it works here:
00081         # <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
00082
00083         # Post content and excerpts
00084         # - Remove WordPress paragraph generator.
00085         # - Run Markdown on excerpt, then remove all tags.
00086         # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
00087         if (MARKDOWN_WP_POSTS) {
00088                 remove_filter('the_content',     'wpautop');
00089         remove_filter('the_content_rss', 'wpautop');
00090                 remove_filter('the_excerpt',     'wpautop');
00091                 add_filter('the_content',     'mdwp_MarkdownPost', 6);
00092         add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
00093                 add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
00094                 add_filter('get_the_excerpt', 'trim', 7);
00095                 add_filter('the_excerpt',     'mdwp_add_p');
00096                 add_filter('the_excerpt_rss', 'mdwp_strip_p');
00097
00098                 remove_filter('content_save_pre',  'balanceTags', 50);
00099                 remove_filter('excerpt_save_pre',  'balanceTags', 50);
00100                 add_filter('the_content',         'balanceTags', 50);
00101                 add_filter('get_the_excerpt', 'balanceTags', 9);
00102         }
00103
00104         # Run Markdown on a post. Footnote ids get a "<post ID>." prefix unless
00104         # the view is a single post, a page or a feed.
00105         function mdwp_MarkdownPost($text) {
00106                 static $parser;
00107                 if (!$parser) {
00108                         $parser_class = MARKDOWN_PARSER_CLASS;
00109                         $parser = new $parser_class;
00110                 }
00111                 if (is_single() || is_page() || is_feed()) {
00112                         $parser->fn_id_prefix = "";
00113                 } else {
00114                         $parser->fn_id_prefix = get_the_ID() . ".";
00115                 }
00116                 return $parser->transform($text);
00117         }
00118
00119         # Comments
00120         # - Remove WordPress paragraph generator.
00121         # - Remove WordPress auto-link generator.
00122         # - Scramble important tags before passing them to the kses filter.
00123         # - Run Markdown on excerpt then remove paragraph tags.
00124         if (MARKDOWN_WP_COMMENTS) {
00125                 remove_filter('comment_text', 'wpautop', 30);
00126                 remove_filter('comment_text', 'make_clickable');
00127                 add_filter('pre_comment_content', 'Markdown', 6);
00128                 add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
00129                 add_filter('pre_comment_content', 'mdwp_show_tags', 12);
00130                 add_filter('get_comment_text',    'Markdown', 6);
00131                 add_filter('get_comment_excerpt', 'Markdown', 6);
00132                 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
00133                 # Ten tags and ten rot13-decoded placeholders, paired by position.
00134                 global $mdwp_hidden_tags, $mdwp_placeholders;
00135                 $mdwp_hidden_tags = explode(' ',
00136                         '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
00137                 $mdwp_placeholders = explode(' ', str_rot13(
00138                         'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
00139                         'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
00140         }
00141         # Wrap $text in <p> tags unless it is empty or already starts with a block tag.
00142         function mdwp_add_p($text) {
00143                 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
00144                         $text = '<p>'.$text.'</p>';
00145                         $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
00146                 }
00147                 return $text;
00148         }
00149         # Remove every <p> and </p> tag (case-insensitive) from $t.
00150         function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
00151         # Swap the hidden tags for their placeholders, and back again.
00152         function mdwp_hide_tags($text) {
00153                 global $mdwp_hidden_tags, $mdwp_placeholders;
00154                 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
00155         }
00156         function mdwp_show_tags($text) {
00157                 global $mdwp_hidden_tags, $mdwp_placeholders;
00158                 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
00159         }
00160 }
00161
00162
00163 ### bBlog Plugin Info ###
00164
00165 function identify_modifier_markdown() { # Descriptor array for the bBlog plugin system.
00166         $plugin_info = array(
00167                 'name' => 'markdown', 'type' => 'modifier',
00168                 'nicename' => 'PHP Markdown Extra',
00169                 'description' => 'A text-to-HTML conversion tool for web writers',
00170                 'authors' => 'Michel Fortin and John Gruber',
00171                 'licence' => 'GPL',
00172                 'version' => MARKDOWNEXTRA_VERSION,
00173                 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>',
00174                 );
00175         return $plugin_info;
00176 }
00177
00178
00179 ### Smarty Modifier Interface ###
00180
00181 function smarty_modifier_markdown($text) { # Smarty modifier: delegates to Markdown().
00182         return Markdown($text);
00183 }
00184
00185
00186 ### Textile Compatibility Mode ###
00187
00188 # Rename this file to "classTextile.php" and it can replace Textile everywhere.
00189
00190 if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
00191         # Try to include PHP SmartyPants. Should be in the same directory.
00192         @include_once 'smartypants.php';
00193         # Stand-in Textile class: runs Markdown (and SmartyPants, if loaded) instead.
00194         class Textile {
00195                 function TextileThis($text, $lite='', $encode='') {
00196                         if ($lite == '' && $encode == '')    $text = Markdown($text);
00197                         if (function_exists('SmartyPants'))  $text = SmartyPants($text);
00198                         return $text;
00199                 }
00200                 # Fake restricted version: restrictions are not supported, $noimage is ignored.
00201                 function TextileRestricted($text, $lite='', $noimage='') {
00202                         return $this->TextileThis($text, $lite);
00203                 }
00204                 # Workaround to ensure compatibility with TextPattern 4.0.3.
00205                 function blockLite($text) { return $text; }
00206         }
00207 }
00208
00209
00210
00211 #
00212 # Markdown Parser Class
00213 #
00214
00215 class Markdown_Parser {
00216
00217         # Regex to match balanced [brackets].
00218         # Needed to insert a maximum bracket depth while converting to PHP.
00219         var $nested_brackets_depth = 6;
00220         var $nested_brackets_re;
00221
00222         var $nested_url_parenthesis_depth = 4;
00223         var $nested_url_parenthesis_re;
00224
00225         # Table of hash values for escaped characters:
00226         var $escape_chars = '\`*_{}[]()>#+-.!';
00227         var $escape_chars_re;
00228
00229         # Change to ">" for HTML output.
00230         var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
00231         var $tab_width = MARKDOWN_TAB_WIDTH;
00232
00233         # Change to `true` to disallow markup or entities.
00234         var $no_markup = false;
00235         var $no_entities = false;
00236
00237         # Predefined urls and titles for reference links and images.
00238         var $predef_urls = array();
00239         var $predef_titles = array();
00240
00241
00242         function Markdown_Parser() {
00243         #
00244         # Constructor function. Initialize appropriate member variables.
00245         #
00246                 $this->_initDetab();
00247                 $this->prepareItalicsAndBold();
00248                 # Regex for balanced [brackets], nested up to nested_brackets_depth levels.
00249                 $this->nested_brackets_re =
00250                         str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
00251                         str_repeat('\])*', $this->nested_brackets_depth);
00252                 # Likewise for balanced (parentheses) in URLs; whitespace is excluded too.
00253                 $this->nested_url_parenthesis_re =
00254                         str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
00255                         str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
00256                 # Character class matching any one of the escapable characters.
00257                 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
00258
00259                 # Sort document, block, and span gamut in ascending priority order.
00260                 asort($this->document_gamut);
00261                 asort($this->block_gamut);
00262                 asort($this->span_gamut);
00263         }
00264
00265
00266         # Internal hashes used during transformation.
00267         var $urls = array();
00268         var $titles = array();
00269         var $html_hashes = array();
00270
00271         # Status flag to avoid invalid nesting.
00272         var $in_anchor = false;
00273
00274
00275         function setup() {
00276         #
00277         # Called before the transformation process starts to set up parser
00278         # state.
00279         #
00280                 # Reset per-document hashes, seeding links with the predefined ones.
00281                 $this->urls = $this->predef_urls;
00282                 $this->titles = $this->predef_titles;
00283                 $this->html_hashes = array();
00284                 # Reset the member flag; a bare $in_anchor would only set an unused local.
00285                 $this->in_anchor = false;
00286         }
00287
00288         function teardown() {
00289         #
00290         # Runs once the transformation is finished and empties the
00291         # per-document hashes so they stop taking up memory.
00292         #
00293                 $this->html_hashes = array();
00294                 $this->titles = array();
00295                 $this->urls = array();
00296         }
00297
00298
00299         function transform($text) {
00300         #
00301         # Main function. Performs some preprocessing on the input text
00302         # and passes it through the document gamut.
00303         #
00304                 $this->setup();
00305
00306                 # Remove a leading UTF-8 BOM and any \x1A marker characters in input.
00307                 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
00308
00309                 # Standardize line endings:
00310                 #   DOS to Unix and Mac to Unix
00311                 $text = preg_replace('{\r\n?}', "\n", $text);
00312
00313                 # Make sure $text ends with a couple of newlines:
00314                 $text .= "\n\n";
00315
00316                 # Convert all tabs to spaces.
00317                 $text = $this->detab($text);
00318
00319                 # Turn block-level HTML blocks into hash entries
00320                 $text = $this->hashHTMLBlocks($text);
00321
00322                 # Strip any lines consisting only of spaces (tabs were converted above).
00323                 # This makes subsequent regexen easier to write, because we can
00324                 # match consecutive blank lines with /\n+/ instead of something
00325                 # contorted like /[ ]*\n+/ .
00326                 $text = preg_replace('/^[ ]+$/m', '', $text);
00327
00328                 # Run document gamut methods.
00329                 foreach ($this->document_gamut as $method => $priority) {
00330                         $text = $this->$method($text);
00331                 }
00332
00333                 $this->teardown();
00334
00335                 return $text . "\n";
00336         }
00337
00338         var $document_gamut = array(
00339                 # Strip link definitions, store in hashes.
00340                 "stripLinkDefinitions" => 20,
00341
00342                 "runBasicBlockGamut"   => 30,
00343                 );
00344
00345
00346         function stripLinkDefinitions($text) {
00347         #
00348         # Strips link definitions from text; the callback stores their URLs
00349         # and titles in $this->urls and $this->titles, keyed by link id.
00350         #
00351                 $less_than_tab = $this->tab_width - 1;
00352
00353                 # Link defs are in the form: ^[id]: url "optional title"
00354                 $text = preg_replace_callback('{
00355                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
00356                                                           [ ]*
00357                                                           \n?                           # maybe *one* newline
00358                                                           [ ]*
00359                                                         (?:
00360                                                           <(.+?)>                       # url = $2
00361                                                         |
00362                                                           (\S+?)                        # url = $3
00363                                                         )
00364                                                           [ ]*
00365                                                           \n?                           # maybe one newline
00366                                                           [ ]*
00367                                                         (?:
00368                                                                 (?<=\s)                 # lookbehind for whitespace
00369                                                                 ["(]
00370                                                                 (.*?)                   # title = $4
00371                                                                 [")]
00372                                                                 [ ]*
00373                                                         )?      # title is optional
00374                                                         (?:\n+|\Z)
00375                         }xm',
00376                         array(&$this, '_stripLinkDefinitions_callback'),
00377                         $text);
00378                 return $text;
00379         }
00380         function _stripLinkDefinitions_callback($matches) {
00381                 $id = strtolower($matches[1]);
00382                 # Group 2 is the <bracketed> url; fall back to the bare one in group 3.
00383                 $this->urls[$id] = ($matches[2] != '') ? $matches[2] : $matches[3];
00384                 $this->titles[$id] =& $matches[4];
00385                 return ''; # The definition itself is removed from the text.
00386         }
00387
00388
00389         function hashHTMLBlocks($text) {
00390                 if ($this->no_markup)  return $text;
00391                 # Replaces block-level HTML found in $text with hash tokens.
00392                 $less_than_tab = $this->tab_width - 1;
00393
00394                 # Hashify HTML blocks:
00395                 # We only want to do this for block-level HTML tags, such as headers,
00396                 # lists, and tables. That's because we still want to wrap <p>s around
00397                 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
00398                 # phrase emphasis, and spans. The list of tags we're looking for is
00399                 # hard-coded:
00400                 #
00401                 # *  List "a" is made of tags which can be both inline or block-level.
00402                 #    These will be treated block-level when the start tag is alone on
00403                 #    its line, otherwise they're not matched here and will be taken as
00404                 #    inline later.
00405                 # *  List "b" is made of tags which are always block-level;
00406                 #
00407                 $block_tags_a_re = 'ins|del';
00408                 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
00409                                                    'script|noscript|form|fieldset|iframe|math';
00410
00411                 # Regular expression for the content of a block tag.
00412                 $nested_tags_level = 4;
00413                 $attr = '
00414                         (?>                             # optional tag attributes
00415                           \s                    # starts with whitespace
00416                           (?>
00417                                 [^>"/]+         # text outside quotes
00418                           |
00419                                 /+(?!>)         # slash not followed by ">"
00420                           |
00421                                 "[^"]*"         # text inside double quotes (tolerate ">")
00422                           |
00423                                 \'[^\']*\'      # text inside single quotes (tolerate ">")
00424                           )*
00425                         )?
00426                         ';
00427                 $content =
00428                         str_repeat('
00429                                 (?>
00430                                   [^<]+                 # content without tag
00431                                 |
00432                                   <\2                   # nested opening tag
00433                                         '.$attr.'       # attributes
00434                                         (?>
00435                                           />
00436                                         |
00437                                           >', $nested_tags_level).      # end of opening tag
00438                                           '.*?'.                                        # last level nested tag content
00439                         str_repeat('
00440                                           </\2\s*>      # closing nested tag
00441                                         )
00442                                   |
00443                                         <(?!/\2\s*>     # other tags with a different name
00444                                   )
00445                                 )*',
00446                                 $nested_tags_level);
00447                 $content2 = str_replace('\2', '\3', $content);
00448                 # Capture groups below: $1 whole block, $2 list-b tag, $3 list-a tag, $4 hr, $5 "?" or "%".
00449                 # First, look for nested blocks, e.g.:
00450                 #       <div>
00451                 #               <div>
00452                 #               tags for inner block must be indented.
00453                 #               </div>
00454                 #       </div>
00455                 #
00456                 # The outermost tags must start at the left margin for this to match, and
00457                 # the inner nested divs must be indented.
00458                 # We need to do this before the next, more liberal match, because the next
00459                 # match will start at the first `<div>` and stop at the first `</div>`.
00460                 $text = preg_replace_callback('{(?>
00461                         (?>
00462                                 (?<=\n\n)               # Starting after a blank line
00463                                 |                               # or
00464                                 \A\n?                   # the beginning of the doc
00465                         )
00466                         (                                               # save in $1
00467
00468                           # Match from `\n<tag>` to `</tag>\n`, handling nested tags
00469                           # in between.
00470
00471                                                 [ ]{0,'.$less_than_tab.'}
00472                                                 <('.$block_tags_b_re.')# start tag = $2
00473                                                 '.$attr.'>                      # attributes followed by > and \n
00474                                                 '.$content.'            # content, support nesting
00475                                                 </\2>                           # the matching end tag
00476                                                 [ ]*                            # trailing spaces/tabs
00477                                                 (?=\n+|\Z)      # followed by a newline or end of document
00478
00479                         | # Special version for tags of group a.
00480
00481                                                 [ ]{0,'.$less_than_tab.'}
00482                                                 <('.$block_tags_a_re.')# start tag = $3
00483                                                 '.$attr.'>[ ]*\n        # attributes followed by >
00484                                                 '.$content2.'           # content, support nesting
00485                                                 </\3>                           # the matching end tag
00486                                                 [ ]*                            # trailing spaces/tabs
00487                                                 (?=\n+|\Z)      # followed by a newline or end of document
00488
00489                         | # Special case just for <hr />. It was easier to make a special
00490                           # case than to make the other regex more complicated.
00491
00492                                                 [ ]{0,'.$less_than_tab.'}
00493                                                 <(hr)                           # start tag = $4
00494                                                 '.$attr.'                       # attributes
00495                                                 /?>                                     # the matching end tag
00496                                                 [ ]*
00497                                                 (?=\n{2,}|\Z)           # followed by a blank line or end of document
00498
00499                         | # Special case for standalone HTML comments:
00500
00501                                         [ ]{0,'.$less_than_tab.'}
00502                                         (?s:
00503                                                 <!-- .*? -->
00504                                         )
00505                                         [ ]*
00506                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
00507
00508                         | # PHP and ASP-style processor instructions (<? and <%)
00509
00510                                         [ ]{0,'.$less_than_tab.'}
00511                                         (?s:
00512                                                 <([?%])                 # $5
00513                                                 .*?
00514                                                 \5>                     # same delimiter as $5 (group 2 is unset in this branch)
00515                                         )
00516                                         [ ]*
00517                                         (?=\n{2,}|\Z)           # followed by a blank line or end of document
00518
00519                         )
00520                         )}Sxmi',
00521                         array(&$this, '_hashHTMLBlocks_callback'),
00522                         $text);
00523
00524                 return $text;
00525         }
00526         function _hashHTMLBlocks_callback($matches) {
00527                 # $matches[1] holds the whole HTML block; swap it for its hash
00528                 # token, padded with a blank line on each side.
00529                 return "\n\n" . $this->hashBlock($matches[1]) . "\n\n";
00530         }
00531
00532
00533         function hashPart($text, $boundary = 'X') {
00534         #
00535         # Hashes an atomic element that a function inserts into the text stream.
00536         # Passing $text through this function yields a unique text token, which
00537         # is swapped back for the original text when unhash is called.
00538         #
00539         # $boundary is the character placed on both sides of the token. By
00540         # convention, "B" is used for block elements that must not be wrapped
00541         # in paragraph tags at the end, ":" for elements that act as word
00542         # separators, and "X" for the general case.
00543         #
00544                 # Expand tokens already present in $text first, so that a single
00545                 # `unhash` pass at the end is enough.
00546                 $plain = $this->unhash($text);
00547                 # Build the next token from a counter kept across calls.
00548                 static $serial = 0;
00549                 $serial++;
00550                 $token = $boundary . "\x1A" . $serial . $boundary;
00551                 $this->html_hashes[$token] = $plain;
00552                 return $token; # String that will replace the tag.
00553         }
00554
00555
00556         function hashBlock($text) {
00557         #
00558         # Convenience wrapper: hashPart with the block-level boundary "B".
00559                 $key = $this->hashPart($text, 'B');
00560                 return $key;
00561         }
00562
00563
00564         var $block_gamut = array(
00565         #
00566         # These are all the transformations that form block-level
00567         # tags like paragraphs, headers, and list items.
00568         #
00569                 "doHeaders"         => 10,
00570                 "doHorizontalRules" => 20,
00571
00572                 "doLists"           => 40,
00573                 "doCodeBlocks"      => 50,
00574                 "doBlockQuotes"     => 60,
00575                 );
00576
00577         function runBlockGamut($text) {
00578         #
00579         # Run block gamut transformations.
00580         #
00581                 # Raw HTML in the Markdown source has to be hashed before anything
00582                 # else. This is done for each block, not only at the beginning in
00583                 # the Markdown function, because hashed blocks can sit inside list
00584                 # items and may have been indented; an earlier hashHTMLBlocks pass
00585                 # would have seen such indented blocks as code blocks.
00586                 # The hashed text then goes through the basic block gamut.
00587
00588                 return $this->runBasicBlockGamut($this->hashHTMLBlocks($text));
00589         }
00590
00591         function runBasicBlockGamut($text) {
00592         #
00593         # Run block gamut transformations without hashing HTML blocks first.
00594         # Use this when the HTML blocks are known to be hashed already, as in
00595         # the first whole-document pass.
00596         #
00597                 $out = $text;
00598                 foreach ($this->block_gamut as $step => $rank) {
00599                         $out = $this->$step($out);
00600                 }
00601
00602                 # Last step: form paragraphs, which also restores hashed blocks.
00603
00604                 return $this->formParagraphs($out);
00605         }
00606
00607
00608         function doHorizontalRules($text) { # Replace rule lines with a hashed <hr> tag.
00609                 $rule_token = $this->hashBlock("<hr$this->empty_element_suffix");
00610                 return preg_replace(
00611                         '{
00612                                 ^[ ]{0,3}       # Leading space
00613                                 ([-*_])         # $1: First marker
00614                                 (?>                     # Repeated marker group
00615                                         [ ]{0,2}        # Zero, one, or two spaces.
00616                                         \1                      # Marker character
00617                                 ){2,}           # Group repeated at least twice
00618                                 [ ]*            # Tailing spaces
00619                                 $                       # End of line.
00620                         }mx',
00621                         "\n".$rule_token."\n",
00622                         $text);
00623         }
00624
00625
00626         var $span_gamut = array(
00627         #
00628         # These are all the transformations that occur *within* block-level
00629         # tags like paragraphs, headers, and list items.
00630         #
00631                 # Process character escapes, code spans, and inline HTML
00632                 # in one shot.
00633                 "parseSpan"           => -30,
00634
00635                 # Process anchor and image tags. Images must come first,
00636                 # because ![foo][f] looks like an anchor.
00637                 "doImages"            =>  10,
00638                 "doAnchors"           =>  20,
00639
00640                 # Make links out of things like `<http://example.com/>`
00641                 # Must come after doAnchors, because you can use < and >
00642                 # delimiters in inline links like [this](<url>).
00643                 "doAutoLinks"         =>  30,
00644                 "encodeAmpsAndAngles" =>  40,
00645
00646                 "doItalicsAndBold"    =>  50,
00647                 "doHardBreaks"        =>  60,
00648                 );
00649
00650         function runSpanGamut($text) {
00651         #
00652         # Apply each span gamut method to the text, in the gamut's order.
00653         #
00654                 $out = $text;
00655                 foreach ($this->span_gamut as $step => $rank) {
00656                         $out = $this->$step($out);
00657                 }
00658                 return $out;
00659         }
00660
00661
00662         function doHardBreaks($text) {
00663                 # Two or more spaces right before a newline become a hard break.
00664                 $callback = array(&$this, '_doHardBreaks_callback');
00665                 return preg_replace_callback('/ {2,}\n/', $callback, $text);
00666         }
function _doHardBreaks_callback($matches) {
#
# Build the replacement for one hard break: a <br tag closed with the
# configured empty-element suffix, then a newline, passed through
# hashPart(). $matches is not used.
#
    $br_tag = '<br' . $this->empty_element_suffix . "\n";
    return $this->hashPart($br_tag);
}
00670
00671
function doAnchors($text) {
#
# Convert Markdown link syntax found in $text into <a> tags, in three
# passes: reference links, inline links, then bare [shortcut] links.
# Returns the transformed text.
#
    # Re-entrancy guard. The callbacks run the span gamut on the link
    # text, and the span gamut contains doAnchors itself; while a link
    # is being built, nested calls hand the text back untouched.
    if ($this->in_anchor) {
        return $text;
    }
    $this->in_anchor = true;

    $on_reference = array(&$this, '_doAnchors_reference_callback');
    $on_inline    = array(&$this, '_doAnchors_inline_callback');

    #
    # Pass 1 -- reference-style links: [link text] [id]
    #
    $reference_re = '{
        (                   # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)           # id = $3
          \]
        )
        }xs';
    $text = preg_replace_callback($reference_re, $on_reference, $text);

    #
    # Pass 2 -- inline-style links: [link text](url "optional title")
    #
    $inline_re = '{
        (                   # wrap whole match in $1
          \[
            ('.$this->nested_brackets_re.') # link text = $2
          \]
          \(                # literal paren
            [ \n]*
            (?:
                <(.+?)>     # href = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # href = $4
            )
            [ \n]*
            (               # $5
              ([\'"])       # quote char = $6
              (.*?)         # Title = $7
              \6            # matching quote
              [ \n]*        # ignore any spaces/tabs between closing quote and )
            )?              # title is optional
          \)
        )
        }xs';
    $text = preg_replace_callback($inline_re, $on_inline, $text);

    #
    # Pass 3 -- reference-style shortcuts: [link text]
    # This has to run last, otherwise it would swallow the bracketed
    # text of [link text][1] or [link text](/foo).
    #
    $shortcut_re = '{
        (                   # wrap whole match in $1
          \[
            ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
          \]
        )
        }xs';
    $text = preg_replace_callback($shortcut_re, $on_reference, $text);

    $this->in_anchor = false;
    return $text;
}
function _doAnchors_reference_callback($matches) {
#
# Build the replacement for one reference-style link match.
#   $matches[1]  whole matched source text
#   $matches[2]  link text
#   $matches[3]  link id; absent for the bare [text] form
# Returns the <a> tag passed through hashPart(), or the matched source
# unchanged when the id has no entry in $this->urls.
#
    $whole_match = $matches[1];
    $link_text   = $matches[2];

    # Shortcut forms ([this][] and [this]) use the link text as the id.
    $link_id = isset($matches[3]) ? $matches[3] : '';
    if ($link_id == '') {
        $link_id = $link_text;
    }

    # Normalise the id: lower-case it, then turn each embedded newline
    # (with an optional space before it) into a single space.
    $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

    # Unknown reference: leave the source as written.
    if (!isset($this->urls[$link_id])) {
        return $whole_match;
    }

    $href = $this->encodeAttribute($this->urls[$link_id]);
    $tag  = '<a href="' . $href . '"';

    if (isset($this->titles[$link_id])) {
        $tag .= ' title="' . $this->encodeAttribute($this->titles[$link_id]) . '"';
    }

    $tag .= '>' . $this->runSpanGamut($link_text) . '</a>';
    return $this->hashPart($tag);
}
function _doAnchors_inline_callback($matches) {
#
# Build the replacement for one inline link: [text](url "title").
#   $matches[2]  link text
#   $matches[3]  URL written as <url>, empty when that form was not used
#   $matches[4]  URL written bare
#   $matches[7]  title; absent when no title was given
# Returns the <a> tag passed through hashPart().
#
    # NOTE(review): the link text goes through runSpanGamut() twice,
    # here and again just before the tag is closed. The second pass
    # looks redundant; it is kept so the output stays exactly the same.
    $link_text = $this->runSpanGamut($matches[2]);

    # Prefer the <url> form, fall back to the bare URL.
    $url = $matches[3];
    if ($url == '') {
        $url = $matches[4];
    }
    $url = $this->encodeAttribute($url);

    $tag = '<a href="' . $url . '"';
    if (isset($matches[7])) {
        $tag .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
    }

    $link_text = $this->runSpanGamut($link_text);
    $tag .= '>' . $link_text . '</a>';

    return $this->hashPart($tag);
}
00795
00796
function doImages($text) {
#
# Convert Markdown image syntax found in $text into <img> tags, in two
# passes: reference-style images first, inline images second.
# Returns the transformed text.
#
    $on_reference = array(&$this, '_doImages_reference_callback');
    $on_inline    = array(&$this, '_doImages_inline_callback');

    #
    # Pass 1 -- reference-style labeled images: ![alt text][id]
    #
    $reference_re = '{
        (                   # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)           # id = $3
          \]

        )
        }xs';
    $text = preg_replace_callback($reference_re, $on_reference, $text);

    #
    # Pass 2 -- inline images: ![alt text](url "optional title")
    #
    $inline_re = '{
        (                   # wrap whole match in $1
          !\[
            ('.$this->nested_brackets_re.')     # alt text = $2
          \]
          \s?               # One optional whitespace character
          \(                # literal paren
            [ \n]*
            (?:
                <(\S*)>     # src url = $3
            |
                ('.$this->nested_url_parenthesis_re.')  # src url = $4
            )
            [ \n]*
            (               # $5
              ([\'"])       # quote char = $6
              (.*?)         # title = $7
              \6            # matching quote
              [ \n]*
            )?              # title is optional
          \)
        )
        }xs';
    $text = preg_replace_callback($inline_re, $on_inline, $text);

    return $text;
}
function _doImages_reference_callback($matches) {
#
# Build the replacement for one reference-style image: ![alt text][id].
#   $matches[1]  whole matched source text
#   $matches[2]  alt text
#   $matches[3]  link id, possibly empty
# Returns the <img> tag passed through hashPart(), or the matched source
# unchanged when the id has no entry in $this->urls.
#
    $whole_match = $matches[1];
    $alt_text    = $matches[2];

    # The shortcut form ![this][] uses the alt text as the id. Ids are
    # looked up in lower case.
    $link_id = strtolower($matches[3]);
    if ($link_id == '') {
        $link_id = strtolower($alt_text);
    }

    $alt_text = $this->encodeAttribute($alt_text);

    # No such link id: leave the source intact.
    if (!isset($this->urls[$link_id])) {
        return $whole_match;
    }

    $src = $this->encodeAttribute($this->urls[$link_id]);
    $tag = '<img src="' . $src . '" alt="' . $alt_text . '"';

    if (isset($this->titles[$link_id])) {
        $tag .= ' title="' . $this->encodeAttribute($this->titles[$link_id]) . '"';
    }

    $tag .= $this->empty_element_suffix;
    return $this->hashPart($tag);
}
function _doImages_inline_callback($matches) {
#
# Build the replacement for one inline image: ![alt text](url "title").
#   $matches[2]  alt text
#   $matches[3]  URL written as <url>, empty when that form was not used
#   $matches[4]  URL written bare
#   $matches[7]  title; absent when no title was given
# Returns the <img> tag passed through hashPart().
#
    $alt_text = $this->encodeAttribute($matches[2]);

    # Prefer the <url> form, fall back to the bare URL.
    $url = $matches[3];
    if ($url == '') {
        $url = $matches[4];
    }
    $url = $this->encodeAttribute($url);

    $tag = '<img src="' . $url . '" alt="' . $alt_text . '"';
    if (isset($matches[7])) {
        $tag .= ' title="' . $this->encodeAttribute($matches[7]) . '"';
    }
    $tag .= $this->empty_element_suffix;

    return $this->hashPart($tag);
}
00897
00898
function doHeaders($text) {
#
# Convert both Markdown header syntaxes found in $text and return the
# transformed text.
#
    # Setext-style headers, underlined with = (level 1) or - (level 2):
    #
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    $on_setext = array(&$this, '_doHeaders_callback_setext');
    $text = preg_replace_callback(
        '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', $on_setext, $text);

    # atx-style headers, one to six leading hashes:
    #
    #     # Header 1
    #     ## Header 2
    #     ## Header 2 with closing hashes ##
    #     ...
    #     ###### Header 6
    #
    $on_atx = array(&$this, '_doHeaders_callback_atx');
    $atx_re = '{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            \n+
        }xm';
    $text = preg_replace_callback($atx_re, $on_atx, $text);

    return $text;
}
function _doHeaders_callback_setext($matches) {
#
# Build the replacement for one Setext-style header match.
#   $matches[0]  whole matched source text
#   $matches[1]  header text
#   $matches[2]  underline: a run of = or of -
# Returns the <h1>/<h2> element passed through hashBlock() and
# surrounded by newlines, or the source unchanged for the special case
# below.
#
    # Terrible hack to check we haven't found an empty list item: a
    # line starting with "- " (or a lone "-") followed by a single "-".
    if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
        return $matches[0];

    # An underline of = is level 1, an underline of - is level 2.
    # Square brackets are used for the string offset: the curly-brace
    # form ($str{0}) is deprecated in PHP 7.4 and a parse error in 8.0.
    $level = $matches[2][0] == '=' ? 1 : 2;
    $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Build the replacement for one atx-style header match.
#   $matches[1]  the leading run of # characters; its length is the level
#   $matches[2]  header text
# Returns the <hN> element passed through hashBlock() and surrounded by
# newlines.
#
    $tag_name = 'h' . strlen($matches[1]);
    $content  = $this->runSpanGamut($matches[2]);
    $html     = '<' . $tag_name . '>' . $content . '</' . $tag_name . '>';

    return "\n" . $this->hashBlock($html) . "\n\n";
}
00943
00944
function doLists($text) {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
# Runs two passes over $text -- bulleted lists first, numbered lists
# second -- and returns the transformed text.
#
    $less_than_tab = $this->tab_width - 1;
    $on_list       = array(&$this, '_doLists_callback');

    # Re-usable patterns to match list item bullets and number markers.
    $marker_ul_re = '[*+-]';
    $marker_ol_re = '\d+[.]';

    # Each pass pairs the marker being collected with the marker of the
    # other list kind, which ends the list when it shows up at the same
    # indentation.
    $markers_relist = array(
        $marker_ul_re => $marker_ol_re,
        $marker_ol_re => $marker_ul_re,
        );

    foreach ($markers_relist as $marker_re => $other_marker_re) {
        # Re-usable pattern to match any entire ul or ol list.
        # It is embedded in an expression compiled with the mx flags.
        $whole_list_re = '
            (                               # $1 = whole list
              (                             # $2
                ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
                ('.$marker_re.')            # $4 = first list item marker
                [ ]+
              )
              (?s:.+?)
              (                             # $5
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                       # Negative lookahead for another list item marker
                    [ ]*
                    '.$marker_re.'[ ]+
                  )
                |
                  (?=                       # Lookahead for another kind of list
                    \n
                    \3                      # Must have the same indentation
                    '.$other_marker_re.'[ ]+
                  )
              )
            )
        ';

        # We use a different prefix before nested lists than top-level lists.
        # See extended comment in _ProcessListItems().
        if ($this->list_level) {
            $list_re = '{
                    ^
                    '.$whole_list_re.'
                }mx';
        }
        else {
            $list_re = '{
                    (?:(?<=\n)\n|\A\n?) # Must eat the newline
                    '.$whole_list_re.'
                }mx';
        }

        $text = preg_replace_callback($list_re, $on_list, $text);
    }

    return $text;
}
function _doLists_callback($matches) {
#
# Build the replacement for one whole list matched by doLists().
#   $matches[1]  source text of the whole list
#   $matches[4]  marker of the first list item
# Returns the <ul>/<ol> element passed through hashBlock() and
# surrounded by newlines.
#
    # Re-usable patterns to match list item bullets and number markers.
    $marker_ul_re = '[*+-]';
    $marker_ol_re = '\d+[.]';

    # The first item's marker decides the list type, and only markers of
    # that kind are then treated as item markers within this list.
    if (preg_match("/$marker_ul_re/", $matches[4])) {
        $list_type     = "ul";
        $marker_any_re = $marker_ul_re;
    }
    else {
        $list_type     = "ol";
        $marker_any_re = $marker_ol_re;
    }

    $items = $this->processListItems($matches[1] . "\n", $marker_any_re);
    $html  = $this->hashBlock("<$list_type>\n" . $items . "</$list_type>");

    return "\n" . $html . "\n\n";
}
01028
# List nesting depth: processListItems() raises it on entry and lowers
# it on exit, so zero means "not inside a list".
var $list_level = 0;

function processListItems($list_str, $marker_any_re) {
#
#   Process the contents of a single ordered or unordered list, splitting
#   it into individual list items.
#
#   $list_str       source text of one list
#   $marker_any_re  regular expression fragment matching an item marker
#   Returns the text with every item replaced by the output of
#   _processListItems_callback().
#
    # Why $this->list_level exists: outside of any list, text such as
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # should stay a single paragraph, even though its second line
    # starts with a digit-period-space sequence. Inside a list (or
    # sub-list) that same line is taken as the start of a sub-list.
    # It is a kludge: this part of Markdown's syntax is hard to parse
    # perfectly without resorting to mind-reading. Perhaps the syntax
    # rules should require sub-lists to start with a starting cardinal
    # number; e.g. "1." or "a.".

    $this->list_level++;

    # Trim trailing blank lines: collapse them to a single newline.
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $on_item = array(&$this, '_processListItems_callback');
    $item_re = '{
        (\n)?                           # leading line = $1
        (^[ ]*)                         # leading whitespace = $2
        ('.$marker_any_re.'             # list marker and space = $3
            (?:[ ]+|(?=\n)) # space only required if item is not empty
        )
        ((?s:.*?))                      # list item text   = $4
        (?:(\n+(?=\n))|\n)              # tailing blank line = $5
        (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
        }xm';
    $list_str = preg_replace_callback($item_re, $on_item, $list_str);

    $this->list_level--;
    return $list_str;
}
function _processListItems_callback($matches) {
#
# Build the <li> element for one list item matched by processListItems().
#   $matches[1]  newline preceding the item, if any
#   $matches[2]  leading whitespace before the marker
#   $matches[3]  list marker plus the space after it
#   $matches[4]  item text
#   $matches[5]  blank line(s) following the item, if any
#
    $leading_line       = isset($matches[1]) ? $matches[1] : '';
    $leading_space      = isset($matches[2]) ? $matches[2] : '';
    $marker_space       = $matches[3];
    $item               = $matches[4];
    $tailing_blank_line = isset($matches[5]) ? $matches[5] : '';

    # An item goes through the block gamut when a blank line sits
    # before it, after it, or inside it.
    $is_block_item = $leading_line
        || $tailing_blank_line
        || preg_match('/\n{2,}/', $item);

    if ($is_block_item) {
        # Replace marker with the appropriate whitespace indentation
        $padding = $leading_space . str_repeat(' ', strlen($marker_space));
        $item = $this->runBlockGamut($this->outdent($padding . $item) . "\n");
    }
    else {
        # Recursion for sub-lists, then strip trailing newlines and
        # process what remains as span-level text.
        $item = $this->outdent($item);
        $item = $this->doLists($item);
        $item = preg_replace('/\n+$/', '', $item);
        $item = $this->runSpanGamut($item);
    }

    return '<li>' . $item . "</li>\n";
}
01100
01101
function doCodeBlocks($text) {
#
#   Process Markdown `<pre><code>` blocks: find each run of lines
#   indented by at least $this->tab_width spaces and replace it with
#   the output of _doCodeBlocks_callback(). Returns the transformed
#   text.
#
    $on_block = array(&$this, '_doCodeBlocks_callback');
    $block_re = '{
            (?:\n\n|\A\n?)
            (                   # $1 = the code block -- one or more lines, starting with a space/tab
              (?>
                [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
        }xm';

    return preg_replace_callback($block_re, $on_block, $text);
}
function _doCodeBlocks_callback($matches) {
#
# Build the replacement for one indented code block.
#   $matches[1]  the indented source lines
# Returns a <pre><code> element passed through hashBlock(), with two
# newlines on either side.
#
    # Remove the indentation, then escape &, < and > (quotes are left
    # alone because of ENT_NOQUOTES).
    $code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);

    # Trim leading newlines and trailing newlines.
    $code = preg_replace('/\A\n+|\n+\z/', '', $code);

    $html = '<pre><code>' . $code . "\n" . '</code></pre>';
    return "\n\n" . $this->hashBlock($html) . "\n\n";
}
01132
01133
function makeCodeSpan($code) {
#
# Create a code span markup for $code. Called from handleSpanToken.
# The code is trimmed, &, < and > are escaped (quotes are left alone
# because of ENT_NOQUOTES), and the resulting <code> element is passed
# through hashPart().
#
    $escaped = htmlspecialchars(trim($code), ENT_NOQUOTES);
    return $this->hashPart('<code>' . $escaped . '</code>');
}
01141
01142
#
# Regular expression fragments for emphasis markers. In each table the
# key is the marker that is currently open ('' when none is) and the
# value is the fragment used to find the next marker of that kind.
# prepareItalicsAndBold() combines these tables into complete
# expressions.
#

# One-character markers: * and _
var $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![.,:;]\s)',
    '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
    '_' => '(?<=\S|^)(?<!_)_(?!_)',
    );

# Two-character markers: ** and __
var $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![.,:;]\s)',
    '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
    '__' => '(?<=\S|^)(?<!_)__(?!_)',
    );

# Three-character markers: *** and ___
var $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![.,:;]\s)',
    '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
    '___' => '(?<=\S|^)(?<!_)___(?!_)',
    );

# Complete expressions built by prepareItalicsAndBold(), keyed by the
# open one-character marker followed by the open two-character marker.
var $em_strong_prepared_relist;
01159
function prepareItalicsAndBold() {
#
# Prepare regular expressions for searching emphasis tokens in any
# context. For every pairing of an $em_relist key with a $strong_relist
# key, one expression is stored in $this->em_strong_prepared_relist
# under the two keys joined together.
#
    foreach ($this->em_relist as $em => $em_re) {
        foreach ($this->strong_relist as $strong => $strong_re) {
            $context = "$em$strong";

            # Allowed token expressions for this context. The
            # three-character expression, when one exists for the
            # context, goes first in the alternation.
            $alternatives = array($em_re, $strong_re);
            if (isset($this->em_strong_relist[$context])) {
                array_unshift($alternatives, $this->em_strong_relist[$context]);
            }

            # Master expression: one capturing group around all the
            # alternatives.
            $this->em_strong_prepared_relist[$context] =
                '{(' . implode('|', $alternatives) . ')}';
        }
    }
}
01181
01182         function doItalicsAndBold($text) {
01183                 $token_stack = array('');
01184                 $text_stack = array('');
01185                 $em = '';
01186                 $strong = '';
01187                 $tree_char_em = false;
01188
01189                 while (1) {
01190                         #
01191                         # Get prepared regular expression for searching emphasis tokens
01192                         # in current context.
01193                         #
01194                         $token_re = $this->em_strong_prepared_relist["$em$strong"];
01195
01196                         #
01197                         # Each loop iteration search for the next emphasis token.
01198                         # Each token is then passed to handleSpanToken.
01199                         #
01200                         $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
01201                         $text_stack[0] .= $parts[0];
01202                         $token =& $parts[1];
01203                         $text =& $parts[2];
01204
01205                         if (empty($token)) {
01206                                 # Reached end of text span: empty stack without emitting.
01207                                 # any more emphasis.
01208                                 while ($token_stack[0]) {
01209                                         $text_stack[1] .= array_shift($token_stack);
01210                                         $text_stack[0] .= array_shift($text_stack);
01211                                 }
01212                                 break;
01213                         }
01214
01215                         $token_len = strlen($token);
01216                         if ($tree_char_em) {
01217                                 # Reached closing marker while inside a three-char emphasis.
01218                                 if ($token_len == 3) {
01219                                         # Three-char closing marker, close em and strong.
01220                                         array_shift($token_stack);
01221                                         $span = array_shift($text_stack);
01222                                         $span = $this->runSpanGamut($span);
01223                                         $span = "<strong><em>$span</em></strong>";
01224                                         $text_stack[0] .= $this->hashPart($span);
01225                                         $em = '';
01226                                         $strong = '';
01227                                 } else {
01228                                         # Other closing marker: close one em or strong and
01229                                         # change current token state to match the other
01230                                         $token_stack[0] = str_repeat($token{0}, 3-$token_len);
01231                                         $tag = $token_len == 2 ? "strong" : "em";
01232                                         $span = $text_stack[0];
01233                                         $span = $this->runSpanGamut($span);
01234                                         $span = "<$tag>$span</$tag>";
01235                                         $text_stack[0] = $this->hashPart($span);
01236                                         $$tag = ''; # $$tag stands for $em or $strong
01237                                 }
01238                                 $tree_char_em = false;
01239                         } else if ($token_len == 3) {
01240                                 if ($em) {
01241                                         # Reached closing marker for both em and strong.
01242                                         # Closing strong marker:
01243                                         for ($i = 0; $i < 2; ++$i) {
01244                                                 $shifted_token = array_shift($token_stack);
01245                                                 $tag = strlen($shifted_token) == 2 ? "strong" : "em";
01246                                                 $span = array_shift($text_stack);
01247                                                 $span = $this->runSpanGamut($span);
01248                                                 $span = "<$tag>$span</$tag>";
01249                                                 $text_stack[0] .= $this->hashPart($span);
01250                                                 $$tag = ''; # $$tag stands for $em or $strong
01251                                         }
01252                                 } else {
01253                                         # Reached opening three-char emphasis marker. Push on token
01254                                         # stack; will be handled by the special condition above.
01255                                         $em = $token{0};
01256                                         $strong = "$em$em";
01257                                         array_unshift($token_stack, $token);
01258                                         array_unshift($text_stack, '');
01259                                         $tree_char_em = true;
01260                                 }
01261                         } else if ($token_len == 2) {
01262                                 if ($strong) {
01263                                         # Unwind any dangling emphasis marker:
01264                                         if (strlen($token_stack[0]) == 1) {
01265                                                 $text_stack[1] .= array_shift($token_stack);
01266                                                 $text_stack[0] .= array_shift($text_stack);
01267                                         }
01268                                         # Closing strong marker:
01269                                         array_shift($token_stack);
01270                                         $span = array_shift($text_stack);
01271                                         $span = $this->runSpanGamut($span);
01272                                         $span = "<strong>$span</strong>";
01273                                         $text_stack[0] .= $this->hashPart($span);
01274                                         $strong = '';
01275                                 } else {
01276                                         array_unshift($token_stack, $token);
01277                                         array_unshift($text_stack, '');
01278                                         $strong = $token;
01279                                 }
01280                         } else {
01281                                 # Here $token_len == 1
01282                                 if ($em) {
01283                                         if (strlen($token_stack[0]) == 1) {
01284                                                 # Closing emphasis marker:
01285                                                 array_shift($token_stack);
01286                                                 $span = array_shift($text_stack);
01287                                                 $span = $this->runSpanGamut($span);
01288                                                 $span = "<em>$span</em>";
01289                                                 $text_stack[0] .= $this->hashPart($span);
01290                                                 $em = '';
01291                                         } else {
01292                                                 $text_stack[0] .= $token;
01293                                         }
01294                                 } else {
01295                                         array_unshift($token_stack, $token);
01296                                         array_unshift($text_stack, '');
01297                                         $em = $token;
01298                                 }
01299                         }
01300                 }
01301                 return $text_stack[0];
01302         }
01303
01304
01305         function doBlockQuotes($text) {
01306                 $text = preg_replace_callback('/
01307                           (                                                             # Wrap whole match in $1
01308                                 (?>
01309                                   ^[ ]*>[ ]?                    # ">" at the start of a line
01310                                         .+\n                                    # rest of the first line
01311                                   (.+\n)*                                       # subsequent consecutive lines
01312                                   \n*                                           # blanks
01313                                 )+
01314                           )
01315                         /xm',
01316                         array(&$this, '_doBlockQuotes_callback'), $text);
01317
01318                 return $text;
01319         }
01320         function _doBlockQuotes_callback($matches) {
01321                 $bq = $matches[1];
01322                 # trim one level of quoting - trim whitespace-only lines
01323                 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
01324                 $bq = $this->runBlockGamut($bq);                # recurse
01325
01326                 $bq = preg_replace('/^/m', "  ", $bq);
01327                 # These leading spaces cause problem with <pre> content,
01328                 # so we need to fix that:
01329                 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
01330                         array(&$this, '_doBlockQuotes_callback2'), $bq);
01331
01332                 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
01333         }
01334         function _doBlockQuotes_callback2($matches) {
01335                 $pre = $matches[1];
01336                 $pre = preg_replace('/^  /m', '', $pre);
01337                 return $pre;
01338         }
01339
01340
01341         function formParagraphs($text) {
01342         #
01343         #       Params:
01344         #               $text - string to process with html <p> tags
01345         #
01346                 # Strip leading and trailing lines:
01347                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
01348
01349                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
01350
01351                 #
01352                 # Wrap <p> tags and unhashify HTML blocks
01353                 #
01354                 foreach ($grafs as $key => $value) {
01355                         if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
01356                                 # Is a paragraph.
01357                                 $value = $this->runSpanGamut($value);
01358                                 $value = preg_replace('/^([ ]*)/', "<p>", $value);
01359                                 $value .= "</p>";
01360                                 $grafs[$key] = $this->unhash($value);
01361                         }
01362                         else {
01363                                 # Is a block.
01364                                 # Modify elements of @grafs in-place...
01365                                 $graf = $value;
01366                                 $block = $this->html_hashes[$graf];
01367                                 $graf = $block;
01368 //                              if (preg_match('{
01369 //                                      \A
01370 //                                      (                                                       # $1 = <div> tag
01371 //                                        <div  \s+
01372 //                                        [^>]*
01373 //                                        \b
01374 //                                        markdown\s*=\s*  ([\'"])      #       $2 = attr quote char
01375 //                                        1
01376 //                                        \2
01377 //                                        [^>]*
01378 //                                        >
01379 //                                      )
01380 //                                      (                                                       # $3 = contents
01381 //                                      .*
01382 //                                      )
01383 //                                      (</div>)                                        # $4 = closing tag
01384 //                                      \z
01385 //                                      }xs', $block, $matches))
01386 //                              {
01387 //                                      list(, $div_open, , $div_content, $div_close) = $matches;
01388 //
01389 //                                      # We can't call Markdown(), because that resets the hash;
01390 //                                      # that initialization code should be pulled into its own sub, though.
01391 //                                      $div_content = $this->hashHTMLBlocks($div_content);
01392 //
01393 //                                      # Run document gamut methods on the content.
01394 //                                      foreach ($this->document_gamut as $method => $priority) {
01395 //                                              $div_content = $this->$method($div_content);
01396 //                                      }
01397 //
01398 //                                      $div_open = preg_replace(
01399 //                                              '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
01400 //
01401 //                                      $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
01402 //                              }
01403                                 $grafs[$key] = $graf;
01404                         }
01405                 }
01406
01407                 return implode("\n\n", $grafs);
01408         }
01409
01410
01411         function encodeAttribute($text) {
01412         #
01413         # Encode text for a double-quoted HTML attribute. This function
01414         # is *not* suitable for attributes enclosed in single quotes.
01415         #
01416                 $text = $this->encodeAmpsAndAngles($text);
01417                 $text = str_replace('"', '&quot;', $text);
01418                 return $text;
01419         }
01420
01421
01422         function encodeAmpsAndAngles($text) {
01423         #
01424         # Smart processing for ampersands and angle brackets that need to
01425         # be encoded. Valid character entities are left alone unless the
01426         # no-entities mode is set.
01427         #
01428                 if ($this->no_entities) {
01429                         $text = str_replace('&', '&amp;', $text);
01430                 } else {
01431                         # Ampersand-encoding based entirely on Nat Irons's Amputator
01432                         # MT plugin: <http://bumppo.net/projects/amputator/>
01433                         $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
01434                                                                 '&amp;', $text);;
01435                 }
01436                 # Encode remaining <'s
01437                 $text = str_replace('<', '&lt;', $text);
01438
01439                 return $text;
01440         }
01441
01442
01443         function doAutoLinks($text) {
01444                 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
01445                         array(&$this, '_doAutoLinks_url_callback'), $text);
01446
01447                 # Email addresses: <address@domain.foo>
01448                 $text = preg_replace_callback('{
01449                         <
01450                         (?:mailto:)?
01451                         (
01452                                 (?:
01453                                         [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
01454                                 |
01455                                         ".*?"
01456                                 )
01457                                 \@
01458                                 (?:
01459                                         [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
01460                                 |
01461                                         \[[\d.a-fA-F:]+\]       # IPv4 & IPv6
01462                                 )
01463                         )
01464                         >
01465                         }xi',
01466                         array(&$this, '_doAutoLinks_email_callback'), $text);
01467
01468                 return $text;
01469         }
01470         function _doAutoLinks_url_callback($matches) {
01471                 $url = $this->encodeAttribute($matches[1]);
01472                 $link = "<a href=\"$url\">$url</a>";
01473                 return $this->hashPart($link);
01474         }
01475         function _doAutoLinks_email_callback($matches) {
01476                 $address = $matches[1];
01477                 $link = $this->encodeEmailAddress($address);
01478                 return $this->hashPart($link);
01479         }
01480
01481
01482         function encodeEmailAddress($addr) {
01483         #
01484         #       Input: an email address, e.g. "foo@example.com"
01485         #
01486         #       Output: the email address as a mailto link, with each character
01487         #               of the address encoded as either a decimal or hex entity, in
01488         #               the hopes of foiling most address harvesting spam bots. E.g.:
01489         #
01490         #         <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
01491         #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
01492         #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
01493         #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
01494         #
01495         #       Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
01496         #   With some optimizations by Milian Wolff.
01497         #
01498                 $addr = "mailto:" . $addr;
01499                 $chars = preg_split('/(?<!^)(?!$)/', $addr);
01500                 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
01501
01502                 foreach ($chars as $key => $char) {
01503                         $ord = ord($char);
01504                         # Ignore non-ascii chars.
01505                         if ($ord < 128) {
01506                                 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
01507                                 # roughly 10% raw, 45% hex, 45% dec
01508                                 # '@' *must* be encoded. I insist.
01509                                 if ($r > 90 && $char != '@') /* do nothing */;
01510                                 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
01511                                 else              $chars[$key] = '&#'.$ord.';';
01512                         }
01513                 }
01514
01515                 $addr = implode('', $chars);
01516                 $text = implode('', array_slice($chars, 7)); # text without `mailto:`
01517                 $addr = "<a href=\"$addr\">$text</a>";
01518
01519                 return $addr;
01520         }
01521
01522
01523         function parseSpan($str) {
01524         #
01525         # Take the string $str and parse it into tokens, hashing embeded HTML,
01526         # escaped characters and handling code spans.
01527         #
01528                 $output = '';
01529
01530                 $span_re = '{
01531                                 (
01532                                         \\\\'.$this->escape_chars_re.'
01533                                 |
01534                                         (?<![`\\\\])
01535                                         `+                                              # code span marker
01536                         '.( $this->no_markup ? '' : '
01537                                 |
01538                                         <!--    .*?     -->             # comment
01539                                 |
01540                                         <\?.*?\?> | <%.*?%>             # processing instruction
01541                                 |
01542                                         <[/!$]?[-a-zA-Z0-9:_]+  # regular tags
01543                                         (?>
01544                                                 \s
01545                                                 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
01546                                         )?
01547                                         >
01548                         ').'
01549                                 )
01550                                 }xs';
01551
01552                 while (1) {
01553                         #
01554                         # Each loop iteration seach for either the next tag, the next
01555                         # openning code span marker, or the next escaped character.
01556                         # Each token is then passed to handleSpanToken.
01557                         #
01558                         $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
01559
01560                         # Create token from text preceding tag.
01561                         if ($parts[0] != "") {
01562                                 $output .= $parts[0];
01563                         }
01564
01565                         # Check if we reach the end.
01566                         if (isset($parts[1])) {
01567                                 $output .= $this->handleSpanToken($parts[1], $parts[2]);
01568                                 $str = $parts[2];
01569                         }
01570                         else {
01571                                 break;
01572                         }
01573                 }
01574
01575                 return $output;
01576         }
01577
01578
01579         function handleSpanToken($token, &$str) {
01580         #
01581         # Handle $token provided by parseSpan by determining its nature and
01582         # returning the corresponding value that should replace it.
01583         #
01584                 switch ($token{0}) {
01585                         case "\\":
01586                                 return $this->hashPart("&#". ord($token{1}). ";");
01587                         case "`":
01588                                 # Search for end marker in remaining text.
01589                                 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
01590                                         $str, $matches))
01591                                 {
01592                                         $str = $matches[2];
01593                                         $codespan = $this->makeCodeSpan($matches[1]);
01594                                         return $this->hashPart($codespan);
01595                                 }
01596                                 return $token; // return as text since no ending marker found.
01597                         default:
01598                                 return $this->hashPart($token);
01599                 }
01600         }
01601
01602
01603         function outdent($text) {
01604         #
01605         # Remove one level of line-leading tabs or spaces
01606         #
01607                 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
01608         }
01609
01610
01611         # String length function for detab. `_initDetab` will create a function to
01612         # handle UTF-8 if the default function does not exist.
01613         var $utf8_strlen = 'mb_strlen';
01614
01615         function detab($text) {
01616         #
01617         # Replace tabs with the appropriate amount of space.
01618         #
01619                 # For each line we separate the line in blocks delemited by
01620                 # tab characters. Then we reconstruct every line by adding the
01621                 # appropriate number of space between each blocks.
01622
01623                 $text = preg_replace_callback('/^.*\t.*$/m',
01624                         array(&$this, '_detab_callback'), $text);
01625
01626                 return $text;
01627         }
01628         function _detab_callback($matches) {
01629                 $line = $matches[0];
01630                 $strlen = $this->utf8_strlen; # strlen function for UTF-8.
01631
01632                 # Split in blocks.
01633                 $blocks = explode("\t", $line);
01634                 # Add each blocks to the line.
01635                 $line = $blocks[0];
01636                 unset($blocks[0]); # Do not add first block twice.
01637                 foreach ($blocks as $block) {
01638                         # Calculate amount of space, insert spaces, insert block.
01639                         $amount = $this->tab_width -
01640                                 $strlen($line, 'UTF-8') % $this->tab_width;
01641                         $line .= str_repeat(" ", $amount) . $block;
01642                 }
01643                 return $line;
01644         }
01645         function _initDetab() {
01646         #
01647         # Check for the availability of the function in the `utf8_strlen` property
01648         # (initially `mb_strlen`). If the function is not available, create a
01649         # function that will loosely count the number of UTF-8 characters with a
01650         # regular expression.
01651         #
01652                 if (function_exists($this->utf8_strlen)) return;
01653                 $this->utf8_strlen = create_function('$text', 'return preg_match_all(
01654                         "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/",
01655                         $text, $m);');
01656         }
01657
01658
01659         function unhash($text) {
01660         #
01661         # Swap back in all the tags hashed by _HashHTMLBlocks.
01662         #
01663                 return preg_replace_callback('/(.)\x1A[0-9]+\1/',
01664                         array(&$this, '_unhash_callback'), $text);
01665         }
01666         function _unhash_callback($matches) {
01667                 return $this->html_hashes[$matches[0]];
01668         }
01669
01670 }
01671
01672
01673 #
01674 # Markdown Extra Parser Class
01675 #
01676
01677 class MarkdownExtra_Parser extends Markdown_Parser {
01678
01679         # Prefix for footnote ids.
01680         var $fn_id_prefix = "";
01681
01682         # Optional title attribute for footnote links and backlinks.
01683         var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
01684         var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
01685
01686         # Optional class attribute for footnote links and backlinks.
01687         var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
01688         var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
01689
01690         # Predefined abbreviations.
01691         var $predef_abbr = array();
01692
01693
01694         function MarkdownExtra_Parser() {
01695         #
01696         # Constructor function. Initialize the parser object.
01697         #
01698                 # Add extra escapable characters before parent constructor
01699                 # initialize the table.
01700                 $this->escape_chars .= ':|';
01701
01702                 # Insert extra document, block, and span transformations.
01703                 # Parent constructor will do the sorting.
01704                 $this->document_gamut += array(
01705                         "doFencedCodeBlocks" => 5,
01706                         "stripFootnotes"     => 15,
01707                         "stripAbbreviations" => 25,
01708                         "appendFootnotes"    => 50,
01709                         );
01710                 $this->block_gamut += array(
01711                         "doFencedCodeBlocks" => 5,
01712                         "doTables"           => 15,
01713                         "doDefLists"         => 45,
01714                         );
01715                 $this->span_gamut += array(
01716                         "doFootnotes"        => 5,
01717                         "doAbbreviations"    => 70,
01718                         );
01719
01720                 parent::Markdown_Parser();
01721         }
01722
01723
01724         # Extra variables used during extra transformations.
01725         var $footnotes = array();
01726         var $footnotes_ordered = array();
01727         var $abbr_desciptions = array();
01728         var $abbr_word_re = '';
01729
01730         # Give the current footnote number.
01731         var $footnote_counter = 1;
01732
01733
01734         function setup() {
01735         #
01736         # Setting up Extra-specific variables.
01737         #
01738                 parent::setup();
01739
01740                 $this->footnotes = array();
01741                 $this->footnotes_ordered = array();
01742                 $this->abbr_desciptions = array();
01743                 $this->abbr_word_re = '';
01744                 $this->footnote_counter = 1;
01745
01746                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
01747                         if ($this->abbr_word_re)
01748                                 $this->abbr_word_re .= '|';
01749                         $this->abbr_word_re .= preg_quote($abbr_word);
01750                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
01751                 }
01752         }
01753
01754         function teardown() {
01755         #
01756         # Clearing Extra-specific variables.
01757         #
01758                 $this->footnotes = array();
01759                 $this->footnotes_ordered = array();
01760                 $this->abbr_desciptions = array();
01761                 $this->abbr_word_re = '';
01762
01763                 parent::teardown();
01764         }
01765
01766
01767         ### HTML Block Parser ###
01768
01769         # Tags that are always treated as block tags:
01770         var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
01771
01772         # Tags treated as block tags only if the opening tag is alone on its line:
01773         var $context_block_tags_re = 'script|noscript|math|ins|del';
01774
01775         # Tags where markdown="1" default to span mode:
01776         var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
01777
01778         # Tags which must not have their contents modified, no matter where
01779         # they appear:
01780         var $clean_tags_re = 'script|math';
01781
01782         # Tags that do not need to be closed.
01783         var $auto_close_tags_re = 'hr|img';
01784
01785
01786         function hashHTMLBlocks($text) {
01787         #
01788         # Hashify HTML Blocks and "clean tags".
01789         #
01790         # We only want to do this for block-level HTML tags, such as headers,
01791         # lists, and tables. That's because we still want to wrap <p>s around
01792         # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
01793         # phrase emphasis, and spans. The list of tags we're looking for is
01794         # hard-coded.
01795         #
01796         # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
01797         # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
01798         # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
01799         #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
01800         # These two functions are calling each other. It's recursive!
01801         #
01802                 #
01803                 # Call the HTML-in-Markdown hasher.
01804                 #
01805                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
01806
01807                 return $text;
01808         }
01809         function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
01810                                                                                 $enclosing_tag_re = '', $span = false)
01811         {
01812         #
01813         # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
01814         #
01815         # *   $indent is the number of space to be ignored when checking for code
01816         #     blocks. This is important because if we don't take the indent into
01817         #     account, something like this (which looks right) won't work as expected:
01818         #
01819         #     <div>
01820         #         <div markdown="1">
01821         #         Hello World.  <-- Is this a Markdown code block or text?
01822         #         </div>  <-- Is this a Markdown code block or a real tag?
01823         #     <div>
01824         #
01825         #     If you don't like this, just don't indent the tag on which
01826         #     you apply the markdown="1" attribute.
01827         #
01828         # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
01829         #     tag with that name. Nested tags supported.
01830         #
01831         # *   If $span is true, text inside must treated as span. So any double
01832         #     newline will be replaced by a single newline so that it does not create
01833         #     paragraphs.
01834         #
01835         # Returns an array of that form: ( processed text , remaining text )
01836         #
01837                 if ($text === '') return array('', '');
01838
01839                 # Regex to check for the presense of newlines around a block tag.
01840                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
01841                 $newline_after_re =
01842                         '{
01843                                 ^                                               # Start of text following the tag.
01844                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
01845                                 [ ]*\n                                  # Must be followed by newline.
01846                         }xs';
01847
01848                 # Regex to match any tag.
01849                 $block_tag_re =
01850                         '{
01851                                 (                                       # $2: Capture hole tag.
01852                                         </?                                     # Any opening or closing tag.
01853                                                 (?>                             # Tag name.
01854                                                         '.$this->block_tags_re.'                        |
01855                                                         '.$this->context_block_tags_re.'        |
01856                                                         '.$this->clean_tags_re.'                |
01857                                                         (?!\s)'.$enclosing_tag_re.'
01858                                                 )
01859                                                 (?:
01860                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
01861                                                         (?>
01862                                                                 ".*?"           |       # Double quotes (can contain `>`)
01863                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
01864                                                                 .+?                             # Anything but quotes and `>`.
01865                                                         )*?
01866                                                 )?
01867                                         >                                       # End of tag.
01868                                 |
01869                                         <!--    .*?     -->     # HTML Comment
01870                                 |
01871                                         <\?.*?\?> | <%.*?%>     # Processing instruction
01872                                 |
01873                                         <!\[CDATA\[.*?\]\]>     # CData Block
01874                                 |
01875                                         # Code span marker
01876                                         `+
01877                                 '. ( !$span ? ' # If not in span.
01878                                 |
01879                                         # Indented code block
01880                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
01881                                         [ ]{'.($indent+4).'}[^\n]* \n
01882                                         (?>
01883                                                 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
01884                                         )*
01885                                 |
01886                                         # Fenced code block marker
01887                                         (?> ^ | \n )
01888                                         [ ]{'.($indent).'}~~~+[ ]*\n
01889                                 ' : '' ). ' # End (if not is span).
01890                                 )
01891                         }xs';
01892
01893
01894                 $depth = 0;             # Current depth inside the tag tree.
01895                 $parsed = "";   # Parsed text that will be returned.
01896
01897                 #
01898                 # Loop through every tag until we find the closing tag of the parent
01899                 # or loop until reaching the end of text if no parent tag specified.
01900                 #
01901                 do {
01902                         #
01903                         # Split the text using the first $tag_match pattern found.
01904                         # Text before  pattern will be first in the array, text after
01905                         # pattern will be at the end, and between will be any catches made
01906                         # by the pattern.
01907                         #
01908                         $parts = preg_split($block_tag_re, $text, 2,
01909                                                                 PREG_SPLIT_DELIM_CAPTURE);
01910
01911                         # If in Markdown span mode, add a empty-string span-level hash
01912                         # after each newline to prevent triggering any block element.
01913                         if ($span) {
01914                                 $void = $this->hashPart("", ':');
01915                                 $newline = "$void\n";
01916                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
01917                         }
01918
01919                         $parsed .= $parts[0]; # Text before current tag.
01920
01921                         # If end of $text has been reached. Stop loop.
01922                         if (count($parts) < 3) {
01923                                 $text = "";
01924                                 break;
01925                         }
01926
01927                         $tag  = $parts[1]; # Tag to handle.
01928                         $text = $parts[2]; # Remaining text after current tag.
01929                         $tag_re = preg_quote($tag); # For use in a regular expression.
01930
01931                         #
01932                         # Check for: Code span marker
01933                         #
01934                         if ($tag{0} == "`") {
01935                                 # Find corresponding end marker.
01936                                 $tag_re = preg_quote($tag);
01937                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
01938                                         $text, $matches))
01939                                 {
01940                                         # End marker found: pass text unchanged until marker.
01941                                         $parsed .= $tag . $matches[0];
01942                                         $text = substr($text, strlen($matches[0]));
01943                                 }
01944                                 else {
01945                                         # Unmatched marker: just skip it.
01946                                         $parsed .= $tag;
01947                                 }
01948                         }
01949                         #
01950                         # Check for: Indented code block.
01951                         #
01952                         else if ($tag{0} == "\n" || $tag{0} == " ") {
01953                                 # Indented code block: pass it unchanged, will be handled
01954                                 # later.
01955                                 $parsed .= $tag;
01956                         }
01957                         #
01958                         # Check for: Fenced code block marker.
01959                         #
01960                         else if ($tag{0} == "~") {
01961                                 # Fenced code block marker: find matching end marker.
01962                                 $tag_re = preg_quote(trim($tag));
01963                                 if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text,
01964                                         $matches))
01965                                 {
01966                                         # End marker found: pass text unchanged until marker.
01967                                         $parsed .= $tag . $matches[0];
01968                                         $text = substr($text, strlen($matches[0]));
01969                                 }
01970                                 else {
01971                                         # No end marker: just skip it.
01972                                         $parsed .= $tag;
01973                                 }
01974                         }
01975                         #
01976                         # Check for: Opening Block level tag or
01977                         #            Opening Context Block tag (like ins and del)
01978                         #               used as a block tag (tag is alone on it's line).
01979                         #
01980                         else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
01981                                 (       preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
01982                                         preg_match($newline_before_re, $parsed) &&
01983                                         preg_match($newline_after_re, $text)    )
01984                                 )
01985                         {
01986                                 # Need to parse tag and following text using the HTML parser.
01987                                 list($block_text, $text) =
01988                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
01989
01990                                 # Make sure it stays outside of any paragraph by adding newlines.
01991                                 $parsed .= "\n\n$block_text\n\n";
01992                         }
01993                         #
01994                         # Check for: Clean tag (like script, math)
01995                         #            HTML Comments, processing instructions.
01996                         #
01997                         else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
01998                                 $tag{1} == '!' || $tag{1} == '?')
01999                         {
02000                                 # Need to parse tag and following text using the HTML parser.
02001                                 # (don't check for markdown attribute)
02002                                 list($block_text, $text) =
02003                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
02004
02005                                 $parsed .= $block_text;
02006                         }
02007                         #
02008                         # Check for: Tag with same name as enclosing tag.
02009                         #
02010                         else if ($enclosing_tag_re !== '' &&
02011                                 # Same name as enclosing tag.
02012                                 preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
02013                         {
02014                                 #
02015                                 # Increase/decrease nested tag count.
02016                                 #
02017                                 if ($tag{1} == '/')                                             $depth--;
02018                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
02019
02020                                 if ($depth < 0) {
02021                                         #
02022                                         # Going out of parent element. Clean up and break so we
02023                                         # return to the calling function.
02024                                         #
02025                                         $text = $tag . $text;
02026                                         break;
02027                                 }
02028
02029                                 $parsed .= $tag;
02030                         }
02031                         else {
02032                                 $parsed .= $tag;
02033                         }
02034                 } while ($depth >= 0);
02035
02036                 return array($parsed, $text);
02037         }
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
#
# Parse HTML, calling _hashHTMLBlocks_inMarkdown for the content of tags
# carrying a `markdown` attribute.
#
# *   $text is expected to begin with the tag that opens the block; the
#     name of that tag is what the nesting depth is counted against.
# *   $hash_method is the name of a method of $this; it is called on each
#     finished chunk of raw HTML to convert it.
# *   Stops when the first opening tag closes.
# *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
#     (it is not inside clean tags)
#
# Returns an array of that form: ( processed text , remaining text )
#
# If the end of $text is reached while the base tag is still open, nothing
# is hashed: the first character of the original text is returned as
# "processed" and everything after it as "remaining".
#
    if ($text === '') return array('', '');

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \s*             # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (?>
                (["\'])     # $1: quote delimiter
                (.*?)       # $2: attribute value
                \1          # matching delimiter
            |
                ([^\s>]*)   # $3: unquoted attribute value
            )
            ()              # $4: make $3 always defined (avoid warnings)
        }xs';

    # Regex to match any tag.
    $tag_re = '{
            (                   # $2: Capture hole tag.
                </?                 # Any opening or closing tag.
                    [\w:$]+         # Tag name.
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                        (?>
                            ".*?"       |   # Double quotes (can contain `>`)
                            \'.*?\'     |   # Single quotes (can contain `>`)
                            .+?             # Anything but quotes and `>`.
                        )*?
                    )?
                >                   # End of tag.
            |
                <!--    .*?     -->     # HTML Comment
            |
                <\?.*?\?> | <%.*?%>     # Processing instruction
            |
                <!\[CDATA\[.*?\]\]>     # CData Block
            )
        }xs';

    $original_text = $text;     # Save original text in case of failure.

    $depth      = 0;    # Current depth inside the tag tree.
    $block_text = "";   # Temporary text holder for current text.
    $parsed     = "";   # Parsed text that will be returned.

    #
    # Get the name of the starting tag.
    # (This pattern makes $base_tag_name_re safe without quoting.)
    # The name stays empty when $text starts with something the pattern
    # does not match (e.g. a comment), so the variable is always defined.
    #
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
        $base_tag_name_re = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
        #
        # Split the text using the first $tag_re pattern found.
        # Text before pattern will be first in the array, text after
        # pattern will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            #
            # End of $text reached with unbalanced tag(s).
            # In that case, we return original text unchanged and pass the
            # first character as filtered to prevent an infinite loop in the
            # parent function.
            #
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0]; # Text before current tag.
        $tag         = $parts[1]; # Tag to handle.
        $text        = $parts[2]; # Remaining text after current tag.

        #
        # Check for: Auto-close tag (like <hr/>)
        #            Comments and Processing Instructions.
        #
        if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
            $tag[1] === '!' || $tag[1] === '?')
        {
            # Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            #
            # Increase/decrease nested tag count. Only do so if
            # the tag's name match base tag's.
            #
            if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
                if ($tag[1] === '/')                        $depth--;
                else if ($tag[strlen($tag)-2] !== '/')      $depth++;
            }

            #
            # Check for `markdown="1"` attribute and handle it.
            # The alternation is grouped so that the anchors apply to every
            # accepted value: only exactly `1`, `block` or `span` qualify.
            #
            if ($md_attr &&
                preg_match($markdown_attr_re, $tag, $attr_m) &&
                preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
            {
                # Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                # Check if text inside this tag must be parsed in span mode:
                # explicitly requested, or not forced to block and the tag
                # is one that contains span-level content.
                $this->mode = $attr_m[2] . $attr_m[3];
                $span_mode = $this->mode == 'span' ||
                    ($this->mode != 'block' &&
                        preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag));

                # Calculate indent before tag.
                if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    $strlen = $this->utf8_strlen;
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                # End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                # Get enclosing tag name for the ParseMarkdown function.
                # (This pattern makes $tag_name_re safe without quoting.)
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name_re = $matches[1];

                # Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                        $tag_name_re, $span_mode);

                # Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                # Append tag content to parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                # Start over a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    #
    # Hash last block text that wasn't processed inside the loop.
    #
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
02211
02212
function hashClean($text) {
#
# Hash a "clean" tag. Whenever a function inserts such a tag in the text
# it goes through here: the text is handed to hashPart() together with
# the 'C' boundary argument, and whatever hashPart() returns is passed
# straight back to the caller.
#
    $boundary = 'C';
    $hashed = $this->hashPart($text, $boundary);
    return $hashed;
}
02221
02222
function doHeaders($text) {
#
# Redefined to add id attribute support.
#
# Runs two passes over $text and returns the result: Setext-style headers
# first, then atx-style headers. Each match is handed to its own callback
# method, which produces the replacement.
#

    # Setext-style headers:
    #     Header 1  {#header1}
    #     ========
    #
    #     Header 2  {#header2}
    #     --------
    #
    $setext_re = '{
            (^.+?)                              # $1: Header text
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?    # $2: Id attribute
            [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
        }mx';
    $setext_handler = array(&$this, '_doHeaders_callback_setext');

    # atx-style headers:
    #   # Header 1        {#header1}
    #   ## Header 2       {#header2}
    #   ## Header 2 with closing hashes ##  {#header3}
    #   ...
    #   ###### Header 6   {#header2}
    #
    $atx_re = '{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
            [ ]*
            \n+
        }xm';
    $atx_handler = array(&$this, '_doHeaders_callback_atx');

    # Order matters: Setext headers are converted before atx headers.
    $with_setext = preg_replace_callback($setext_re, $setext_handler, $text);
    $with_atx    = preg_replace_callback($atx_re, $atx_handler, $with_setext);

    return $with_atx;
}
function _doHeaders_attr($attr) {
#
# Build the attribute string to insert in a header's opening tag.
#
# $attr is the id captured from a `{#id}` suffix, or null/"" when the
# header has none. Returns ` id="..."` (with its leading space), or ""
# when there is no id.
#
    # empty() alone would also discard the id "0", which the `{#...}`
    # pattern accepts; keep that one value.
    if (empty($attr) && $attr !== '0')  return "";
    return " id=\"$attr\"";
}
function _doHeaders_callback_setext($matches) {
#
# Callback for Setext-style headers.
#
# $matches[1] is the header text, $matches[2] the optional id and
# $matches[3] the underline made of `=` or `-` characters.
# Returns the hashed <h1>/<h2> block surrounded by newlines, or the
# matched text untouched when it should not become a header.
#
    # A line starting with "- " above a single dash is left alone rather
    # than being turned into a level 2 header.
    if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
        return $matches[0];

    # `=` underline gives level 1, `-` gives level 2.
    $level = $matches[3][0] == '=' ? 1 : 2;
    $id    = isset($matches[2]) ? $matches[2] : '';
    $attr  = $this->_doHeaders_attr($id);
    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Callback for atx-style headers.
#
# $matches[1] is the run of leading `#` characters (its length is the
# header level), $matches[2] the header text and $matches[3] the id,
# which is absent from the array when the header has no `{#id}` suffix.
# Returns the hashed header block surrounded by newlines.
#
    $hashes = $matches[1];
    $title  = $matches[2];

    # The id group is optional and last, so it may not be set at all.
    $id = null;
    if (isset($matches[3])) {
        $id = $matches[3];
    }

    $level   = strlen($hashes);
    $attr    = $this->_doHeaders_attr($id);
    $content = $this->runSpanGamut($title);
    $html    = '<h' . $level . $attr . '>' . $content . '</h' . $level . '>';

    $hashed = $this->hashBlock($html);
    return "\n" . $hashed . "\n\n";
}
02281
02282
function doTables($text) {
#
# Form HTML tables.
#
# Two passes are made over $text: tables whose rows start with a pipe,
# then tables without a leading pipe. Each match is replaced by the
# return value of its callback method. Returns the resulting text.
#
    $less_than_tab = $this->tab_width - 1;
    #
    # Find tables with leading pipe.
    #
    #   | Header 1 | Header 2
    #   | -------- | --------
    #   | Cell 1   | Cell 2
    #   | Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            [|]                         # Optional leading pipe (present)
            (.+) \n                     # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline

            (                           # $3: Cells
                (?>
                    [ ]*                # Allowed whitespace.
                    [|] .* \n           # Row content.
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array(&$this, '_doTable_leadingPipe_callback'), $text);

    #
    # Find tables without leading pipe.
    #
    #   Header 1 | Header 2
    #   -------- | --------
    #   Cell 1   | Cell 2
    #   Cell 3   | Cell 4
    #
    # The callback name is spelled exactly like the method it refers to
    # instead of relying on case-insensitive method lookup.
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            (\S.*[|].*) \n              # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline

            (                           # $3: Cells
                (?>
                    .* [|] .* \n        # Row content
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array(&$this, '_doTable_callback'), $text);

    return $text;
}
function _doTable_leadingPipe_callback($matches) {
#
# Callback for tables written with a leading pipe on every row.
#
# Strips the leading pipe (and any spaces before it) from each body row,
# then delegates to _doTable_callback() with the same four-element match
# layout: whole match, header row, underline, body rows.
#
    list($whole, $head, $underline, $rows) = $matches;

    # Remove leading pipe for each row.
    $rows = preg_replace('/^ *[|]/m', '', $rows);

    $normalized = array($whole, $head, $underline, $rows);
    return $this->_doTable_callback($normalized);
}
function _doTable_callback($matches) {
#
# Build the HTML for one table.
#
# $matches[1] is the header row, $matches[2] the header underline and
# $matches[3] the body rows (one per line). Returns the hashed <table>
# block followed by a newline.
#
    $head       = $matches[1];
    $underline  = $matches[2];
    $content    = $matches[3];

    # Remove any tailing pipes for each line.
    $head       = preg_replace('/[|] *$/m', '', $head);
    $underline  = preg_replace('/[|] *$/m', '', $underline);
    $content    = preg_replace('/[|] *$/m', '', $content);

    # Reading alignement from header underline:
    #   ---:  right,   :---:  center,   :---  left,   anything else: none.
    $separators = preg_split('/ *[|] */', $underline);
    $attr = array();
    foreach ($separators as $n => $s) {
        if (preg_match('/^ *-+: *$/', $s))          $attr[$n] = ' align="right"';
        else if (preg_match('/^ *:-+: *$/', $s))    $attr[$n] = ' align="center"';
        else if (preg_match('/^ *:-+ *$/', $s))     $attr[$n] = ' align="left"';
        else                                        $attr[$n] = '';
    }

    # Parsing span elements, including code spans, character escapes,
    # and inline HTML tags, so that pipes inside those gets ignored.
    $head       = $this->parseSpan($head);
    $headers    = preg_split('/ *[|] */', $head);
    $col_count  = count($headers);

    # The underline may describe fewer columns than the header row has;
    # give the extra columns an empty alignment so every index used
    # below exists.
    $attr       = array_pad($attr, $col_count, '');

    # Write column headers.
    $text = "<table>\n";
    $text .= "<thead>\n";
    $text .= "<tr>\n";
    foreach ($headers as $n => $header)
        $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
    $text .= "</tr>\n";
    $text .= "</thead>\n";

    # Split content by row.
    $rows = explode("\n", trim($content, "\n"));

    $text .= "<tbody>\n";
    foreach ($rows as $row) {
        # Parsing span elements, including code spans, character escapes,
        # and inline HTML tags, so that pipes inside those gets ignored.
        $row = $this->parseSpan($row);

        # Split row by cell: never more cells than there are headers
        # (surplus pipes stay in the last cell), and missing cells are
        # filled with empty strings.
        $row_cells = preg_split('/ *[|] */', $row, $col_count);
        $row_cells = array_pad($row_cells, $col_count, '');

        $text .= "<tr>\n";
        foreach ($row_cells as $n => $cell)
            $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
        $text .= "</tr>\n";
    }
    $text .= "</tbody>\n";
    $text .= "</table>";

    return $this->hashBlock($text) . "\n";
}
02411
02412
function doDefLists($text) {
#
# Form HTML definition lists.
#
# Looks for whole definition lists located at the very start of $text or
# right after a blank line, and replaces each one with the return value
# of _doDefLists_callback(). Returns the resulting text.
#
    # Largest run of leading spaces still accepted in front of a term or
    # of a definition colon.
    $max_indent = $this->tab_width - 1;

    # Re-usable pattern to match any entire dl list:
    $dl_re = '(?>
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,'.$max_indent.'}
            ((?>.*\S.*\n)+)             # $3 = defined term
            \n?
            [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another term
                [ ]{0,'.$max_indent.'}
                (?: \S.*\n )+?          # defined term
                \n?
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
              (?!                       # Negative lookahead for another definition
                [ ]{0,'.$max_indent.'}:[ ]+ # colon starting definition
              )
          )
        )
    )'; // mx

    $anchored_re = '{
            (?>\A\n?|(?<=\n\n))
            '.$dl_re.'
        }mx';
    $handler = array(&$this, '_doDefLists_callback');

    return preg_replace_callback($anchored_re, $handler, $text);
}
02455         function _doDefLists_callback($matches) {
02456                 # Re-usable patterns to match list item bullets and number markers:
02457                 $list = $matches[1];
02458
02459                 # Turn double returns into triple returns, so that we can make a
02460                 # paragraph for the last item in a list, if necessary:
02461                 $result = trim($this->processDefListItems($list));
02462                 $result = "<dl>\n" . $result . "\n</dl>";
02463                 return $this->hashBlock($result) . "\n\n";
02464         }
02465
02466
02467         function processDefListItems($list_str) {
02468         #
02469         #       Process the contents of a single definition list, splitting it
02470         #       into individual term and definition list items.
02471         #
02472                 $less_than_tab = $this->tab_width - 1;
02473
02474                 # trim trailing blank lines:
02475                 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
02476
02477                 # Process definition terms.
02478                 $list_str = preg_replace_callback('{
02479                         (?>\A\n?|\n\n+)                                 # leading line
02480                         (                                                               # definition terms = $1
02481                                 [ ]{0,'.$less_than_tab.'}       # leading whitespace
02482                                 (?![:][ ]|[ ])                          # negative lookahead for a definition
02483                                                                                         #   mark (colon) or more whitespace.
02484                                 (?> \S.* \n)+?                          # actual term (not whitespace).
02485                         )
02486                         (?=\n?[ ]{0,3}:[ ])                             # lookahead for following line feed
02487                                                                                         #   with a definition mark.
02488                         }xm',
02489                         array(&$this, '_processDefListItems_callback_dt'), $list_str);
02490
02491                 # Process actual definitions.
02492                 $list_str = preg_replace_callback('{
02493                         \n(\n+)?                                                # leading line = $1
02494                         (                                                               # marker space = $2
02495                                 [ ]{0,'.$less_than_tab.'}       # whitespace before colon
02496                                 [:][ ]+                                         # definition mark (colon)
02497                         )
02498                         ((?s:.+?))                                              # definition text = $3
02499                         (?= \n+                                                 # stop at next definition mark,
02500                                 (?:                                                     # next term or end of text
02501                                         [ ]{0,'.$less_than_tab.'} [:][ ]        |
02502                                         <dt> | \z
02503                                 )
02504                         )
02505                         }xm',
02506                         array(&$this, '_processDefListItems_callback_dd'), $list_str);
02507
02508                 return $list_str;
02509         }
02510         function _processDefListItems_callback_dt($matches) {
02511                 $terms = explode("\n", trim($matches[1]));
02512                 $text = '';
02513                 foreach ($terms as $term) {
02514                         $term = $this->runSpanGamut(trim($term));
02515                         $text .= "\n<dt>" . $term . "</dt>";
02516                 }
02517                 return $text . "\n";
02518         }
02519         function _processDefListItems_callback_dd($matches) {
02520                 $leading_line   = $matches[1];
02521                 $marker_space   = $matches[2];
02522                 $def                    = $matches[3];
02523
02524                 if ($leading_line || preg_match('/\n{2,}/', $def)) {
02525                         # Replace marker with the appropriate whitespace indentation
02526                         $def = str_repeat(' ', strlen($marker_space)) . $def;
02527                         $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
02528                         $def = "\n". $def ."\n";
02529                 }
02530                 else {
02531                         $def = rtrim($def);
02532                         $def = $this->runSpanGamut($this->outdent($def));
02533                 }
02534
02535                 return "\n<dd>" . $def . "</dd>\n";
02536         }
02537
02538
02539         function doFencedCodeBlocks($text) {
02540         #
02541         # Adding the fenced code block syntax to regular Markdown:
02542         #
02543         # ~~~
02544         # Code block
02545         # ~~~
02546         #
02547                 $less_than_tab = $this->tab_width;
02548
02549                 $text = preg_replace_callback('{
02550                                 (?:\n|\A)
02551                                 # 1: Opening marker
02552                                 (
02553                                         ~{3,} # Marker: three tilde or more.
02554                                 )
02555                                 [ ]* \n # Whitespace and newline following marker.
02556
02557                                 # 2: Content
02558                                 (
02559                                         (?>
02560                                                 (?!\1 [ ]* \n)  # Not a closing marker.
02561                                                 .*\n+
02562                                         )+
02563                                 )
02564
02565                                 # Closing marker.
02566                                 \1 [ ]* \n
02567                         }xm',
02568                         array(&$this, '_doFencedCodeBlocks_callback'), $text);
02569
02570                 return $text;
02571         }
02572         function _doFencedCodeBlocks_callback($matches) {
02573                 $codeblock = $matches[2];
02574                 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
02575                 $codeblock = preg_replace_callback('/^\n+/',
02576                         array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
02577                 $codeblock = "<pre><code>$codeblock</code></pre>";
02578                 return "\n\n".$this->hashBlock($codeblock)."\n\n";
02579         }
02580         function _doFencedCodeBlocks_newlines($matches) {
02581                 return str_repeat("<br$this->empty_element_suffix",
02582                         strlen($matches[0]));
02583         }
02584
02585
02586         #
02587         # Redefining emphasis markers so that emphasis by underscore does not
02588         # work in the middle of a word.
02589         #
02590         var $em_relist = array(
02591                 ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![.,:;]\s)',
02592                 '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
02593                 '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
02594                 );
02595         var $strong_relist = array(
02596                 ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![.,:;]\s)',
02597                 '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
02598                 '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
02599                 );
02600         var $em_strong_relist = array(
02601                 ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![.,:;]\s)',
02602                 '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
02603                 '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
02604                 );
02605
02606
02607         function formParagraphs($text) {
02608         #
02609         #       Params:
02610         #               $text - string to process with html <p> tags
02611         #
02612                 # Strip leading and trailing lines:
02613                 $text = preg_replace('/\A\n+|\n+\z/', '', $text);
02614
02615                 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
02616
02617                 #
02618                 # Wrap <p> tags and unhashify HTML blocks
02619                 #
02620                 foreach ($grafs as $key => $value) {
02621                         $value = trim($this->runSpanGamut($value));
02622
02623                         # Check if this should be enclosed in a paragraph.
02624                         # Clean tag hashes & block tag hashes are left alone.
02625                         $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
02626
02627                         if ($is_p) {
02628                                 $value = "<p>$value</p>";
02629                         }
02630                         $grafs[$key] = $value;
02631                 }
02632
02633                 # Join grafs in one text, then unhash HTML tags.
02634                 $text = implode("\n\n", $grafs);
02635
02636                 # Finish by removing any tag hashes still present in $text.
02637                 $text = $this->unhash($text);
02638
02639                 return $text;
02640         }
02641
02642
02643         ### Footnotes
02644
02645         function stripFootnotes($text) {
02646         #
02647         # Strips link definitions from text, stores the URLs and titles in
02648         # hash references.
02649         #
02650                 $less_than_tab = $this->tab_width - 1;
02651
02652                 # Link defs are in the form: [^id]: url "optional title"
02653                 $text = preg_replace_callback('{
02654                         ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:      # note_id = $1
02655                           [ ]*
02656                           \n?                                   # maybe *one* newline
02657                         (                                               # text = $2 (no blank lines allowed)
02658                                 (?:
02659                                         .+                              # actual text
02660                                 |
02661                                         \n                              # newlines but
02662                                         (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
02663                                         (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
02664                                                                         # by non-indented content
02665                                 )*
02666                         )
02667                         }xm',
02668                         array(&$this, '_stripFootnotes_callback'),
02669                         $text);
02670                 return $text;
02671         }
02672         function _stripFootnotes_callback($matches) {
02673                 $note_id = $this->fn_id_prefix . $matches[1];
02674                 $this->footnotes[$note_id] = $this->outdent($matches[2]);
02675                 return ''; # String that will replace the block
02676         }
02677
02678
02679         function doFootnotes($text) {
02680         #
02681         # Replace footnote references in $text [^id] with a special text-token
02682         # which will be replaced by the actual footnote marker in appendFootnotes.
02683         #
02684                 if (!$this->in_anchor) {
02685                         $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
02686                 }
02687                 return $text;
02688         }
02689
02690
02691         function appendFootnotes($text) {
02692         #
02693         # Append footnote list to text.
02694         #
02695                 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
02696                         array(&$this, '_appendFootnotes_callback'), $text);
02697
02698                 if (!empty($this->footnotes_ordered)) {
02699                         $text .= "\n\n";
02700                         $text .= "<div class=\"footnotes\">\n";
02701                         $text .= "<hr". $this->empty_element_suffix ."\n";
02702                         $text .= "<ol>\n\n";
02703
02704                         $attr = " rev=\"footnote\"";
02705                         if ($this->fn_backlink_class != "") {
02706                                 $class = $this->fn_backlink_class;
02707                                 $class = $this->encodeAttribute($class);
02708                                 $attr .= " class=\"$class\"";
02709                         }
02710                         if ($this->fn_backlink_title != "") {
02711                                 $title = $this->fn_backlink_title;
02712                                 $title = $this->encodeAttribute($title);
02713                                 $attr .= " title=\"$title\"";
02714                         }
02715                         $num = 0;
02716
02717                         while (!empty($this->footnotes_ordered)) {
02718                                 $footnote = reset($this->footnotes_ordered);
02719                                 $note_id = key($this->footnotes_ordered);
02720                                 unset($this->footnotes_ordered[$note_id]);
02721
02722                                 $footnote .= "\n"; # Need to append newline before parsing.
02723                                 $footnote = $this->runBlockGamut("$footnote\n");
02724                                 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
02725                                         array(&$this, '_appendFootnotes_callback'), $footnote);
02726
02727                                 $attr = str_replace("%%", ++$num, $attr);
02728                                 $note_id = $this->encodeAttribute($note_id);
02729
02730                                 # Add backlink to last paragraph; create new paragraph if needed.
02731                                 $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
02732                                 if (preg_match('{</p>$}', $footnote)) {
02733                                         $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
02734                                 } else {
02735                                         $footnote .= "\n\n<p>$backlink</p>";
02736                                 }
02737
02738                                 $text .= "<li id=\"fn:$note_id\">\n";
02739                                 $text .= $footnote . "\n";
02740                                 $text .= "</li>\n\n";
02741                         }
02742
02743                         $text .= "</ol>\n";
02744                         $text .= "</div>";
02745                 }
02746                 return $text;
02747         }
02748         function _appendFootnotes_callback($matches) {
02749                 $node_id = $this->fn_id_prefix . $matches[1];
02750
02751                 # Create footnote marker only if it has a corresponding footnote *and*
02752                 # the footnote hasn't been used by another marker.
02753                 if (isset($this->footnotes[$node_id])) {
02754                         # Transfert footnote content to the ordered list.
02755                         $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
02756                         unset($this->footnotes[$node_id]);
02757
02758                         $num = $this->footnote_counter++;
02759                         $attr = " rel=\"footnote\"";
02760                         if ($this->fn_link_class != "") {
02761                                 $class = $this->fn_link_class;
02762                                 $class = $this->encodeAttribute($class);
02763                                 $attr .= " class=\"$class\"";
02764                         }
02765                         if ($this->fn_link_title != "") {
02766                                 $title = $this->fn_link_title;
02767                                 $title = $this->encodeAttribute($title);
02768                                 $attr .= " title=\"$title\"";
02769                         }
02770
02771                         $attr = str_replace("%%", $num, $attr);
02772                         $node_id = $this->encodeAttribute($node_id);
02773
02774                         return
02775                                 "<sup id=\"fnref:$node_id\">".
02776                                 "<a href=\"#fn:$node_id\"$attr>$num</a>".
02777                                 "</sup>";
02778                 }
02779
02780                 return "[^".$matches[1]."]";
02781         }
02782
02783
02784         ### Abbreviations ###
02785
02786         function stripAbbreviations($text) {
02787         #
02788         # Strips abbreviations from text, stores titles in hash references.
02789         #
02790                 $less_than_tab = $this->tab_width - 1;
02791
02792                 # Link defs are in the form: [id]*: url "optional title"
02793                 $text = preg_replace_callback('{
02794                         ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:      # abbr_id = $1
02795                         (.*)                                    # text = $2 (no blank lines allowed)
02796                         }xm',
02797                         array(&$this, '_stripAbbreviations_callback'),
02798                         $text);
02799                 return $text;
02800         }
02801         function _stripAbbreviations_callback($matches) {
02802                 $abbr_word = $matches[1];
02803                 $abbr_desc = $matches[2];
02804                 if ($this->abbr_word_re)
02805                         $this->abbr_word_re .= '|';
02806                 $this->abbr_word_re .= preg_quote($abbr_word);
02807                 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
02808                 return ''; # String that will replace the block
02809         }
02810
02811
02812         function doAbbreviations($text) {
02813         #
02814         # Find defined abbreviations in text and wrap them in <abbr> elements.
02815         #
02816                 if ($this->abbr_word_re) {
02817                         // cannot use the /x modifier because abbr_word_re may
02818                         // contain significant spaces:
02819                         $text = preg_replace_callback('{'.
02820                                 '(?<![\w\x1A])'.
02821                                 '(?:'.$this->abbr_word_re.')'.
02822                                 '(?![\w\x1A])'.
02823                                 '}',
02824                                 array(&$this, '_doAbbreviations_callback'), $text);
02825                 }
02826                 return $text;
02827         }
02828         function _doAbbreviations_callback($matches) {
02829                 $abbr = $matches[0];
02830                 if (isset($this->abbr_desciptions[$abbr])) {
02831                         $desc = $this->abbr_desciptions[$abbr];
02832                         if (empty($desc)) {
02833                                 return $this->hashPart("<abbr>$abbr</abbr>");
02834                         } else {
02835                                 $desc = $this->encodeAttribute($desc);
02836                                 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
02837                         }
02838                 } else {
02839                         return $matches[0];
02840                 }
02841         }
02842
02843 }
02844
02845
02846 /*
02847
02848 PHP Markdown Extra
02849 ==================
02850
02851 Description
02852 -----------
02853
02854 This is a PHP port of the original Markdown formatter written in Perl
02855 by John Gruber. This special "Extra" version of PHP Markdown features
02856 further enhancements to the syntax for making additional constructs
02857 such as tables and definition lists.
02858
02859 Markdown is a text-to-HTML filter; it translates an easy-to-read /
02860 easy-to-write structured text format into HTML. Markdown's text format
02861 is most similar to that of plain text email, and supports features such
02862 as headers, *emphasis*, code blocks, blockquotes, and links.
02863
02864 Markdown's syntax is designed not as a generic markup language, but
02865 specifically to serve as a front-end to (X)HTML. You can use span-level
02866 HTML tags anywhere in a Markdown document, and you can use block level
02867 HTML tags (like <div> and <table>) as well.
02868
02869 For more information about Markdown's syntax, see:
02870
02871 <http://daringfireball.net/projects/markdown/>
02872
02873
02874 Bugs
02875 ----
02876
02877 To file bug reports please send email to:
02878
02879 <michel.fortin@michelf.com>
02880
02881 Please include with your report: (1) the example input; (2) the output you
02882 expected; (3) the output Markdown actually produced.
02883
02884
02885 Version History
02886 ---------------
02887
02888 See the readme file for detailed release notes for this version.
02889
02890
02891 Copyright and License
02892 ---------------------
02893
02894 PHP Markdown & Extra
02895 Copyright (c) 2004-2009 Michel Fortin
02896 <http://michelf.com/>
02897 All rights reserved.
02898
02899 Based on Markdown
02900 Copyright (c) 2003-2006 John Gruber
02901 <http://daringfireball.net/>
02902 All rights reserved.
02903
02904 Redistribution and use in source and binary forms, with or without
02905 modification, are permitted provided that the following conditions are
02906 met:
02907
02908 *       Redistributions of source code must retain the above copyright notice,
02909         this list of conditions and the following disclaimer.
02910
02911 *       Redistributions in binary form must reproduce the above copyright
02912         notice, this list of conditions and the following disclaimer in the
02913         documentation and/or other materials provided with the distribution.
02914
02915 *       Neither the name "Markdown" nor the names of its contributors may
02916         be used to endorse or promote products derived from this software
02917         without specific prior written permission.
02918
02919 This software is provided by the copyright holders and contributors "as
02920 is" and any express or implied warranties, including, but not limited
02921 to, the implied warranties of merchantability and fitness for a
02922 particular purpose are disclaimed. In no event shall the copyright owner
02923 or contributors be liable for any direct, indirect, incidental, special,
02924 exemplary, or consequential damages (including, but not limited to,
02925 procurement of substitute goods or services; loss of use, data, or
02926 profits; or business interruption) however caused and on any theory of
02927 liability, whether in contract, strict liability, or tort (including
02928 negligence or otherwise) arising in any way out of the use of this
02929 software, even if advised of the possibility of such damage.
02930
02931 */
02932 ?>