|
Moodle
2.2.1
http://www.collinsharper.com
|
<?php
#
# Markdown Extra  -  A text-to-HTML conversion tool for web writers
#
# PHP Markdown & Extra
# Copyright (c) 2004-2009 Michel Fortin
# <http://michelf.com/projects/php-markdown/>
#
# Original Markdown
# Copyright (c) 2004-2006 John Gruber
# <http://daringfireball.net/projects/markdown/>
#


define( 'MARKDOWN_VERSION',  "1.0.1n" ); # Sat 10 Oct 2009
define( 'MARKDOWNEXTRA_VERSION',  "1.2.4" ); # Sat 10 Oct 2009


#
# Global default settings:
#
# Every setting below may be defined by the including script before this
# file is loaded; a value that already exists is left untouched.
#

# Change to ">" for HTML output
if (!defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX')) define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");

# Define the width of a tab for code blocks.
if (!defined('MARKDOWN_TAB_WIDTH')) define( 'MARKDOWN_TAB_WIDTH',     4 );

# Optional title attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_TITLE'))     define( 'MARKDOWN_FN_LINK_TITLE',         "" );
if (!defined('MARKDOWN_FN_BACKLINK_TITLE')) define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );

# Optional class attribute for footnote links and backlinks.
if (!defined('MARKDOWN_FN_LINK_CLASS'))     define( 'MARKDOWN_FN_LINK_CLASS',         "" );
if (!defined('MARKDOWN_FN_BACKLINK_CLASS')) define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );


#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
if (!defined('MARKDOWN_WP_POSTS'))    define( 'MARKDOWN_WP_POSTS',      true );
if (!defined('MARKDOWN_WP_COMMENTS')) define( 'MARKDOWN_WP_COMMENTS',   true );



### Standard Function Interface ###

if (!defined('MARKDOWN_PARSER_CLASS')) define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );

function Markdown($text) {
#
# Initialize the parser and return the result of its transform method.
# The parser instance is created once, from the class named by
# MARKDOWN_PARSER_CLASS, and reused on every later call.
#
    # Setup static parser variable.
    static $parser;
    if (!isset($parser)) {
        $parser_class = MARKDOWN_PARSER_CLASS;
        $parser = new $parser_class;
    }

    # Transform text using parser.
    return $parser->transform($text);
}


### WordPress Plugin Interface ###

/*
Plugin Name: Markdown Extra
Plugin URI: http://michelf.com/projects/php-markdown/
Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>
Version: 1.2.4
Author: Michel Fortin
Author URI: http://michelf.com/
*/

if (isset($wp_version)) {
    # More details about how it works here:
    # <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>

    # Post content and excerpts
    # - Remove WordPress paragraph generator.
    # - Run Markdown on excerpt, then remove all tags.
    # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
    if (MARKDOWN_WP_POSTS) {
        remove_filter('the_content',     'wpautop');
        remove_filter('the_content_rss', 'wpautop');
        remove_filter('the_excerpt',     'wpautop');
        add_filter('the_content',     'mdwp_MarkdownPost', 6);
        add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
        add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
        add_filter('get_the_excerpt', 'trim', 7);
        add_filter('the_excerpt',     'mdwp_add_p');
        add_filter('the_excerpt_rss', 'mdwp_strip_p');

        remove_filter('content_save_pre',  'balanceTags', 50);
        remove_filter('excerpt_save_pre',  'balanceTags', 50);
        add_filter('the_content',     'balanceTags', 50);
        add_filter('get_the_excerpt', 'balanceTags', 9);
    }

    # Add a footnote id prefix to posts when inside a loop.
    function mdwp_MarkdownPost($text) {
        static $parser;
        if (!$parser) {
            $parser_class = MARKDOWN_PARSER_CLASS;
            $parser = new $parser_class;
        }
        # Single posts, pages and feeds get no prefix; anywhere else the
        # post ID is used as the prefix.
        if (is_single() || is_page() || is_feed()) {
            $parser->fn_id_prefix = "";
        } else {
            $parser->fn_id_prefix = get_the_ID() . ".";
        }
        return $parser->transform($text);
    }

    # Comments
    # - Remove WordPress paragraph generator.
    # - Remove WordPress auto-link generator.
    # - Scramble important tags before passing them to the kses filter.
    # - Run Markdown on excerpt then remove paragraph tags.
    if (MARKDOWN_WP_COMMENTS) {
        remove_filter('comment_text', 'wpautop', 30);
        remove_filter('comment_text', 'make_clickable');
        add_filter('pre_comment_content', 'Markdown', 6);
        add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
        add_filter('pre_comment_content', 'mdwp_show_tags', 12);
        add_filter('get_comment_text',    'Markdown', 6);
        add_filter('get_comment_excerpt', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);

        # Parallel lists: each tag in $mdwp_hidden_tags is swapped with the
        # placeholder at the same position in $mdwp_placeholders.
        global $mdwp_hidden_tags, $mdwp_placeholders;
        $mdwp_hidden_tags = explode(' ',
            '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
        $mdwp_placeholders = explode(' ', str_rot13(
            'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
            'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
    }

    # Wrap $text in paragraph tags unless it is empty or already starts with
    # a p, ul, ol, dl, pre or blockquote tag; runs of two or more newlines
    # become paragraph breaks.
    function mdwp_add_p($text) {
        if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
            $text = '<p>'.$text.'</p>';
            $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
        }
        return $text;
    }

    # Remove every <p> and </p> tag from $t.
    function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }

    # Replace the hidden tags with their placeholders.
    function mdwp_hide_tags($text) {
        global $mdwp_hidden_tags, $mdwp_placeholders;
        return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
    }
    # Replace the placeholders with the tags they stand for.
    function mdwp_show_tags($text) {
        global $mdwp_hidden_tags, $mdwp_placeholders;
        return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
    }
}


### bBlog Plugin Info ###

function identify_modifier_markdown() {
    return array(
        'name' => 'markdown',
        'type' => 'modifier',
        'nicename' => 'PHP Markdown Extra',
        'description' => 'A text-to-HTML conversion tool for web writers',
        'authors' => 'Michel Fortin and John Gruber',
        'licence' => 'GPL',
        'version' => MARKDOWNEXTRA_VERSION,
        'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>',
    );
}


### Smarty Modifier Interface ###

function smarty_modifier_markdown($text) {
    return Markdown($text);
}


### Textile Compatibility Mode ###

# Rename this file to "classTextile.php" and it can replace Textile everywhere.

if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
    # Try to include PHP SmartyPants. Should be in the same directory.
    # (Best effort: the warning is suppressed when the file is missing.)
    @include_once 'smartypants.php';
    # Fake Textile class. It calls Markdown instead.
    class Textile {
        function TextileThis($text, $lite='', $encode='') {
            if ($lite == '' && $encode == '')    $text = Markdown($text);
            if (function_exists('SmartyPants'))  $text = SmartyPants($text);
            return $text;
        }
        # Fake restricted version: restrictions are not supported for now.
        function TextileRestricted($text, $lite='', $noimage='') {
            return $this->TextileThis($text, $lite);
        }
        # Workaround to ensure compatibility with TextPattern 4.0.3.
        function blockLite($text) { return $text; }
    }
}



#
# Markdown Parser Class
#

class Markdown_Parser {

    # Regex to match balanced [brackets].
    # Needed to insert a maximum bracket depth while converting to PHP.
    var $nested_brackets_depth = 6;
    var $nested_brackets_re;

    var $nested_url_parenthesis_depth = 4;
    var $nested_url_parenthesis_re;

    # Table of hash values for escaped characters:
    var $escape_chars = '\`*_{}[]()>#+-.!';
    var $escape_chars_re;

    # Change to ">" for HTML output.
    var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
    var $tab_width = MARKDOWN_TAB_WIDTH;

    # Change to `true` to disallow markup or entities.
    var $no_markup = false;
    var $no_entities = false;

    # Predefined urls and titles for reference links and images.
    var $predef_urls = array();
    var $predef_titles = array();


    function __construct() {
    #
    # Constructor function. Initialize appropriate member variables.
    #
    # Declared as __construct() so that it runs on PHP versions which no
    # longer treat a method named after the class as the constructor.
    #
        $this->_initDetab();
        $this->prepareItalicsAndBold();

        $this->nested_brackets_re =
            str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
            str_repeat('\])*', $this->nested_brackets_depth);

        $this->nested_url_parenthesis_re =
            str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
            str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

        $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

        # Sort document, block, and span gamut in ascending priority order.
        asort($this->document_gamut);
        asort($this->block_gamut);
        asort($this->span_gamut);
    }

    function Markdown_Parser() {
    #
    # Old-style constructor name, kept so that code calling
    # `parent::Markdown_Parser()` keeps working. It only delegates to
    # __construct(); the call is scoped to this class so that a subclass
    # overriding __construct() cannot cause a recursion.
    #
        Markdown_Parser::__construct();
    }


    # Internal hashes used during transformation.
    var $urls = array();
    var $titles = array();
    var $html_hashes = array();

    # Status flag to avoid invalid nesting.
    var $in_anchor = false;


    function setup() {
    #
    # Called before the transformation process starts to setup parser
    # states.
    #
        # Clear global hashes.
        $this->urls = $this->predef_urls;
        $this->titles = $this->predef_titles;
        $this->html_hashes = array();

        # Reset the member flag (a bare `$in_anchor` would only create an
        # unused local variable and leave the flag in its previous state).
        $this->in_anchor = false;
    }

    function teardown() {
    #
    # Called after the transformation process to clear any variable
    # which may be taking up memory unnecessarily.
    #
        $this->urls = array();
        $this->titles = array();
        $this->html_hashes = array();
    }


    function transform($text) {
    #
    # Main function. Performs some preprocessing on the input text
    # and pass it through the document gamut.
    #
        $this->setup();

        # Remove UTF-8 BOM and marker character in input, if present.
        $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

        # Standardize line endings:
        #   DOS to Unix and Mac to Unix
        $text = preg_replace('{\r\n?}', "\n", $text);

        # Make sure $text ends with a couple of newlines:
        $text .= "\n\n";

        # Convert all tabs to spaces.
        $text = $this->detab($text);

        # Turn block-level HTML blocks into hash entries
        $text = $this->hashHTMLBlocks($text);

        # Strip any lines consisting only of spaces and tabs.
        # This makes subsequent regexen easier to write, because we can
        # match consecutive blank lines with /\n+/ instead of something
        # contorted like /[ ]*\n+/ .
        $text = preg_replace('/^[ ]+$/m', '', $text);

        # Run document gamut methods.
        foreach ($this->document_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        $this->teardown();

        return $text . "\n";
    }

    # Document-level methods, keyed by method name; the value is the
    # priority used to order them (sorted ascending in the constructor).
    var $document_gamut = array(
        # Strip link definitions, store in hashes.
        "stripLinkDefinitions" => 20,

        "runBasicBlockGamut"   => 30,
        );


    function stripLinkDefinitions($text) {
    #
    # Strips link definitions from text, stores the URLs and titles in
    # hash references.
    #
        $less_than_tab = $this->tab_width - 1;

        # Link defs are in the form: ^[id]: url "optional title"
        $text = preg_replace_callback('{
                            ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
                              [ ]*
                              \n?               # maybe *one* newline
                              [ ]*
                            (?:
                              <(.+?)>           # url = $2
                            |
                              (\S+?)            # url = $3
                            )
                              [ ]*
                              \n?               # maybe one newline
                              [ ]*
                            (?:
                                (?<=\s)         # lookbehind for whitespace
                                ["(]
                                (.*?)           # title = $4
                                [")]
                                [ ]*
                            )?  # title is optional
                            (?:\n+|\Z)
            }xm',
            array(&$this, '_stripLinkDefinitions_callback'),
            $text);
        return $text;
    }
    function _stripLinkDefinitions_callback($matches) {
    #
    # Store the URL and title of one link definition under its lower-cased
    # id and remove the definition from the text.
    #
        $link_id = strtolower($matches[1]);
        # The URL is in $2 when written as <url>, in $3 otherwise.
        $url = $matches[2] == '' ? $matches[3] : $matches[2];
        $this->urls[$link_id] = $url;
        $this->titles[$link_id] =& $matches[4];
        return ''; # String that will replace the block
    }


    function hashHTMLBlocks($text) {
    #
    # Replace block-level HTML found in $text with hash tokens (see
    # hashBlock). Returns $text untouched when markup is disallowed.
    #
        if ($this->no_markup)  return $text;

        $less_than_tab = $this->tab_width - 1;

        # Hashify HTML blocks:
        # We only want to do this for block-level HTML tags, such as headers,
        # lists, and tables. That's because we still want to wrap <p>s around
        # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
        # phrase emphasis, and spans. The list of tags we're looking for is
        # hard-coded:
        #
        # *  List "a" is made of tags which can be both inline or block-level.
        #    These will be treated block-level when the start tag is alone on
        #    its line, otherwise they're not matched here and will be taken as
        #    inline later.
        # *  List "b" is made of tags which are always block-level;
        #
        $block_tags_a_re = 'ins|del';
        $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                           'script|noscript|form|fieldset|iframe|math';

        # Regular expression for the content of a block tag.
        $nested_tags_level = 4;
        $attr = '
            (?>             # optional tag attributes
              \s            # starts with whitespace
              (?>
                [^>"/]+     # text outside quotes
              |
                /+(?!>)     # slash not followed by ">"
              |
                "[^"]*"     # text inside double quotes (tolerate ">")
              |
                \'[^\']*\'  # text inside single quotes (tolerate ">")
              )*
            )?
            ';
        $content =
            str_repeat('
                (?>
                  [^<]+         # content without tag
                |
                  <\2           # nested opening tag
                    '.$attr.'   # attributes
                    (?>
                      />
                    |
                      >', $nested_tags_level).  # end of opening tag
                      '.*?'.                    # last level nested tag content
            str_repeat('
                      </\2\s*>  # closing nested tag
                    )
                  |
                    <(?!/\2\s*> # other tags with a different name
                  )
                )*',
                $nested_tags_level);
        # Same content pattern for the group "a" alternative, whose tag
        # name is captured in $3 instead of $2.
        $content2 = str_replace('\2', '\3', $content);

        # First, look for nested blocks, e.g.:
        #   <div>
        #       <div>
        #       tags for inner block must be indented.
        #       </div>
        #   </div>
        #
        # The outermost tags must start at the left margin for this to match, and
        # the inner nested divs must be indented.
        # We need to do this before the next, more liberal match, because the next
        # match will start at the first `<div>` and stop at the first `</div>`.
        #
        # In the processing-instruction alternative the opening character is
        # the fifth capturing group of the whole pattern, so that is the
        # group its closing delimiter refers back to; a reference to $2
        # (unset in that alternative) could never match.
        $text = preg_replace_callback('{(?>
            (?>
                (?<=\n\n)       # Starting after a blank line
                |               # or
                \A\n?           # the beginning of the doc
            )
            (                       # save in $1

              # Match from `\n<tag>` to `</tag>\n`, handling nested tags
              # in between.

                        [ ]{0,'.$less_than_tab.'}
                        <('.$block_tags_b_re.')# start tag = $2
                        '.$attr.'>          # attributes followed by > and \n
                        '.$content.'        # content, support nesting
                        </\2>               # the matching end tag
                        [ ]*                # trailing spaces/tabs
                        (?=\n+|\Z)  # followed by a newline or end of document

            | # Special version for tags of group a.

                        [ ]{0,'.$less_than_tab.'}
                        <('.$block_tags_a_re.')# start tag = $3
                        '.$attr.'>[ ]*\n    # attributes followed by >
                        '.$content2.'       # content, support nesting
                        </\3>               # the matching end tag
                        [ ]*                # trailing spaces/tabs
                        (?=\n+|\Z)  # followed by a newline or end of document

            | # Special case just for <hr />. It was easier to make a special
              # case than to make the other regex more complicated.

                        [ ]{0,'.$less_than_tab.'}
                        <(hr)               # start tag = $2
                        '.$attr.'           # attributes
                        /?>                 # the matching end tag
                        [ ]*
                        (?=\n{2,}|\Z)       # followed by a blank line or end of document

            | # Special case for standalone HTML comments:

                    [ ]{0,'.$less_than_tab.'}
                    (?s:
                        <!-- .*? -->
                    )
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document

            | # PHP and ASP-style processor instructions (<? and <%)

                    [ ]{0,'.$less_than_tab.'}
                    (?s:
                        <([?%])         # $5
                        .*?
                        \5>
                    )
                    [ ]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document

            )
            )}Sxmi',
            array(&$this, '_hashHTMLBlocks_callback'),
            $text);

        return $text;
    }
    function _hashHTMLBlocks_callback($matches) {
    #
    # Replace one matched HTML block ($1) with its block hash token,
    # surrounded by blank lines.
    #
        $text = $matches[1];
        $key  = $this->hashBlock($text);
        return "\n\n$key\n\n";
    }


    function hashPart($text, $boundary = 'X') {
    #
    # Called whenever a tag must be hashed when a function insert an atomic
    # element in the text stream. Passing $text to through this function gives
    # a unique text-token which will be reverted back when calling unhash.
    #
    # The $boundary argument specify what character should be used to surround
    # the token. By convention, "B" is used for block elements that needs not
    # to be wrapped into paragraph tags at the end, ":" is used for elements
    # that are word separators and "X" is used in the general case.
    #
        # Swap back any tag hash found in $text so we do not have to `unhash`
        # multiple times at the end.
        $text = $this->unhash($text);

        # Then hash the block. The counter is a function-level static, so
        # token numbers keep increasing across calls.
        static $i = 0;
        $key = "$boundary\x1A" . ++$i . $boundary;
        $this->html_hashes[$key] = $text;
        return $key; # String that will replace the tag.
    }


    function hashBlock($text) {
    #
    # Shortcut function for hashPart with block-level boundaries.
    #
        return $this->hashPart($text, 'B');
    }


    var $block_gamut = array(
    #
    # These are all the transformations that form block-level
    # tags like paragraphs, headers, and list items.
    #
        "doHeaders"         => 10,
        "doHorizontalRules" => 20,

        "doLists"           => 40,
        "doCodeBlocks"      => 50,
        "doBlockQuotes"     => 60,
        );

    function runBlockGamut($text) {
    #
    # Run block gamut transformations.
    #
        # We need to escape raw HTML in Markdown source before doing anything
        # else. This need to be done for each block, and not only at the
        # beginning in the Markdown function since hashed blocks can be part of
        # list items and could have been indented. Indented blocks would have
        # been seen as a code block in a previous pass of hashHTMLBlocks.
        $text = $this->hashHTMLBlocks($text);

        return $this->runBasicBlockGamut($text);
    }

    function runBasicBlockGamut($text) {
    #
    # Run block gamut transformations, without hashing HTML blocks. This is
    # useful when HTML blocks are known to be already hashed, like in the first
    # whole-document pass.
    #
        foreach ($this->block_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        # Finally form paragraph and restore hashed blocks.
        $text = $this->formParagraphs($text);

        return $text;
    }


    function doHorizontalRules($text) {
        # Do Horizontal Rules:
        return preg_replace(
            '{
                ^[ ]{0,3}   # Leading space
                ([-*_])     # $1: First marker
                (?>         # Repeated marker group
                    [ ]{0,2}    # Zero, one, or two spaces.
                    \1          # Marker character
                ){2,}       # Group repeated at least twice
                [ ]*        # Tailing spaces
                $           # End of line.
            }mx',
            "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
            $text);
    }


    var $span_gamut = array(
    #
    # These are all the transformations that occur *within* block-level
    # tags like paragraphs, headers, and list items.
    #
        # Process character escapes, code spans, and inline HTML
        # in one shot.
        "parseSpan"           => -30,

        # Process anchor and image tags. Images must come first,
        # because ![foo][f] looks like an anchor.
        "doImages"            =>  10,
        "doAnchors"           =>  20,

        # Make links out of things like `<http://example.com/>`
        # Must come after doAnchors, because you can use < and >
        # delimiters in inline links like [this](<url>).
        "doAutoLinks"         =>  30,
        "encodeAmpsAndAngles" =>  40,

        "doItalicsAndBold"    =>  50,
        "doHardBreaks"        =>  60,
        );

    function runSpanGamut($text) {
    #
    # Run span gamut transformations.
    #
        foreach ($this->span_gamut as $method => $priority) {
            $text = $this->$method($text);
        }

        return $text;
    }


    function doHardBreaks($text) {
        # Do hard breaks:
        return preg_replace_callback('/ {2,}\n/',
            array(&$this, '_doHardBreaks_callback'), $text);
    }
    function _doHardBreaks_callback($matches) {
        # Hashed line-break tag followed by a newline.
        return $this->hashPart("<br$this->empty_element_suffix\n");
    }


    function doAnchors($text) {
    #
    # Turn Markdown link shortcuts into XHTML <a> tags.
    #
        # No links inside links: bail out when already processing an anchor.
        if ($this->in_anchor) return $text;
        $this->in_anchor = true;

        #
        # First, handle reference-style links: [link text] [id]
        #
        $text = preg_replace_callback('{
            (                   # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.') # link text = $2
              \]

              [ ]?              # one optional space
              (?:\n[ ]*)?       # one optional newline followed by spaces

              \[
                (.*?)       # id = $3
              \]
            )
            }xs',
            array(&$this, '_doAnchors_reference_callback'), $text);

        #
        # Next, inline-style links: [link text](url "optional title")
        #
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              \[
                ('.$this->nested_brackets_re.') # link text = $2
              \]
              \(            # literal paren
                [ \n]*
                (?:
                    <(.+?)> # href = $3
                |
                    ('.$this->nested_url_parenthesis_re.')  # href = $4
                )
                [ \n]*
                (           # $5
                  ([\'"])   # quote char = $6
                  (.*?)     # Title = $7
                  \6        # matching quote
                  [ \n]*    # ignore any spaces/tabs between closing quote and )
                )?          # title is optional
              \)
            )
            }xs',
            array(&$this, '_doAnchors_inline_callback'), $text);

        #
        # Last, handle reference-style shortcuts: [link text]
        # These must come last in case you've also got [link text][1]
        # or [link text](/foo)
        #
        $text = preg_replace_callback('{
            (                   # wrap whole match in $1
              \[
                ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
              \]
            )
            }xs',
            array(&$this, '_doAnchors_reference_callback'), $text);

        $this->in_anchor = false;
        return $text;
    }
    function _doAnchors_reference_callback($matches) {
    #
    # Build the hashed <a> tag for a reference-style link, or return the
    # matched text unchanged when the link id has no stored URL.
    #
        $whole_match =  $matches[1];
        $link_text   =  $matches[2];
        $link_id     =& $matches[3];

        if ($link_id == "") {
            # for shortcut links like [this][] or [this].
            $link_id = $link_text;
        }

        # lower-case and turn embedded newlines into spaces
        $link_id = strtolower($link_id);
        $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

        if (isset($this->urls[$link_id])) {
            $url = $this->urls[$link_id];
            $url = $this->encodeAttribute($url);

            $result = "<a href=\"$url\"";
            if ( isset( $this->titles[$link_id] ) ) {
                $title = $this->titles[$link_id];
                $title = $this->encodeAttribute($title);
                $result .=  " title=\"$title\"";
            }

            $link_text = $this->runSpanGamut($link_text);
            $result .= ">$link_text</a>";
            $result = $this->hashPart($result);
        }
        else {
            $result = $whole_match;
        }
        return $result;
    }
    function _doAnchors_inline_callback($matches) {
    #
    # Build the hashed <a> tag for an inline-style link.
    #
        $whole_match    =  $matches[1];
        $link_text      =  $this->runSpanGamut($matches[2]);
        # The URL is in $3 when written as <url>, in $4 otherwise.
        $url            =  $matches[3] == '' ? $matches[4] : $matches[3];
        $title          =& $matches[7];

        $url = $this->encodeAttribute($url);

        $result = "<a href=\"$url\"";
        if (isset($title)) {
            $title = $this->encodeAttribute($title);
            $result .=  " title=\"$title\"";
        }

        # NOTE(review): the link text already went through the span gamut
        # above; this second pass is kept exactly as in the original code.
        $link_text = $this->runSpanGamut($link_text);
        $result .= ">$link_text</a>";

        return $this->hashPart($result);
    }


    function doImages($text) {
    #
    # Turn Markdown image shortcuts into <img> tags.
    #
        #
        # First, handle reference-style labeled images: ![alt text][id]
        #
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              !\[
                ('.$this->nested_brackets_re.')     # alt text = $2
              \]

              [ ]?              # one optional space
              (?:\n[ ]*)?       # one optional newline followed by spaces

              \[
                (.*?)       # id = $3
              \]

            )
            }xs',
            array(&$this, '_doImages_reference_callback'), $text);

        #
        # Next, handle inline images:  ![alt text](url "optional title")
        # Don't forget: encode * and _
        #
        $text = preg_replace_callback('{
            (               # wrap whole match in $1
              !\[
                ('.$this->nested_brackets_re.')     # alt text = $2
              \]
              \s?           # One optional whitespace character
              \(            # literal paren
                [ \n]*
                (?:
                    <(\S*)> # src url = $3
                |
                    ('.$this->nested_url_parenthesis_re.')  # src url = $4
                )
                [ \n]*
                (           # $5
                  ([\'"])   # quote char = $6
                  (.*?)     # title = $7
                  \6        # matching quote
                  [ \n]*
                )?          # title is optional
              \)
            )
            }xs',
            array(&$this, '_doImages_inline_callback'), $text);

        return $text;
    }
    function _doImages_reference_callback($matches) {
    #
    # Build the hashed <img> tag for a reference-style image, or return the
    # matched text unchanged when the link id has no stored URL.
    #
        $whole_match = $matches[1];
        $alt_text    = $matches[2];
        $link_id     = strtolower($matches[3]);

        if ($link_id == "") {
            $link_id = strtolower($alt_text); # for shortcut links like ![this][].
        }

        $alt_text = $this->encodeAttribute($alt_text);
        if (isset($this->urls[$link_id])) {
            $url = $this->encodeAttribute($this->urls[$link_id]);
            $result = "<img src=\"$url\" alt=\"$alt_text\"";
            if (isset($this->titles[$link_id])) {
                $title = $this->titles[$link_id];
                $title = $this->encodeAttribute($title);
                $result .=  " title=\"$title\"";
            }
            $result .= $this->empty_element_suffix;
            $result = $this->hashPart($result);
        }
        else {
            # If there's no such link ID, leave intact:
            $result = $whole_match;
        }

        return $result;
    }
    function _doImages_inline_callback($matches) {
    #
    # Build the hashed <img> tag for an inline image.
    #
        $whole_match    = $matches[1];
        $alt_text       = $matches[2];
        # The URL is in $3 when written as <url>, in $4 otherwise.
        $url            = $matches[3] == '' ? $matches[4] : $matches[3];
        $title          =& $matches[7];

        $alt_text = $this->encodeAttribute($alt_text);
        $url = $this->encodeAttribute($url);
        $result = "<img src=\"$url\" alt=\"$alt_text\"";
        if (isset($title)) {
            $title = $this->encodeAttribute($title);
            $result .=  " title=\"$title\""; # $title already quoted
        }
        $result .= $this->empty_element_suffix;

        return $this->hashPart($result);
    }


    function doHeaders($text) {
        # Setext-style headers:
        #     Header 1
        #     ========
        #
        #     Header 2
        #     --------
        #
        $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
            array(&$this, '_doHeaders_callback_setext'), $text);

        # atx-style headers:
        #   # Header 1
        #   ## Header 2
        #   ## Header 2 with closing hashes ##
        #   ...
        #   ###### Header 6
        #
        $text = preg_replace_callback('{
                ^(\#{1,6})  # $1 = string of #\'s
                [ ]*
                (.+?)       # $2 = Header text
                [ ]*
                \#*         # optional closing #\'s (not counted)
                \n+
            }xm',
            array(&$this, '_doHeaders_callback_atx'), $text);

        return $text;
    }
    function _doHeaders_callback_setext($matches) {
    #
    # Build the hashed <h1>/<h2> block for a setext-style header.
    #
        # Terrible hack to check we haven't found an empty list item.
        if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
            return $matches[0];

        # An underline of "=" makes a level 1 header, "-" a level 2 header.
        # Square brackets are used for the string offset: the curly-brace
        # form is not accepted by current PHP versions.
        $level = $matches[2][0] == '=' ? 1 : 2;
        $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }
    function _doHeaders_callback_atx($matches) {
    #
    # Build the hashed header block for an atx-style header; the level is
    # the number of leading # characters.
    #
        $level = strlen($matches[1]);
        $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
        return "\n" . $this->hashBlock($block) . "\n\n";
    }


    function doLists($text) {
    #
    # Form HTML ordered (numbered) and unordered (bulleted) lists.
    #
        $less_than_tab = $this->tab_width - 1;

        # Re-usable patterns to match list item bullets and number markers:
        $marker_ul_re  = '[*+-]';
        $marker_ol_re  = '\d+[.]';

        # Each marker kind is paired with the other kind, which ends a list.
        $markers_relist = array(
            $marker_ul_re => $marker_ol_re,
            $marker_ol_re => $marker_ul_re,
            );

        foreach ($markers_relist as $marker_re => $other_marker_re) {
            # Re-usable pattern to match any entire ul or ol list:
            $whole_list_re = '
                (                               # $1 = whole list
                  (                             # $2
                    ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
                    ('.$marker_re.')            # $4 = first list item marker
                    [ ]+
                  )
                  (?s:.+?)
                  (                             # $5
                      \z
                    |
                      \n{2,}
                      (?=\S)
                      (?!                       # Negative lookahead for another list item marker
                        [ ]*
                        '.$marker_re.'[ ]+
                      )
                    |
                      (?=                       # Lookahead for another kind of list
                        \n
                        \3                      # Must have the same indentation
                        '.$other_marker_re.'[ ]+
                      )
                  )
                )
            '; // mx

            # We use a different prefix before nested lists than top-level lists.
            # See extended comment in _ProcessListItems().

            if ($this->list_level) {
                $text = preg_replace_callback('{
                        ^
                        '.$whole_list_re.'
                    }mx',
                    array(&$this, '_doLists_callback'), $text);
            }
            else {
                $text = preg_replace_callback('{
                        (?:(?<=\n)\n|\A\n?) # Must eat the newline
                        '.$whole_list_re.'
                    }mx',
                    array(&$this, '_doLists_callback'), $text);
            }
        }

        return $text;
    }
    function _doLists_callback($matches) {
    #
    # Turn one matched list ($1) into a hashed <ul> or <ol> block.
    #
        # Re-usable patterns to match list item bullets and number markers:
        $marker_ul_re  = '[*+-]';
        $marker_ol_re  = '\d+[.]';

        $list = $matches[1];
        # The first item marker ($4) decides the kind of list.
        $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";

        # Only markers of that same kind delimit the items of this list.
        $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

        $list .= "\n";
        $result = $this->processListItems($list, $marker_any_re);

        $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
        return "\n". $result ."\n\n";
    }

    var $list_level = 0;

    function processListItems($list_str, $marker_any_re) {
    #
    #   Process the contents of a single ordered or unordered list, splitting it
    #   into individual list items.
    #
        # The $this->list_level global keeps track of when we're inside a list.
        # Each time we enter a list, we increment it; when we leave a list,
        # we decrement. If it's zero, we're not in a list anymore.
        #
        # We do this because when we're not inside a list, we want to treat
        # something like this:
        #
        #       I recommend upgrading to version
        #       8. Oops, now this line is treated
        #       as a sub-list.
        #
        # As a single paragraph, despite the fact that the second line starts
        # with a digit-period-space sequence.
        #
        # Whereas when we're inside a list (or sub-list), that line will be
        # treated as the start of a sub-list. What a kludge, huh? This is
        # an aspect of Markdown's syntax that's hard to parse perfectly
        # without resorting to mind-reading. Perhaps the solution is to
        # change the syntax rules such that sub-lists must start with a
        # starting cardinal number; e.g. "1." or "a.".

        $this->list_level++;

        # trim trailing blank lines:
        $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

        $list_str = preg_replace_callback('{
            (\n)?                           # leading line = $1
            (^[ ]*)                         # leading whitespace = $2
            ('.$marker_any_re.'             # list marker and space = $3
                (?:[ ]+|(?=\n)) # space only required if item is not empty
            )
            ((?s:.*?))                      # list item text   = $4
            (?:(\n+(?=\n))|\n)              # tailing blank line = $5
            (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
            }xm',
            array(&$this, '_processListItems_callback'), $list_str);

        $this->list_level--;
        return $list_str;
    }
    function _processListItems_callback($matches) {
    #
    # Turn one matched list item into an <li> element.
    #
        $item = $matches[4];
        $leading_line =& $matches[1];
        $leading_space =& $matches[2];
        $marker_space = $matches[3];
        $tailing_blank_line =& $matches[5];

        # An item next to a blank line, or containing one, is processed as
        # block-level content; any other item as span-level content.
        if ($leading_line || $tailing_blank_line ||
            preg_match('/\n{2,}/', $item))
        {
            # Replace marker with the appropriate whitespace indentation
            $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
            $item = $this->runBlockGamut($this->outdent($item)."\n");
        }
        else {
            # Recursion for sub-lists:
            $item = $this->doLists($this->outdent($item));
            $item = preg_replace('/\n+$/', '', $item);
            $item = $this->runSpanGamut($item);
        }

        return "<li>" . $item . "</li>\n";
    }


    function doCodeBlocks($text) {
    #
    #   Process Markdown `<pre><code>` blocks.
    #
        $text = preg_replace_callback('{
                (?:\n\n|\A\n?)
                (               # $1 = the code block -- one or more lines, starting with a space/tab
                  (?>
                    [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
                    .*\n+
                  )+
                )
                ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
            }xm',
            array(&$this, '_doCodeBlocks_callback'), $text);

        return $text;
    }
    function _doCodeBlocks_callback($matches) {
    #
    # Turn one matched indented block ($1) into a hashed <pre><code> block.
    #
        $codeblock = $matches[1];

        $codeblock = $this->outdent($codeblock);
        $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);

        # trim leading newlines and trailing newlines
        $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

        $codeblock = "<pre><code>$codeblock\n</code></pre>";
        return "\n\n".$this->hashBlock($codeblock)."\n\n";
    }


    function makeCodeSpan($code) {
    #
    # Create a code span markup for $code. Called from handleSpanToken.
    #
        $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
        return $this->hashPart("<code>$code</code>");
    }


    # Emphasis token patterns. In each table the '' key holds the pattern
    # for an opening token; the other keys are the token strings themselves
    # and hold the pattern that closes that token.
    var $em_relist = array(
        ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![.,:;]\s)',
        '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
        '_' => '(?<=\S|^)(?<!_)_(?!_)',
        );
    var $strong_relist = array(
        ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![.,:;]\s)',
        '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
        '__' => '(?<=\S|^)(?<!_)__(?!_)',
        );
    var $em_strong_relist = array(
        ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![.,:;]\s)',
        '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
        '___' => '(?<=\S|^)(?<!_)___(?!_)',
        );
    var $em_strong_prepared_relist;

    function prepareItalicsAndBold() {
    #
    # Prepare regular expressions for searching emphasis tokens in any
    # context.
01164 # 01165 foreach ($this->em_relist as $em => $em_re) { 01166 foreach ($this->strong_relist as $strong => $strong_re) { 01167 # Construct list of allowed token expressions. 01168 $token_relist = array(); 01169 if (isset($this->em_strong_relist["$em$strong"])) { 01170 $token_relist[] = $this->em_strong_relist["$em$strong"]; 01171 } 01172 $token_relist[] = $em_re; 01173 $token_relist[] = $strong_re; 01174 01175 # Construct master expression from list. 01176 $token_re = '{('. implode('|', $token_relist) .')}'; 01177 $this->em_strong_prepared_relist["$em$strong"] = $token_re; 01178 } 01179 } 01180 } 01181 01182 function doItalicsAndBold($text) { 01183 $token_stack = array(''); 01184 $text_stack = array(''); 01185 $em = ''; 01186 $strong = ''; 01187 $tree_char_em = false; 01188 01189 while (1) { 01190 # 01191 # Get prepared regular expression for seraching emphasis tokens 01192 # in current context. 01193 # 01194 $token_re = $this->em_strong_prepared_relist["$em$strong"]; 01195 01196 # 01197 # Each loop iteration search for the next emphasis token. 01198 # Each token is then passed to handleSpanToken. 01199 # 01200 $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 01201 $text_stack[0] .= $parts[0]; 01202 $token =& $parts[1]; 01203 $text =& $parts[2]; 01204 01205 if (empty($token)) { 01206 # Reached end of text span: empty stack without emitting. 01207 # any more emphasis. 01208 while ($token_stack[0]) { 01209 $text_stack[1] .= array_shift($token_stack); 01210 $text_stack[0] .= array_shift($text_stack); 01211 } 01212 break; 01213 } 01214 01215 $token_len = strlen($token); 01216 if ($tree_char_em) { 01217 # Reached closing marker while inside a three-char emphasis. 01218 if ($token_len == 3) { 01219 # Three-char closing marker, close em and strong. 
01220 array_shift($token_stack); 01221 $span = array_shift($text_stack); 01222 $span = $this->runSpanGamut($span); 01223 $span = "<strong><em>$span</em></strong>"; 01224 $text_stack[0] .= $this->hashPart($span); 01225 $em = ''; 01226 $strong = ''; 01227 } else { 01228 # Other closing marker: close one em or strong and 01229 # change current token state to match the other 01230 $token_stack[0] = str_repeat($token{0}, 3-$token_len); 01231 $tag = $token_len == 2 ? "strong" : "em"; 01232 $span = $text_stack[0]; 01233 $span = $this->runSpanGamut($span); 01234 $span = "<$tag>$span</$tag>"; 01235 $text_stack[0] = $this->hashPart($span); 01236 $$tag = ''; # $$tag stands for $em or $strong 01237 } 01238 $tree_char_em = false; 01239 } else if ($token_len == 3) { 01240 if ($em) { 01241 # Reached closing marker for both em and strong. 01242 # Closing strong marker: 01243 for ($i = 0; $i < 2; ++$i) { 01244 $shifted_token = array_shift($token_stack); 01245 $tag = strlen($shifted_token) == 2 ? "strong" : "em"; 01246 $span = array_shift($text_stack); 01247 $span = $this->runSpanGamut($span); 01248 $span = "<$tag>$span</$tag>"; 01249 $text_stack[0] .= $this->hashPart($span); 01250 $$tag = ''; # $$tag stands for $em or $strong 01251 } 01252 } else { 01253 # Reached opening three-char emphasis marker. Push on token 01254 # stack; will be handled by the special condition above. 
01255 $em = $token{0}; 01256 $strong = "$em$em"; 01257 array_unshift($token_stack, $token); 01258 array_unshift($text_stack, ''); 01259 $tree_char_em = true; 01260 } 01261 } else if ($token_len == 2) { 01262 if ($strong) { 01263 # Unwind any dangling emphasis marker: 01264 if (strlen($token_stack[0]) == 1) { 01265 $text_stack[1] .= array_shift($token_stack); 01266 $text_stack[0] .= array_shift($text_stack); 01267 } 01268 # Closing strong marker: 01269 array_shift($token_stack); 01270 $span = array_shift($text_stack); 01271 $span = $this->runSpanGamut($span); 01272 $span = "<strong>$span</strong>"; 01273 $text_stack[0] .= $this->hashPart($span); 01274 $strong = ''; 01275 } else { 01276 array_unshift($token_stack, $token); 01277 array_unshift($text_stack, ''); 01278 $strong = $token; 01279 } 01280 } else { 01281 # Here $token_len == 1 01282 if ($em) { 01283 if (strlen($token_stack[0]) == 1) { 01284 # Closing emphasis marker: 01285 array_shift($token_stack); 01286 $span = array_shift($text_stack); 01287 $span = $this->runSpanGamut($span); 01288 $span = "<em>$span</em>"; 01289 $text_stack[0] .= $this->hashPart($span); 01290 $em = ''; 01291 } else { 01292 $text_stack[0] .= $token; 01293 } 01294 } else { 01295 array_unshift($token_stack, $token); 01296 array_unshift($text_stack, ''); 01297 $em = $token; 01298 } 01299 } 01300 } 01301 return $text_stack[0]; 01302 } 01303 01304 01305 function doBlockQuotes($text) { 01306 $text = preg_replace_callback('/ 01307 ( # Wrap whole match in $1 01308 (?> 01309 ^[ ]*>[ ]? 
# ">" at the start of a line 01310 .+\n # rest of the first line 01311 (.+\n)* # subsequent consecutive lines 01312 \n* # blanks 01313 )+ 01314 ) 01315 /xm', 01316 array(&$this, '_doBlockQuotes_callback'), $text); 01317 01318 return $text; 01319 } 01320 function _doBlockQuotes_callback($matches) { 01321 $bq = $matches[1]; 01322 # trim one level of quoting - trim whitespace-only lines 01323 $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq); 01324 $bq = $this->runBlockGamut($bq); # recurse 01325 01326 $bq = preg_replace('/^/m', " ", $bq); 01327 # These leading spaces cause problem with <pre> content, 01328 # so we need to fix that: 01329 $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 01330 array(&$this, '_doBlockQuotes_callback2'), $bq); 01331 01332 return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n"; 01333 } 01334 function _doBlockQuotes_callback2($matches) { 01335 $pre = $matches[1]; 01336 $pre = preg_replace('/^ /m', '', $pre); 01337 return $pre; 01338 } 01339 01340 01341 function formParagraphs($text) { 01342 # 01343 # Params: 01344 # $text - string to process with html <p> tags 01345 # 01346 # Strip leading and trailing lines: 01347 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 01348 01349 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 01350 01351 # 01352 # Wrap <p> tags and unhashify HTML blocks 01353 # 01354 foreach ($grafs as $key => $value) { 01355 if (!preg_match('/^B\x1A[0-9]+B$/', $value)) { 01356 # Is a paragraph. 01357 $value = $this->runSpanGamut($value); 01358 $value = preg_replace('/^([ ]*)/', "<p>", $value); 01359 $value .= "</p>"; 01360 $grafs[$key] = $this->unhash($value); 01361 } 01362 else { 01363 # Is a block. 01364 # Modify elements of @grafs in-place... 
01365 $graf = $value; 01366 $block = $this->html_hashes[$graf]; 01367 $graf = $block; 01368 // if (preg_match('{ 01369 // \A 01370 // ( # $1 = <div> tag 01371 // <div \s+ 01372 // [^>]* 01373 // \b 01374 // markdown\s*=\s* ([\'"]) # $2 = attr quote char 01375 // 1 01376 // \2 01377 // [^>]* 01378 // > 01379 // ) 01380 // ( # $3 = contents 01381 // .* 01382 // ) 01383 // (</div>) # $4 = closing tag 01384 // \z 01385 // }xs', $block, $matches)) 01386 // { 01387 // list(, $div_open, , $div_content, $div_close) = $matches; 01388 // 01389 // # We can't call Markdown(), because that resets the hash; 01390 // # that initialization code should be pulled into its own sub, though. 01391 // $div_content = $this->hashHTMLBlocks($div_content); 01392 // 01393 // # Run document gamut methods on the content. 01394 // foreach ($this->document_gamut as $method => $priority) { 01395 // $div_content = $this->$method($div_content); 01396 // } 01397 // 01398 // $div_open = preg_replace( 01399 // '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open); 01400 // 01401 // $graf = $div_open . "\n" . $div_content . "\n" . $div_close; 01402 // } 01403 $grafs[$key] = $graf; 01404 } 01405 } 01406 01407 return implode("\n\n", $grafs); 01408 } 01409 01410 01411 function encodeAttribute($text) { 01412 # 01413 # Encode text for a double-quoted HTML attribute. This function 01414 # is *not* suitable for attributes enclosed in single quotes. 01415 # 01416 $text = $this->encodeAmpsAndAngles($text); 01417 $text = str_replace('"', '"', $text); 01418 return $text; 01419 } 01420 01421 01422 function encodeAmpsAndAngles($text) { 01423 # 01424 # Smart processing for ampersands and angle brackets that need to 01425 # be encoded. Valid character entities are left alone unless the 01426 # no-entities mode is set. 
01427 # 01428 if ($this->no_entities) { 01429 $text = str_replace('&', '&', $text); 01430 } else { 01431 # Ampersand-encoding based entirely on Nat Irons's Amputator 01432 # MT plugin: <http://bumppo.net/projects/amputator/> 01433 $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 01434 '&', $text);; 01435 } 01436 # Encode remaining <'s 01437 $text = str_replace('<', '<', $text); 01438 01439 return $text; 01440 } 01441 01442 01443 function doAutoLinks($text) { 01444 $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 01445 array(&$this, '_doAutoLinks_url_callback'), $text); 01446 01447 # Email addresses: <address@domain.foo> 01448 $text = preg_replace_callback('{ 01449 < 01450 (?:mailto:)? 01451 ( 01452 (?: 01453 [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+ 01454 | 01455 ".*?" 01456 ) 01457 \@ 01458 (?: 01459 [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+ 01460 | 01461 \[[\d.a-fA-F:]+\] # IPv4 & IPv6 01462 ) 01463 ) 01464 > 01465 }xi', 01466 array(&$this, '_doAutoLinks_email_callback'), $text); 01467 01468 return $text; 01469 } 01470 function _doAutoLinks_url_callback($matches) { 01471 $url = $this->encodeAttribute($matches[1]); 01472 $link = "<a href=\"$url\">$url</a>"; 01473 return $this->hashPart($link); 01474 } 01475 function _doAutoLinks_email_callback($matches) { 01476 $address = $matches[1]; 01477 $link = $this->encodeEmailAddress($address); 01478 return $this->hashPart($link); 01479 } 01480 01481 01482 function encodeEmailAddress($addr) { 01483 # 01484 # Input: an email address, e.g. "foo@example.com" 01485 # 01486 # Output: the email address as a mailto link, with each character 01487 # of the address encoded as either a decimal or hex entity, in 01488 # the hopes of foiling most address harvesting spam bots. E.g.: 01489 # 01490 # <p><a href="mailto:foo 01491 # @example.co 01492 # m">foo@exampl 01493 # e.com</a></p> 01494 # 01495 # Based by a filter by Matthew Wickline, posted to BBEdit-Talk. 
01496 # With some optimizations by Milian Wolff. 01497 # 01498 $addr = "mailto:" . $addr; 01499 $chars = preg_split('/(?<!^)(?!$)/', $addr); 01500 $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed. 01501 01502 foreach ($chars as $key => $char) { 01503 $ord = ord($char); 01504 # Ignore non-ascii chars. 01505 if ($ord < 128) { 01506 $r = ($seed * (1 + $key)) % 100; # Pseudo-random function. 01507 # roughly 10% raw, 45% hex, 45% dec 01508 # '@' *must* be encoded. I insist. 01509 if ($r > 90 && $char != '@') /* do nothing */; 01510 else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';'; 01511 else $chars[$key] = '&#'.$ord.';'; 01512 } 01513 } 01514 01515 $addr = implode('', $chars); 01516 $text = implode('', array_slice($chars, 7)); # text without `mailto:` 01517 $addr = "<a href=\"$addr\">$text</a>"; 01518 01519 return $addr; 01520 } 01521 01522 01523 function parseSpan($str) { 01524 # 01525 # Take the string $str and parse it into tokens, hashing embeded HTML, 01526 # escaped characters and handling code spans. 01527 # 01528 $output = ''; 01529 01530 $span_re = '{ 01531 ( 01532 \\\\'.$this->escape_chars_re.' 01533 | 01534 (?<![`\\\\]) 01535 `+ # code span marker 01536 '.( $this->no_markup ? '' : ' 01537 | 01538 <!-- .*? --> # comment 01539 | 01540 <\?.*?\?> | <%.*?%> # processing instruction 01541 | 01542 <[/!$]?[-a-zA-Z0-9:_]+ # regular tags 01543 (?> 01544 \s 01545 (?>[^"\'>]+|"[^"]*"|\'[^\']*\')* 01546 )? 01547 > 01548 ').' 01549 ) 01550 }xs'; 01551 01552 while (1) { 01553 # 01554 # Each loop iteration seach for either the next tag, the next 01555 # openning code span marker, or the next escaped character. 01556 # Each token is then passed to handleSpanToken. 01557 # 01558 $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE); 01559 01560 # Create token from text preceding tag. 01561 if ($parts[0] != "") { 01562 $output .= $parts[0]; 01563 } 01564 01565 # Check if we reach the end. 
01566 if (isset($parts[1])) { 01567 $output .= $this->handleSpanToken($parts[1], $parts[2]); 01568 $str = $parts[2]; 01569 } 01570 else { 01571 break; 01572 } 01573 } 01574 01575 return $output; 01576 } 01577 01578 01579 function handleSpanToken($token, &$str) { 01580 # 01581 # Handle $token provided by parseSpan by determining its nature and 01582 # returning the corresponding value that should replace it. 01583 # 01584 switch ($token{0}) { 01585 case "\\": 01586 return $this->hashPart("&#". ord($token{1}). ";"); 01587 case "`": 01588 # Search for end marker in remaining text. 01589 if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 01590 $str, $matches)) 01591 { 01592 $str = $matches[2]; 01593 $codespan = $this->makeCodeSpan($matches[1]); 01594 return $this->hashPart($codespan); 01595 } 01596 return $token; // return as text since no ending marker found. 01597 default: 01598 return $this->hashPart($token); 01599 } 01600 } 01601 01602 01603 function outdent($text) { 01604 # 01605 # Remove one level of line-leading tabs or spaces 01606 # 01607 return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text); 01608 } 01609 01610 01611 # String length function for detab. `_initDetab` will create a function to 01612 # hanlde UTF-8 if the default function does not exist. 01613 var $utf8_strlen = 'mb_strlen'; 01614 01615 function detab($text) { 01616 # 01617 # Replace tabs with the appropriate amount of space. 01618 # 01619 # For each line we separate the line in blocks delemited by 01620 # tab characters. Then we reconstruct every line by adding the 01621 # appropriate number of space between each blocks. 01622 01623 $text = preg_replace_callback('/^.*\t.*$/m', 01624 array(&$this, '_detab_callback'), $text); 01625 01626 return $text; 01627 } 01628 function _detab_callback($matches) { 01629 $line = $matches[0]; 01630 $strlen = $this->utf8_strlen; # strlen function for UTF-8. 01631 01632 # Split in blocks. 
01633 $blocks = explode("\t", $line); 01634 # Add each blocks to the line. 01635 $line = $blocks[0]; 01636 unset($blocks[0]); # Do not add first block twice. 01637 foreach ($blocks as $block) { 01638 # Calculate amount of space, insert spaces, insert block. 01639 $amount = $this->tab_width - 01640 $strlen($line, 'UTF-8') % $this->tab_width; 01641 $line .= str_repeat(" ", $amount) . $block; 01642 } 01643 return $line; 01644 } 01645 function _initDetab() { 01646 # 01647 # Check for the availability of the function in the `utf8_strlen` property 01648 # (initially `mb_strlen`). If the function is not available, create a 01649 # function that will loosely count the number of UTF-8 characters with a 01650 # regular expression. 01651 # 01652 if (function_exists($this->utf8_strlen)) return; 01653 $this->utf8_strlen = create_function('$text', 'return preg_match_all( 01654 "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 01655 $text, $m);'); 01656 } 01657 01658 01659 function unhash($text) { 01660 # 01661 # Swap back in all the tags hashed by _HashHTMLBlocks. 01662 # 01663 return preg_replace_callback('/(.)\x1A[0-9]+\1/', 01664 array(&$this, '_unhash_callback'), $text); 01665 } 01666 function _unhash_callback($matches) { 01667 return $this->html_hashes[$matches[0]]; 01668 } 01669 01670 } 01671 01672 01673 # 01674 # Markdown Extra Parser Class 01675 # 01676 01677 class MarkdownExtra_Parser extends Markdown_Parser { 01678 01679 # Prefix for footnote ids. 01680 var $fn_id_prefix = ""; 01681 01682 # Optional title attribute for footnote links and backlinks. 01683 var $fn_link_title = MARKDOWN_FN_LINK_TITLE; 01684 var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE; 01685 01686 # Optional class attribute for footnote links and backlinks. 01687 var $fn_link_class = MARKDOWN_FN_LINK_CLASS; 01688 var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS; 01689 01690 # Predefined abbreviations. 
01691 var $predef_abbr = array(); 01692 01693 01694 function MarkdownExtra_Parser() { 01695 # 01696 # Constructor function. Initialize the parser object. 01697 # 01698 # Add extra escapable characters before parent constructor 01699 # initialize the table. 01700 $this->escape_chars .= ':|'; 01701 01702 # Insert extra document, block, and span transformations. 01703 # Parent constructor will do the sorting. 01704 $this->document_gamut += array( 01705 "doFencedCodeBlocks" => 5, 01706 "stripFootnotes" => 15, 01707 "stripAbbreviations" => 25, 01708 "appendFootnotes" => 50, 01709 ); 01710 $this->block_gamut += array( 01711 "doFencedCodeBlocks" => 5, 01712 "doTables" => 15, 01713 "doDefLists" => 45, 01714 ); 01715 $this->span_gamut += array( 01716 "doFootnotes" => 5, 01717 "doAbbreviations" => 70, 01718 ); 01719 01720 parent::Markdown_Parser(); 01721 } 01722 01723 01724 # Extra variables used during extra transformations. 01725 var $footnotes = array(); 01726 var $footnotes_ordered = array(); 01727 var $abbr_desciptions = array(); 01728 var $abbr_word_re = ''; 01729 01730 # Give the current footnote number. 01731 var $footnote_counter = 1; 01732 01733 01734 function setup() { 01735 # 01736 # Setting up Extra-specific variables. 01737 # 01738 parent::setup(); 01739 01740 $this->footnotes = array(); 01741 $this->footnotes_ordered = array(); 01742 $this->abbr_desciptions = array(); 01743 $this->abbr_word_re = ''; 01744 $this->footnote_counter = 1; 01745 01746 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) { 01747 if ($this->abbr_word_re) 01748 $this->abbr_word_re .= '|'; 01749 $this->abbr_word_re .= preg_quote($abbr_word); 01750 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 01751 } 01752 } 01753 01754 function teardown() { 01755 # 01756 # Clearing Extra-specific variables. 
01757 # 01758 $this->footnotes = array(); 01759 $this->footnotes_ordered = array(); 01760 $this->abbr_desciptions = array(); 01761 $this->abbr_word_re = ''; 01762 01763 parent::teardown(); 01764 } 01765 01766 01767 ### HTML Block Parser ### 01768 01769 # Tags that are always treated as block tags: 01770 var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend'; 01771 01772 # Tags treated as block tags only if the opening tag is alone on it's line: 01773 var $context_block_tags_re = 'script|noscript|math|ins|del'; 01774 01775 # Tags where markdown="1" default to span mode: 01776 var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 01777 01778 # Tags which must not have their contents modified, no matter where 01779 # they appear: 01780 var $clean_tags_re = 'script|math'; 01781 01782 # Tags that do not need to be closed. 01783 var $auto_close_tags_re = 'hr|img'; 01784 01785 01786 function hashHTMLBlocks($text) { 01787 # 01788 # Hashify HTML Blocks and "clean tags". 01789 # 01790 # We only want to do this for block-level HTML tags, such as headers, 01791 # lists, and tables. That's because we still want to wrap <p>s around 01792 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 01793 # phrase emphasis, and spans. The list of tags we're looking for is 01794 # hard-coded. 01795 # 01796 # This works by calling _HashHTMLBlocks_InMarkdown, which then calls 01797 # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 01798 # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back 01799 # _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag. 01800 # These two functions are calling each other. It's recursive! 01801 # 01802 # 01803 # Call the HTML-in-Markdown hasher. 
01804 # 01805 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text); 01806 01807 return $text; 01808 } 01809 function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 01810 $enclosing_tag_re = '', $span = false) 01811 { 01812 # 01813 # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 01814 # 01815 # * $indent is the number of space to be ignored when checking for code 01816 # blocks. This is important because if we don't take the indent into 01817 # account, something like this (which looks right) won't work as expected: 01818 # 01819 # <div> 01820 # <div markdown="1"> 01821 # Hello World. <-- Is this a Markdown code block or text? 01822 # </div> <-- Is this a Markdown code block or a real tag? 01823 # <div> 01824 # 01825 # If you don't like this, just don't indent the tag on which 01826 # you apply the markdown="1" attribute. 01827 # 01828 # * If $enclosing_tag_re is not empty, stops at the first unmatched closing 01829 # tag with that name. Nested tags supported. 01830 # 01831 # * If $span is true, text inside must treated as span. So any double 01832 # newline will be replaced by a single newline so that it does not create 01833 # paragraphs. 01834 # 01835 # Returns an array of that form: ( processed text , remaining text ) 01836 # 01837 if ($text === '') return array('', ''); 01838 01839 # Regex to check for the presense of newlines around a block tag. 01840 $newline_before_re = '/(?:^\n?|\n\n)*$/'; 01841 $newline_after_re = 01842 '{ 01843 ^ # Start of text following the tag. 01844 (?>[ ]*<!--.*?-->)? # Optional comment. 01845 [ ]*\n # Must be followed by newline. 01846 }xs'; 01847 01848 # Regex to match any tag. 01849 $block_tag_re = 01850 '{ 01851 ( # $2: Capture hole tag. 01852 </? # Any opening or closing tag. 01853 (?> # Tag name. 01854 '.$this->block_tags_re.' | 01855 '.$this->context_block_tags_re.' | 01856 '.$this->clean_tags_re.' | 01857 (?!\s)'.$enclosing_tag_re.' 
01858 ) 01859 (?: 01860 (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 01861 (?> 01862 ".*?" | # Double quotes (can contain `>`) 01863 \'.*?\' | # Single quotes (can contain `>`) 01864 .+? # Anything but quotes and `>`. 01865 )*? 01866 )? 01867 > # End of tag. 01868 | 01869 <!-- .*? --> # HTML Comment 01870 | 01871 <\?.*?\?> | <%.*?%> # Processing instruction 01872 | 01873 <!\[CDATA\[.*?\]\]> # CData Block 01874 | 01875 # Code span marker 01876 `+ 01877 '. ( !$span ? ' # If not in span. 01878 | 01879 # Indented code block 01880 (?: ^[ ]*\n | ^ | \n[ ]*\n ) 01881 [ ]{'.($indent+4).'}[^\n]* \n 01882 (?> 01883 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n 01884 )* 01885 | 01886 # Fenced code block marker 01887 (?> ^ | \n ) 01888 [ ]{'.($indent).'}~~~+[ ]*\n 01889 ' : '' ). ' # End (if not is span). 01890 ) 01891 }xs'; 01892 01893 01894 $depth = 0; # Current depth inside the tag tree. 01895 $parsed = ""; # Parsed text that will be returned. 01896 01897 # 01898 # Loop through every tag until we find the closing tag of the parent 01899 # or loop until reaching the end of text if no parent tag specified. 01900 # 01901 do { 01902 # 01903 # Split the text using the first $tag_match pattern found. 01904 # Text before pattern will be first in the array, text after 01905 # pattern will be at the end, and between will be any catches made 01906 # by the pattern. 01907 # 01908 $parts = preg_split($block_tag_re, $text, 2, 01909 PREG_SPLIT_DELIM_CAPTURE); 01910 01911 # If in Markdown span mode, add a empty-string span-level hash 01912 # after each newline to prevent triggering any block element. 01913 if ($span) { 01914 $void = $this->hashPart("", ':'); 01915 $newline = "$void\n"; 01916 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 01917 } 01918 01919 $parsed .= $parts[0]; # Text before current tag. 01920 01921 # If end of $text has been reached. Stop loop. 
01922 if (count($parts) < 3) { 01923 $text = ""; 01924 break; 01925 } 01926 01927 $tag = $parts[1]; # Tag to handle. 01928 $text = $parts[2]; # Remaining text after current tag. 01929 $tag_re = preg_quote($tag); # For use in a regular expression. 01930 01931 # 01932 # Check for: Code span marker 01933 # 01934 if ($tag{0} == "`") { 01935 # Find corresponding end marker. 01936 $tag_re = preg_quote($tag); 01937 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}', 01938 $text, $matches)) 01939 { 01940 # End marker found: pass text unchanged until marker. 01941 $parsed .= $tag . $matches[0]; 01942 $text = substr($text, strlen($matches[0])); 01943 } 01944 else { 01945 # Unmatched marker: just skip it. 01946 $parsed .= $tag; 01947 } 01948 } 01949 # 01950 # Check for: Indented code block. 01951 # 01952 else if ($tag{0} == "\n" || $tag{0} == " ") { 01953 # Indented code block: pass it unchanged, will be handled 01954 # later. 01955 $parsed .= $tag; 01956 } 01957 # 01958 # Check for: Fenced code block marker. 01959 # 01960 else if ($tag{0} == "~") { 01961 # Fenced code block marker: find matching end marker. 01962 $tag_re = preg_quote(trim($tag)); 01963 if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text, 01964 $matches)) 01965 { 01966 # End marker found: pass text unchanged until marker. 01967 $parsed .= $tag . $matches[0]; 01968 $text = substr($text, strlen($matches[0])); 01969 } 01970 else { 01971 # No end marker: just skip it. 01972 $parsed .= $tag; 01973 } 01974 } 01975 # 01976 # Check for: Opening Block level tag or 01977 # Opening Context Block tag (like ins and del) 01978 # used as a block tag (tag is alone on it's line). 01979 # 01980 else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) || 01981 ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) && 01982 preg_match($newline_before_re, $parsed) && 01983 preg_match($newline_after_re, $text) ) 01984 ) 01985 { 01986 # Need to parse tag and following text using the HTML parser. 
list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

			# Make sure it stays outside of any paragraph by adding newlines.
			$parsed .= "\n\n$block_text\n\n";
		}
		#
		# Check for: Clean tag (like script, math)
		#            HTML Comments, processing instructions.
		#
		else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Need to parse tag and following text using the HTML parser.
			# (don't check for markdown attribute)
			list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

			$parsed .= $block_text;
		}
		#
		# Check for: Tag with same name as enclosing tag.
		#
		else if ($enclosing_tag_re !== '' &&
			# Same name as enclosing tag.
			preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
		{
			#
			# Increase/decrease nested tag count.
			#
			if ($tag[1] == '/')						$depth--;
			else if ($tag[strlen($tag)-2] != '/')	$depth++;

			if ($depth < 0) {
				#
				# Going out of parent element. Clean up and break so we
				# return to the calling function.
				#
				$text = $tag . $text;
				break;
			}

			$parsed .= $tag;
		}
		else {
			$parsed .= $tag;
		}
	} while ($depth >= 0);

	return array($parsed, $text);
}
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
#
# Parse HTML, calling _hashHTMLBlocks_inMarkdown for the content of tags
# carrying a `markdown` attribute.
#
# *   $text        - text beginning with the opening tag to parse.
# *   $hash_method - name of the method of $this called to convert each
#                    finished block of HTML (e.g. "hashBlock", "hashClean").
# *   $md_attr     - whether the `markdown="1"` attribute is honoured
#                    (callers pass false for clean tags).
#
# Parsing stops when the first opening tag closes.
#
# Returns an array of that form: ( processed text , remaining text ).
# If the tags are unbalanced, returns ( first character , rest of the
# original text ) so the caller always advances.
#
	if ($text === '') return array('', '');

	# Regex to match `markdown` attribute inside of a tag.
	$markdown_attr_re = '
		{
			\s*			# Eat whitespace before the `markdown` attribute
			markdown
			\s*=\s*
			(?>
				(["\'])		# $1: quote delimiter
				(.*?)		# $2: attribute value
				\1			# matching delimiter
			|
				([^\s>]*)	# $3: unquoted attribute value
			)
			()				# $4: make $3 always defined (avoid warnings)
		}xs';

	# Regex to match any tag.
	$tag_re = '{
			(					# $2: Capture hole tag.
				</?					# Any opening or closing tag.
					[\w:$]+			# Tag name.
					(?:
						(?=[\s"\'/a-zA-Z0-9])	# Allowed characters after tag name.
						(?>
							".*?"		|	# Double quotes (can contain `>`)
							\'.*?\'		|	# Single quotes (can contain `>`)
							.+?				# Anything but quotes and `>`.
						)*?
					)?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			)
		}xs';

	$original_text = $text;		# Save original text in case of failure.

	$depth		= 0;	# Current depth inside the tag tree.
	$block_text	= "";	# Temporary text holder for current text.
	$parsed		= "";	# Parsed text that will be returned.

	#
	# Get the name of the starting tag.
	# (This pattern makes $base_tag_name_re safe without quoting.)
	# Text starting with a comment or processing instruction has no tag
	# name; keep an empty string then so the variable is always defined.
	#
	$base_tag_name_re = '';
	if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
		$base_tag_name_re = $matches[1];

	#
	# Loop through every tag until we find the corresponding closing tag.
	#
	do {
		#
		# Split the text using the first $tag_re match found.
		# Text before the match will be first in the array, text after
		# it will be at the end, and between will be the captured tag.
		#
		$parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

		if (count($parts) < 3) {
			#
			# End of $text reached with unbalanced tag(s).
			# In that case, we return original text unchanged and pass the
			# first character as filtered to prevent an infinite loop in the
			# parent function.
			#
			return array($original_text[0], substr($original_text, 1));
		}

		$block_text .= $parts[0]; # Text before current tag.
		$tag         = $parts[1]; # Tag to handle.
		$text        = $parts[2]; # Remaining text after current tag.

		#
		# Check for: Auto-close tag (like <hr/>)
		#            Comments and Processing Instructions.
		#
		if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Just add the tag to the block as if it was text.
			$block_text .= $tag;
		}
		else {
			#
			# Increase/decrease nested tag count. Only do so if
			# the tag's name match base tag's.
			#
			if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
				if ($tag[1] == '/')						$depth--;
				else if ($tag[strlen($tag)-2] != '/')	$depth++;
			}

			#
			# Check for `markdown="1"` attribute and handle it.
			# The value must be exactly `1`, `block` or `span`; the
			# alternation is grouped so both anchors apply to every option.
			#
			if ($md_attr &&
				preg_match($markdown_attr_re, $tag, $attr_m) &&
				preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
			{
				# Remove `markdown` attribute from opening tag.
				$tag = preg_replace($markdown_attr_re, '', $tag);

				# Check if text inside this tag must be parsed in span mode.
				$this->mode = $attr_m[2] . $attr_m[3];
				$span_mode = $this->mode == 'span' ||
					($this->mode != 'block' &&
					preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag));

				# Calculate indent before tag.
				if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
					$strlen = $this->utf8_strlen;
					$indent = $strlen($matches[1], 'UTF-8');
				} else {
					$indent = 0;
				}

				# End preceding block with this tag.
				$block_text .= $tag;
				$parsed .= $this->$hash_method($block_text);

				# Get enclosing tag name for the ParseMarkdown function.
				# (This pattern makes $tag_name_re safe without quoting.)
				preg_match('/^<([\w:$]*)\b/', $tag, $matches);
				$tag_name_re = $matches[1];

				# Parse the content using the HTML-in-Markdown parser.
				list ($block_text, $text)
					= $this->_hashHTMLBlocks_inMarkdown($text, $indent,
						$tag_name_re, $span_mode);

				# Outdent markdown text.
				if ($indent > 0) {
					$block_text = preg_replace("/^[ ]{1,$indent}/m", "",
												$block_text);
				}

				# Append tag content to parsed text.
				if (!$span_mode)	$parsed .= "\n\n$block_text\n\n";
				else				$parsed .= "$block_text";

				# Start over a new block.
				$block_text = "";
			}
			else $block_text .= $tag;
		}

	} while ($depth > 0);

	#
	# Hash last block text that wasn't processed inside the loop.
	#
	$parsed .= $this->$hash_method($block_text);

	return array($parsed, $text);
}


function hashClean($text) {
#
# Hash a "clean" tag: passes $text to hashPart with the 'C' boundary
# and returns its result.
#
	return $this->hashPart($text, 'C');
}


function doHeaders($text) {
#
# Redefined to add id attribute support.
# Replaces setext-style and atx-style headers found in $text through the
# two callbacks below and returns the resulting text.
#
	# Setext-style headers:
	#	  Header 1  {#header1}
	#	  ========
	#
	#	  Header 2  {#header2}
	#	  --------
	#
	$text = preg_replace_callback(
		'{
			(^.+?)								# $1: Header text
			(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?	# $2: Id attribute
			[ ]*\n(=+|-+)[ ]*\n+				# $3: Header footer
		}mx',
		array(&$this, '_doHeaders_callback_setext'), $text);

	# atx-style headers:
	#	# Header 1        {#header1}
	#	## Header 2       {#header2}
	#	## Header 2 with closing hashes ##  {#header3}
	#	...
	#	###### Header 6   {#header2}
	#
	$text = preg_replace_callback('{
			^(\#{1,6})	# $1 = string of #\'s
			[ ]*
			(.+?)		# $2 = Header text
			[ ]*
			\#*			# optional closing #\'s (not counted)
			(?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
			[ ]*
			\n+
		}xm',
		array(&$this, '_doHeaders_callback_atx'), $text);

	return $text;
}
function _doHeaders_attr($attr) {
#
# Build the ` id="..."` attribute string for a header, or return "" when
# no id was given. Only null and the empty string count as "no id", so
# an id of "0" is kept.
#
	if (!isset($attr) || (string) $attr === '')  return "";
	return " id=\"$attr\"";
}
function _doHeaders_callback_setext($matches) {
#
# Callback for setext-style headers: $matches[1] is the header text,
# $matches[2] the optional id and $matches[3] the underline.
#
	# A single dash under a line starting with "- " is left untouched.
	if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
		return $matches[0];
	$level = $matches[3][0] == '=' ? 1 : 2;
	$attr  = $this->_doHeaders_attr(isset($matches[2]) ? $matches[2] : '');
	$block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Callback for atx-style headers: $matches[1] is the run of #'s,
# $matches[2] the header text and $matches[3] the optional id (absent
# from the array when not matched, being the last group).
#
	$level = strlen($matches[1]);
	$attr  = $this->_doHeaders_attr(isset($matches[3]) ? $matches[3] : '');
	$block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}


function doTables($text) {
#
# Form HTML tables.
# Handles both the leading-pipe and the no-leading-pipe syntax and
# returns $text with each table replaced by the callback's result.
#
	$less_than_tab = $this->tab_width - 1;
	#
	# Find tables with leading pipe.
	#
	#	| Header 1 | Header 2
	#	| -------- | --------
	#	| Cell 1   | Cell 2
	#	| Cell 3   | Cell 4
	#
	$text = preg_replace_callback('
		{
			^							# Start of a line
			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			[|]							# Optional leading pipe (present)
			(.+) \n						# $1: Header row (at least one pipe)

			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			[|] ([ ]*[-:]+[-| :]*) \n	# $2: Header underline

			(							# $3: Cells
				(?>
					[ ]*				# Allowed whitespace.
					[|] .* \n			# Row content.
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm',
		array(&$this, '_doTable_leadingPipe_callback'), $text);

	#
	# Find tables without leading pipe.
	#
	#	Header 1 | Header 2
	#	-------- | --------
	#	Cell 1   | Cell 2
	#	Cell 3   | Cell 4
	#
	$text = preg_replace_callback('
		{
			^							# Start of a line
			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			(\S.*[|].*) \n				# $1: Header row (at least one pipe)

			[ ]{0,'.$less_than_tab.'}	# Allowed whitespace.
			([-:]+[ ]*[|][-| :]*) \n	# $2: Header underline

			(							# $3: Cells
				(?>
					.* [|] .* \n		# Row content
				)*
			)
			(?=\n|\Z)					# Stop at final double newline.
		}xm',
		array(&$this, '_doTable_callback'), $text);

	return $text;
}
function _doTable_leadingPipe_callback($matches) {
#
# Callback for tables written with a leading pipe: strips the leading
# pipe from every content row, then defers to _doTable_callback.
#
	$head		= $matches[1];
	$underline	= $matches[2];
	$content	= $matches[3];

	# Remove leading pipe for each row.
	$content	= preg_replace('/^ *[|]/m', '', $content);

	return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
}
function _doTable_callback($matches) {
#
# Build the <table> markup from $matches[1] (header row), $matches[2]
# (header underline, which carries the column alignment) and $matches[3]
# (content rows). Returns the hashed block followed by a newline.
#
	$head		= $matches[1];
	$underline	= $matches[2];
	$content	= $matches[3];

	# Remove any trailing pipes for each line.
	$head		= preg_replace('/[|] *$/m', '', $head);
	$underline	= preg_replace('/[|] *$/m', '', $underline);
	$content	= preg_replace('/[|] *$/m', '', $content);

	# Reading alignment from header underline.
	$attr		= array();
	$separators	= preg_split('/ *[|] */', $underline);
	foreach ($separators as $n => $s) {
		if (preg_match('/^ *-+: *$/', $s))			$attr[$n] = ' align="right"';
		else if (preg_match('/^ *:-+: *$/', $s))	$attr[$n] = ' align="center"';
		else if (preg_match('/^ *:-+ *$/', $s))		$attr[$n] = ' align="left"';
		else										$attr[$n] = '';
	}

	# Parsing span elements, including code spans, character escapes,
	# and inline HTML tags, so that pipes inside those gets ignored.
	$head		= $this->parseSpan($head);
	$headers	= preg_split('/ *[|] */', $head);
	$col_count	= count($headers);

	# The underline may describe fewer columns than the header row has;
	# give the extra columns an empty alignment attribute.
	$attr		= array_pad($attr, $col_count, '');

	# Write column headers.
	$text = "<table>\n";
	$text .= "<thead>\n";
	$text .= "<tr>\n";
	foreach ($headers as $n => $header)
		$text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
	$text .= "</tr>\n";
	$text .= "</thead>\n";

	# Split content by row.
	$rows = explode("\n", trim($content, "\n"));

	$text .= "<tbody>\n";
	foreach ($rows as $row) {
		# Parsing span elements, including code spans, character escapes,
		# and inline HTML tags, so that pipes inside those gets ignored.
		$row = $this->parseSpan($row);

		# Split row by cell; never more cells than header columns, and
		# short rows are padded with empty cells.
		$row_cells = preg_split('/ *[|] */', $row, $col_count);
		$row_cells = array_pad($row_cells, $col_count, '');

		$text .= "<tr>\n";
		foreach ($row_cells as $n => $cell)
			$text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
		$text .= "</tr>\n";
	}
	$text .= "</tbody>\n";
	$text .= "</table>";

	return $this->hashBlock($text) . "\n";
}


function doDefLists($text) {
#
# Form HTML definition lists.
# Each whole list found in $text is handed to _doDefLists_callback.
#
	$less_than_tab = $this->tab_width - 1;

	# Re-usable pattern to match any entire dl list:
	$whole_list_re = '(?>
		(								# $1 = whole list
		  (								# $2
			[ ]{0,'.$less_than_tab.'}
			((?>.*\S.*\n)+)				# $3 = defined term
			\n?
			[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
		  )
		  (?s:.+?)
		  (								# $4
			  \z
			|
			  \n{2,}
			  (?=\S)
			  (?!						# Negative lookahead for another term
				[ ]{0,'.$less_than_tab.'}
				(?: \S.*\n )+?			# defined term
				\n?
				[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
			  )
			  (?!						# Negative lookahead for another definition
				[ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
			  )
		  )
		)
	)'; // mx

	$text = preg_replace_callback('{
			(?>\A\n?|(?<=\n\n))
			'.$whole_list_re.'
		}mx',
		array(&$this, '_doDefLists_callback'), $text);

	return $text;
}
function _doDefLists_callback($matches) {
#
# Callback for a whole definition list ($matches[1]): converts its items
# with processDefListItems, wraps them in <dl> and hashes the block.
#
	$list = $matches[1];

	$result = trim($this->processDefListItems($list));
	$result = "<dl>\n" . $result . "\n</dl>";
	return $this->hashBlock($result) . "\n\n";
}


function processDefListItems($list_str) {
#
# Process the contents of a single definition list, splitting it
# into individual term and definition list items.
# Returns the list text with terms and definitions replaced by the
# output of the two callbacks below.
#
	$less_than_tab = $this->tab_width - 1;

	# trim trailing blank lines:
	$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	# Process definition terms.
	$list_str = preg_replace_callback('{
		(?>\A\n?|\n\n+)					# leading line
		(								# definition terms = $1
			[ ]{0,'.$less_than_tab.'}	# leading whitespace
			(?![:][ ]|[ ])				# negative lookahead for a definition
										#   mark (colon) or more whitespace.
			(?> \S.* \n)+?				# actual term (not whitespace).
		)
		(?=\n?[ ]{0,'.$less_than_tab.'}:[ ])	# lookahead for following line feed
										#   with a definition mark.
		}xm',
		array(&$this, '_processDefListItems_callback_dt'), $list_str);

	# Process actual definitions.
	$list_str = preg_replace_callback('{
		\n(\n+)?						# leading line = $1
		(								# marker space = $2
			[ ]{0,'.$less_than_tab.'}	# whitespace before colon
			[:][ ]+						# definition mark (colon)
		)
		((?s:.+?))						# definition text = $3
		(?= \n+ 						# stop at next definition mark,
			(?:							# next term or end of text
				[ ]{0,'.$less_than_tab.'} [:][ ]	|
				<dt> | \z
			)
		)
		}xm',
		array(&$this, '_processDefListItems_callback_dd'), $list_str);

	return $list_str;
}
function _processDefListItems_callback_dt($matches) {
#
# Callback for definition terms: $matches[1] holds one term per line;
# each becomes its own <dt> after span-level processing.
#
	$terms = explode("\n", trim($matches[1]));
	$text = '';
	foreach ($terms as $term) {
		$term = $this->runSpanGamut(trim($term));
		$text .= "\n<dt>" . $term . "</dt>";
	}
	return $text . "\n";
}
function _processDefListItems_callback_dd($matches) {
#
# Callback for a definition: $matches[1] is the optional blank leading
# line, $matches[2] the marker (indent, colon, spaces) and $matches[3]
# the definition text.
#
	$leading_line	= $matches[1];
	$marker_space	= $matches[2];
	$def			= $matches[3];

	if ($leading_line || preg_match('/\n{2,}/', $def)) {
		# Preceded by a blank line or spanning several paragraphs:
		# process as block content.
		# Replace marker with the appropriate whitespace indentation
		$def = str_repeat(' ', strlen($marker_space)) . $def;
		$def = $this->runBlockGamut($this->outdent($def . "\n\n"));
		$def = "\n". $def ."\n";
	}
	else {
		# Otherwise process as span content only.
		$def = rtrim($def);
		$def = $this->runSpanGamut($this->outdent($def));
	}

	return "\n<dd>" . $def . "</dd>\n";
}


function doFencedCodeBlocks($text) {
#
# Adding the fenced code block syntax to regular Markdown:
#
# ~~~
# Code block
# ~~~
#
# Returns $text with each fenced block replaced by the callback's result.
#
	$text = preg_replace_callback('{
			(?:\n|\A)
			# 1: Opening marker
			(
				~{3,} # Marker: three tilde or more.
			)
			[ ]* \n # Whitespace and newline following marker.

			# 2: Content
			(
				(?>
					(?!\1 [ ]* \n)	# Not a closing marker.
					.*\n+
				)+
			)

			# Closing marker.
			\1 [ ]* \n
		}xm',
		array(&$this, '_doFencedCodeBlocks_callback'), $text);

	return $text;
}
function _doFencedCodeBlocks_callback($matches) {
#
# Callback for a fenced code block: escapes the content ($matches[2]),
# turns leading newlines into <br> elements and wraps the result in
# <pre><code>.
#
	$codeblock = $matches[2];
	$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
	$codeblock = preg_replace_callback('/^\n+/',
		array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
	$codeblock = "<pre><code>$codeblock</code></pre>";
	return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
function _doFencedCodeBlocks_newlines($matches) {
#
# Returns one <br> element per newline character in $matches[0].
#
	return str_repeat("<br$this->empty_element_suffix",
		strlen($matches[0]));
}


#
# Redefining emphasis markers so that emphasis by underscore does not
# work in the middle of a word.
#
var $em_relist = array(
	''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![.,:;]\s)',
	'*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
	'_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
	);
var $strong_relist = array(
	''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![.,:;]\s)',
	'**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
	'__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
	);
var $em_strong_relist = array(
	''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![.,:;]\s)',
	'***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
	'___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
	);


function formParagraphs($text) {
#
#	Params:
#		$text - string to process with html <p> tags
#
#	Returns the text with each paragraph span-processed and wrapped in
#	<p> tags where appropriate, and with all hashes restored by unhash().
#
	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+|\n+\z/', '', $text);

	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	#
	# Wrap <p> tags and unhashify HTML blocks
	#
	foreach ($grafs as $key => $value) {
		$value = trim($this->runSpanGamut($value));

		# Check if this should be enclosed in a paragraph.
		# A value that starts with a block tag hash, or that consists
		# only of a clean tag hash, is left alone.
		$is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);

		if ($is_p) {
			$value = "<p>$value</p>";
		}
		$grafs[$key] = $value;
	}

	# Join grafs in one text, then unhash HTML tags.
	$text = implode("\n\n", $grafs);

	# Finish by removing any tag hashes still present in $text.
	$text = $this->unhash($text);

	return $text;
}


### Footnotes

function stripFootnotes($text) {
#
# Strips footnote definitions from text and stores their content in
# $this->footnotes (through _stripFootnotes_callback).
# Returns the text without the definitions.
#
	$less_than_tab = $this->tab_width - 1;

	# Footnote defs are in the form: [^id]: text of the note
	$text = preg_replace_callback('{
		^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:	# note_id = $1
		  [ ]*
		  \n?					# maybe *one* newline
		(						# text = $2 (no blank lines allowed)
			(?:
				.+				# actual text
			|
				\n				# newlines but
				(?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
				(?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
								# by non-indented content
			)*
		)
		}xm',
		array(&$this, '_stripFootnotes_callback'),
		$text);
	return $text;
}
function _stripFootnotes_callback($matches) {
#
# Stores the outdented footnote text ($matches[2]) under its prefixed
# id ($matches[1]) and removes the definition from the text.
#
	$note_id = $this->fn_id_prefix . $matches[1];
	$this->footnotes[$note_id] = $this->outdent($matches[2]);
	return ''; # String that will replace the block
}


function doFootnotes($text) {
#
# Replace footnote references in $text [^id] with a special text-token
# which will be replaced by the actual footnote marker in appendFootnotes.
# Nothing is replaced while $this->in_anchor is set.
#
	if (!$this->in_anchor) {
		$text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
	}
	return $text;
}


function appendFootnotes($text) {
#
# Replace the footnote tokens left by doFootnotes with their markers and
# append the footnote list to text.
# Returns the text, followed by a <div class="footnotes"> block when at
# least one footnote was referenced.
#
	$text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
		array(&$this, '_appendFootnotes_callback'), $text);

	if (!empty($this->footnotes_ordered)) {
		$text .= "\n\n";
		$text .= "<div class=\"footnotes\">\n";
		$text .= "<hr". $this->empty_element_suffix ."\n";
		$text .= "<ol>\n\n";

		# Attribute template shared by every backlink; any `%%` in it
		# stands for the footnote number.
		$attr = " rev=\"footnote\"";
		if ($this->fn_backlink_class != "") {
			$class = $this->fn_backlink_class;
			$class = $this->encodeAttribute($class);
			$attr .= " class=\"$class\"";
		}
		if ($this->fn_backlink_title != "") {
			$title = $this->fn_backlink_title;
			$title = $this->encodeAttribute($title);
			$attr .= " title=\"$title\"";
		}
		$num = 0;

		# Processing a footnote may reference further footnotes, which
		# the callback appends to footnotes_ordered; hence the queue-style
		# loop instead of a foreach.
		while (!empty($this->footnotes_ordered)) {
			$footnote = reset($this->footnotes_ordered);
			$note_id = key($this->footnotes_ordered);
			unset($this->footnotes_ordered[$note_id]);

			$footnote .= "\n"; # Need to append newline before parsing.
			$footnote = $this->runBlockGamut("$footnote\n");
			$footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
				array(&$this, '_appendFootnotes_callback'), $footnote);

			# Substitute the number into a copy so the `%%` placeholder
			# in the template survives for the following footnotes.
			$num++;
			$note_attr = str_replace("%%", (string) $num, $attr);
			$note_id = $this->encodeAttribute($note_id);

			# Add backlink to last paragraph; create new paragraph if needed.
			# The arrow is written as a numeric character reference so the
			# output does not depend on this file's encoding.
			$backlink = "<a href=\"#fnref:$note_id\"$note_attr>&#8617;</a>";
			if (preg_match('{</p>$}', $footnote)) {
				$footnote = substr($footnote, 0, -4) . " $backlink</p>";
			} else {
				$footnote .= "\n\n<p>$backlink</p>";
			}

			$text .= "<li id=\"fn:$note_id\">\n";
			$text .= $footnote . "\n";
			$text .= "</li>\n\n";
		}

		$text .= "</ol>\n";
		$text .= "</div>";
	}
	return $text;
}
function _appendFootnotes_callback($matches) {
#
# Callback for a footnote token: $matches[1] is the footnote id.
# Returns the <sup> marker, or the original `[^id]` text when no
# footnote is available for that id.
#
	$node_id = $this->fn_id_prefix . $matches[1];

	# Create footnote marker only if it has a corresponding footnote *and*
	# the footnote hasn't been used by another marker.
	if (isset($this->footnotes[$node_id])) {
		# Transfer footnote content to the ordered list.
		$this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
		unset($this->footnotes[$node_id]);

		$num = $this->footnote_counter++;
		$attr = " rel=\"footnote\"";
		if ($this->fn_link_class != "") {
			$class = $this->fn_link_class;
			$class = $this->encodeAttribute($class);
			$attr .= " class=\"$class\"";
		}
		if ($this->fn_link_title != "") {
			$title = $this->fn_link_title;
			$title = $this->encodeAttribute($title);
			$attr .= " title=\"$title\"";
		}

		$attr = str_replace("%%", $num, $attr);
		$node_id = $this->encodeAttribute($node_id);

		return
			"<sup id=\"fnref:$node_id\">".
			"<a href=\"#fn:$node_id\"$attr>$num</a>".
			"</sup>";
	}

	return "[^".$matches[1]."]";
}


### Abbreviations ###

function stripAbbreviations($text) {
#
# Strips abbreviation definitions from text and stores their
# descriptions (through _stripAbbreviations_callback).
# Returns the text without the definitions.
#
	$less_than_tab = $this->tab_width - 1;

	# Abbreviation defs are in the form: *[abbr]: description
	$text = preg_replace_callback('{
		^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:	# abbr_id = $1
		(.*)					# text = $2 (no blank lines allowed)
		}xm',
		array(&$this, '_stripAbbreviations_callback'),
		$text);
	return $text;
}
function _stripAbbreviations_callback($matches) {
#
# Adds the abbreviation ($matches[1]) to the alternation in
# $this->abbr_word_re, records its trimmed description ($matches[2])
# and removes the definition from the text.
#
	$abbr_word = $matches[1];
	$abbr_desc = $matches[2];
	if ($this->abbr_word_re)
		$this->abbr_word_re .= '|';
	$this->abbr_word_re .= preg_quote($abbr_word);
	$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
	return ''; # String that will replace the block
}


function doAbbreviations($text) {
#
# Find defined abbreviations in text and wrap them in <abbr> elements.
# Does nothing when no abbreviation has been defined.
#
	if ($this->abbr_word_re) {
		// cannot use the /x modifier because abbr_word_re may
		// contain significant spaces:
		$text = preg_replace_callback('{'.
			'(?<![\w\x1A])'.
			'(?:'.$this->abbr_word_re.')'.
			'(?![\w\x1A])'.
			'}',
			array(&$this, '_doAbbreviations_callback'), $text);
	}
	return $text;
}
function _doAbbreviations_callback($matches) {
#
# Callback for an abbreviation occurrence ($matches[0]): returns the
# hashed <abbr> element, with a title attribute when the stored
# description is not the empty string, or the text unchanged when the
# abbreviation is unknown.
#
	$abbr = $matches[0];
	if (isset($this->abbr_desciptions[$abbr])) {
		$desc = $this->abbr_desciptions[$abbr];
		if ($desc === '') {
			return $this->hashPart("<abbr>$abbr</abbr>");
		} else {
			$desc = $this->encodeAttribute($desc);
			return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
		}
	} else {
		return $matches[0];
	}
}

}


/*

PHP Markdown Extra
==================

Description
-----------

This is a PHP port of the original Markdown formatter written in Perl
by John Gruber. This special "Extra" version of PHP Markdown features
further enhancements to the syntax for making additional constructs
such as tables and definition list.

Markdown is a text-to-HTML filter; it translates an easy-to-read /
easy-to-write structured text format into HTML. Markdown's text format
is most similar to that of plain text email, and supports features such
as headers, *emphasis*, code blocks, blockquotes, and links.

Markdown's syntax is designed not as a generic markup language, but
specifically to serve as a front-end to (X)HTML. You can use span-level
HTML tags anywhere in a Markdown document, and you can use block level
HTML tags (like <div> and <table> as well).
For more information about Markdown's syntax, see:

<http://daringfireball.net/projects/markdown/>


Bugs
----

To file bug reports please send email to:

<michel.fortin@michelf.com>

Please include with your report: (1) the example input; (2) the output you
expected; (3) the output Markdown actually produced.


Version History
---------------

See the readme file for detailed release notes for this version.


Copyright and License
---------------------

PHP Markdown & Extra
Copyright (c) 2004-2009 Michel Fortin
<http://michelf.com/>
All rights reserved.

Based on Markdown
Copyright (c) 2003-2006 John Gruber
<http://daringfireball.net/>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

*   Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

*   Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

*   Neither the name "Markdown" nor the names of its contributors may
    be used to endorse or promote products derived from this software
    without specific prior written permission.

This software is provided by the copyright holders and contributors "as
is" and any express or implied warranties, including, but not limited
to, the implied warranties of merchantability and fitness for a
particular purpose are disclaimed. In no event shall the copyright owner
or contributors be liable for any direct, indirect, incidental, special,
exemplary, or consequential damages (including, but not limited to,
procurement of substitute goods or services; loss of use, data, or
profits; or business interruption) however caused and on any theory of
liability, whether in contract, strict liability, or tort (including
negligence or otherwise) arising in any way out of the use of this
software, even if advised of the possibility of such damage.

*/
?>