|
Moodle
2.2.1
http://www.collinsharper.com
|
<?php

// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Converter from Wiki-like formatted text to Markdown.
 *
 * The conversion is line based: convert() splits the text into lines, tracks
 * which kind of block it is in, and passes most lines through line_replace()
 * for inline substitutions.
 */

defined('MOODLE_INTERNAL') || die();

/*
 * Block states tracked by WikiToMarkdown::convert().
 */
define( "STATE_NONE",1 ); // blank line has been detected, so looking for first line on next para
define( "STATE_PARAGRAPH",2 ); // currently processing vanilla paragraph
define( "STATE_BLOCKQUOTE",3 ); // currently processing blockquote section
define( "STATE_PREFORM",4 ); // currently processing preformatted text
define( "STATE_NOTIKI",5 ); // currently processing preformatted / no formatting

/*
 * List states tracked by WikiToMarkdown::do_list().
 */
define( "LIST_NONE", 1 ); // no lists active
define( "LIST_UNORDERED", 2 ); // unordered list active
define( "LIST_ORDERED", 3 ); // ordered list active
define( "LIST_DEFINITION", 4 ); // definition list active

/**
 * Converts Wiki-like formatted text into Markdown, one line at a time.
 *
 * Use convert() as the entry point; the other public methods are the
 * per-line building blocks it relies on.
 */
class WikiToMarkdown {

    /** @var int current block state, one of the STATE_* constants */
    public $block_state;

    /** @var int current list state, one of the LIST_* constants */
    public $list_state;

    /** @var int nesting depth of the list currently open (0 when none) */
    public $list_depth;

    /** @var array stack of closing tags for the lists currently open */
    public $list_backtrack;

    /** @var string output buffer; reset to "" by convert() */
    public $output;

    /** @var mixed course id handed to get_file_url() when building file links */
    public $courseid;

    /** @var bool reset to false by convert(); not read anywhere in this class */
    public $spelling_on;

    /**
     * Provide the appropriate closure for a block.
     *
     * Any list that is still open is closed first, then the terminator for
     * the block state itself is appended.
     *
     * @param int $state one of the STATE_* constants
     * @return string text that closes the open list (if any) and the block
     */
    public function close_block( $state ) {
        // If in a list close this first.
        $lclose = "";
        if ($this->list_state != LIST_NONE) {
            $lclose = $this->do_list( " ", true );
        }

        $sclose = "";
        switch ($state) {
            case STATE_PARAGRAPH:
            case STATE_BLOCKQUOTE:
            case STATE_NOTIKI:
                $sclose = "\n";
                break;
            case STATE_PREFORM:
                $sclose = "</pre>\n";
                break;
        }

        return $lclose . $sclose;
    }

    /**
     * Shared implementation of do_replace() and do_replace_markdown().
     *
     * Text enclosed by a pair of $mark characters is wrapped in $open/$close
     * when the opening mark follows the start of the line, a space or one of
     * "(.," and the closing mark is followed by a non-alphanumeric character
     * or the end of the line.
     *
     * @param string $line text to process
     * @param string $mark the magic character, already escaped for use in a regex
     * @param string $open text emitted in place of the opening mark
     * @param string $close text emitted in place of the closing mark
     * @return string the processed line
     */
    private function replace_marked( $line, $mark, $open, $close ) {
        // BODGE: replace inline $mark characters in places where we want them ignored;
        // they are put back after the main substitution. Stops problems with eg, and/or.
        $bodge = chr(1);

        // '#' is used as the pattern delimiter so that a "/" mark (italic) cannot
        // terminate the pattern early.
        $line = preg_replace( '#([[:alnum:]])'.$mark.'([[:alnum:]])#i', '\\1'.$bodge.'\\2', $line );

        $regex = '#(^| |[(.,])'.$mark.'([^'.$mark.']*)'.$mark.'([^[:alnum:]]|$)#i';
        $replace = '\\1'.$open.'\\2'.$close.'\\3';
        $line = preg_replace( $regex, $replace, $line );

        // BODGE: back we go.
        // NOTE(review): $mark is reused verbatim as the replacement text, so a
        // regex-escaped mark such as "\*" is put back in its escaped form - confirm
        // that this is intended.
        $line = preg_replace( '/'.$bodge.'/i', $mark, $line );

        return $line;
    }

    /**
     * Do the regex replacement for inline formatting that maps to an HTML tag.
     *
     * @param string $line text to process
     * @param string $mark the magic character, already escaped for use in a regex
     * @param string $tag name of the HTML tag to insert (without angle brackets)
     * @return string the processed line
     */
    public function do_replace( $line, $mark, $tag ) {
        return $this->replace_marked( $line, $mark, '<'.$tag.'>', '</'.$tag.'>' );
    }

    /**
     * Do the regex replacement for inline formatting that maps to Markdown.
     *
     * The Markdown version does not generate HTML tags; $tag is emitted as is
     * on both sides of the enclosed text.
     *
     * @param string $line text to process
     * @param string $mark the magic character, already escaped for use in a regex
     * @param string $tag Markdown marker to emit before and after the text
     * @return string the processed line
     */
    public function do_replace_markdown( $line, $mark, $tag ) {
        return $this->replace_marked( $line, $mark, $tag, $tag );
    }

    /**
     * Do the regex replacement for subscript and superscript.
     *
     * Slightly different from do_replace(): any text between two $mark
     * characters is wrapped, regardless of what surrounds the marks.
     *
     * @param string $line text to process
     * @param string $mark the magic character, already escaped for use in a regex
     * @param string $tag name of the HTML tag to insert (without angle brackets)
     * @return string the processed line
     */
    public function do_replace_sub( $line, $mark, $tag ) {
        $regex = '/'.$mark.'([^'.$mark.']*)'.$mark.'/i';
        $replace = '<'.$tag.'>\\1</'.$tag.'>';

        return preg_replace( $regex, $replace, $line );
    }

    /**
     * Handle a line with a list character on it.
     *
     * The number of leading list characters gives the nesting depth. Lists
     * are opened or closed so that the open depth matches it, and the line is
     * returned as a list item. A blank line implies dropping to level 0, in
     * which case only the closing tags are returned.
     *
     * Updates $list_state, $list_depth and $list_backtrack.
     *
     * @param string $line the line, starting with its list characters
     * @param bool $blank true to close every open list instead of adding an item
     * @return string the converted line, or just closing tags when blank
     */
    public function do_list( $line, $blank=false ) {
        // An empty line has no list character to read, so it is treated as blank.
        $isblank = $blank || (string) $line === '';

        // Get magic character and then delete it from the line if not blank.
        if ($isblank) {
            $listchar = "";
            $count = 0;
        } else {
            $listchar = $line[0];
            $count = strspn( $line, $listchar );
            $line = preg_replace( "/^[".$listchar."]+ /i", "", $line );
        }

        // Find what sort of list this character represents.
        $list_tag = "";
        $list_close_tag = "";
        $item_tag = "";
        $item_close_tag = "";
        $list_style = LIST_NONE;
        switch ($listchar) {
            case '*':
                $item_tag = "*";
                $list_style = LIST_UNORDERED;
                break;
            case '#':
                $item_tag = "1.";
                $list_style = LIST_ORDERED;
                break;
            case ';':
                $list_tag = "<dl>";
                $list_close_tag = "</dl>";
                $item_tag = "<dd>";
                $item_close_tag = "</dd>";
                $list_style = LIST_DEFINITION;
                break;
            case ':':
                $list_tag = "<dl>";
                $list_close_tag = "</dl>";
                $item_tag = "<dt>";
                $item_close_tag = "</dt>";
                $list_style = LIST_DEFINITION;
                break;
        }

        // Tag opening/closing regime now.
        $tags = "";

        // If depth has reduced do number of closes to restore level.
        for ($i = $this->list_depth; $i > $count; $i--) {
            $tags .= array_pop( $this->list_backtrack );
        }

        // If depth has increased do number of opens to balance.
        for ($i = $this->list_depth; $i < $count; $i++) {
            array_push( $this->list_backtrack, $list_close_tag );
            $tags .= $list_tag;
        }

        // Ok, so list state is now same as style and depth same as count.
        $this->list_state = $list_style;
        $this->list_depth = $count;

        if ($isblank) {
            return $tags;
        }

        // One space of indent for every nesting level below the first.
        $indent = str_repeat( ' ', max( 0, $count - 1 ) );

        return $tags . $indent . $item_tag . " " . $line . $item_close_tag;
    }

    /**
     * Return the line after the various formatting replacements have been made.
     *
     * Order is vital to stop the replacements interfering with each other.
     * Reads $CFG->wwwroot and $CFG->libdir, and calls get_file_url() from
     * filelib.php with the course id stored by convert().
     *
     * @param string $line a single line of Wiki-like text
     * @return string the line converted to Markdown
     */
    public function line_replace( $line ) {
        global $CFG;

        // ---- (at least) means a <hr />
        // MARKDOWN: no change so leave.

        // Is this a list line (starts with * # ; :).
        if (preg_match( "/^([*]+|[#]+|[;]+|[:]+) /i", $line )) {
            $line = $this->do_list( $line );
        }

        // Typographic conventions.
        // MARKDOWN: no equiv. so substitute the typographic character directly.
        // The "--" and " - " dash conversions are deliberately not applied.
        // str_replace() applies the pairs in array order, one after the other.
        $line = str_replace(
            array( "...", "(R)", "(r)", "(TM)", "(tm)", "(C)", "1/4", "1/2", "3/4" ),
            array( " … ", "®", "®", "™", "™", "©", "¼", "½", "¾" ),
            $line
        );
        // (digits) x (digits) - multiply.
        $line = preg_replace( "/([[:digit:]]+[[:space:]]*)x([[:space:]]*[[:digit:]]+)/i", "\\1×\\2", $line );

        // Do formatting tags.
        // NOTE: The / replacement *has* to come before the ones that add HTML
        // tags, or it will screw up the tags they insert.
        // MARKDOWN: only bold and italic change, rest are just HTML.
        $line = $this->do_replace_markdown( $line, "\*", "**" );
        $line = $this->do_replace_markdown( $line, "/", "*" );
        $line = $this->do_replace( $line, "\+", "ins" );
        $line = $this->do_replace_sub( $line, "~", "sub" );
        $line = $this->do_replace_sub( $line, "\^", "sup" );
        $line = $this->do_replace( $line, "%", "code" );
        $line = $this->do_replace( $line, "@", "cite" );

        // Convert urls into proper link with optional link text URL(text).
        // MARKDOWN: HTML conversion should work fine.
        // These patterns contain "/" and "#", hence the "~" delimiter.
        $line = preg_replace( "~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i",
            "\\1[\\5](\\2://\\3\\4)", $line );
        $line = preg_replace( "~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i",
            "\\1[\\5](http://www.\\2\\3)", $line );

        // Make urls (with and without http) into proper links.
        $line = preg_replace( "~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])~i",
            "\\1<\\2://\\3\\4>", $line );
        $line = preg_replace( "~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])~i",
            "\\1<http://www.\\2\\3>", $line );

        // Make email addresses into mailtos....
        // MARKDOWN doesn't quite support this, so do as html.
        $line = preg_replace( "/([[:space:]]|^)([[:alnum:]._-]+@[[:alnum:]._-]+)\(([^)]+)\)/i",
            "\\1<a href=\"mailto:\\2\">\\3</a>", $line );

        // !# at the beginning of any line means a heading.
        // MARKDOWN: value (1-6) becomes number of hashes.
        if (preg_match( "/^!([1-6]) (.*)$/i", $line, $regs )) {
            $hashes = str_repeat( '#', (int) $regs[1] );
            $line = preg_replace( "/^!([1-6]) (.*)$/i", $hashes." \\2", $line );
        }

        // Acronym handling, example HTML(Hypertext Markup Language).
        // MARKDOWN: no equiv. so just leave as HTML.
        $line = preg_replace( "/([A-Z]+)\(([^)]+)\)/", "<acronym title=\"\\2\">\\1</acronym>", $line );

        // Replace resource link >>##(Description Text).
        // MARKDOWN: change to MD web link style.
        $line = preg_replace( "/ ([a-zA-Z]+):([0-9]+)\(([^)]+)\)/i",
            " [\\3](".$CFG->wwwroot."/mod/\\1/view.php?id=\\2) ", $line );

        require_once($CFG->libdir.'/filelib.php');
        $coursefileurl = get_file_url($this->courseid);

        // Replace picture resource link with a Markdown image; the description
        // in brackets becomes the alternative text.
        $line = preg_replace( "#/([a-zA-Z0-9./_-]+)(png|gif|jpg)\(([^)]+)\)#i",
            "![\\3](".$coursefileurl."/\\1\\2)", $line );

        // Replace file resource link.
        $line = preg_replace( "#file:/([[:alnum:]/._-]+)\(([^)]+)\)#i",
            "[\\2](".$coursefileurl."/\\1)", $line );

        return $line;
    }

    /**
     * Main entry point for processing Wiki-like text.
     *
     * Resets the converter state, then walks the text line by line. A blank
     * line closes the current block; the first line after it decides the type
     * of the next block (blockquote, preformatted, no-wiki or paragraph).
     *
     * @param string $content text with Wiki-like formatting
     * @param mixed $courseid course id used when building file links
     * @return string the text with Markdown formatting
     */
    public function convert( $content,$courseid ) {
        // Initialisation stuff.
        $this->output = "";
        $this->block_state = STATE_NONE;
        $this->list_state = LIST_NONE;
        $this->list_depth = 0;
        $this->list_backtrack = array();
        $this->spelling_on = false;
        $this->courseid = $courseid;

        // Markdown treats lines indented by four spaces as a code block.
        $codeindent = "    ";

        // Split content into array of single lines.
        $lines = explode( "\n", $content );
        $buffer = "";

        // Run through lines.
        foreach ($lines as $line) {
            // Is this a blank line?
            if (preg_match( "/^[[:blank:]\r]*$/i", $line )) {
                // End current block according to state.
                $buffer .= $this->close_block( $this->block_state );
                $this->block_state = STATE_NONE;
                continue;
            }

            // Act now depending on current block state.
            if ($this->block_state == STATE_NONE) {
                // First character of line defines block type.
                if (preg_match( "/^> /i", $line )) {
                    // Blockquote.
                    $buffer .= $this->line_replace( $line ) . "\n";
                    $this->block_state = STATE_BLOCKQUOTE;
                } else if (preg_match( "/^ /i", $line )) {
                    // Preformatted text.
                    // MARKDOWN: no real equiv. so just use <pre>.
                    $buffer .= "<pre>\n";
                    $buffer .= $this->line_replace( $line ) . "\n";
                    $this->block_state = STATE_PREFORM;
                } else if (preg_match( "/^\% /i", $line )) {
                    // Preformatted text - no processing.
                    // MARKDOWN: this is MD code form of a paragraph.
                    $buffer .= $codeindent . preg_replace( "/^\%/i", "", $line ) . "\n";
                    $this->block_state = STATE_NOTIKI;
                } else {
                    // Ordinary paragraph.
                    $buffer .= $this->line_replace( $line ) . "\n";
                    $this->block_state = STATE_PARAGRAPH;
                }
                continue;
            }

            if ($this->block_state == STATE_PARAGRAPH
                    || $this->block_state == STATE_BLOCKQUOTE
                    || $this->block_state == STATE_PREFORM) {
                $buffer .= $this->line_replace( $line ) . "\n";
            } else if ($this->block_state == STATE_NOTIKI) {
                // No processing at all inside a no-wiki block.
                $buffer .= $codeindent . $line . "\n";
            }
        }

        // Close off any block level tags.
        $buffer .= $this->close_block( $this->block_state );

        return $buffer;
    }
}