documentation/moodle/wiki__to__markdown_8php_source.html

00001 <?php
00002
00003 // This file is part of Moodle - http://moodle.org/
00004 //
00005 // Moodle is free software: you can redistribute it and/or modify
00006 // it under the terms of the GNU General Public License as published by
00007 // the Free Software Foundation, either version 3 of the License, or
00008 // (at your option) any later version.
00009 //
00010 // Moodle is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 // GNU General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU General Public License
00016 // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
00017
00027 defined('MOODLE_INTERNAL') || die();
00028
// Block-level parser states: what kind of block the converter is currently inside.

/** A blank line has been detected, so the next line starts a new paragraph/block. */
define('STATE_NONE', 1);
/** Currently processing a vanilla paragraph. */
define('STATE_PARAGRAPH', 2);
/** Currently processing a blockquote section. */
define('STATE_BLOCKQUOTE', 3);
/** Currently processing preformatted text. */
define('STATE_PREFORM', 4);
/** Currently processing preformatted text with no formatting applied. */
define('STATE_NOTIKI', 5);

// List states: which kind of list (if any) is currently open.

/** No list is active. */
define('LIST_NONE', 1);
/** An unordered list is active. */
define('LIST_UNORDERED', 2);
/** An ordered list is active. */
define('LIST_ORDERED', 3);
/** A definition list is active. */
define('LIST_DEFINITION', 4);
/**
 * Converts text written in the "Wiki-like" format into Markdown.
 *
 * The converter works line by line: convert() splits the input on "\n",
 * tracks which kind of block (paragraph, blockquote, preformatted, no-format)
 * and which list nesting level it is in, and rewrites the inline markup of
 * each line via line_replace().
 *
 * Relies on the STATE_* and LIST_* constants defined alongside this class,
 * and (in line_replace()) on the global $CFG object and get_file_url().
 */
class WikiToMarkdown {

    /** @var int Current block state, one of the STATE_* constants. */
    public $block_state;
    /** @var int Current list state, one of the LIST_* constants. */
    public $list_state;
    /** @var int Current list nesting depth (0 when no list is open). */
    public $list_depth;
    /** @var array Stack of closing tags for the list levels currently open. */
    public $list_backtrack;
    /** @var string Output buffer; reset by convert(). */
    public $output;
    /** @var mixed Course id handed to get_file_url() when building file links. */
    public $courseid;
    /** @var bool Reset to false by convert(); declared so it is not created dynamically. */
    public $spelling_on;

    /**
     * Provide the appropriate closure for a block.
     *
     * Any open list is closed first, then the block itself.
     *
     * @param int $state the STATE_* value of the block being closed
     * @return string text to append to the output
     */
    public function close_block($state) {
        // If in a list, close that first.
        $lclose = '';
        if ($this->list_state != LIST_NONE) {
            $lclose = $this->do_list(' ', true);
        }

        switch ($state) {
            case STATE_PREFORM:
                $sclose = "</pre>\n";
                break;
            case STATE_PARAGRAPH:
            case STATE_BLOCKQUOTE:
            case STATE_NOTIKI:
                $sclose = "\n";
                break;
            default:
                $sclose = '';
                break;
        }

        return $lclose . $sclose;
    }

    /**
     * Shared implementation of do_replace() and do_replace_markdown().
     *
     * Rewrites "<mark>text<mark>" spans as "<open>text<close>". A span is only
     * recognised when the opening mark follows the start of the line, a space
     * or one of "(", ".", "," and the closing mark is followed by a
     * non-alphanumeric character or the end of the line.
     *
     * The patterns use "#" as delimiter (not "/") because "/" is itself one of
     * the marks passed in; with a "/" delimiter that mark terminates the
     * pattern early and preg_replace() fails.
     *
     * @param string $line  the line to process
     * @param string $mark  the magic character, already escaped for use in a
     *                      regular expression; must not contain "#"
     * @param string $open  text replacing the opening mark
     * @param string $close text replacing the closing mark
     * @return string the processed line
     */
    private function replace_marked_span($line, $mark, $open, $close) {
        // BODGE: marks sitting between two alphanumerics (eg, and/or) must be
        // ignored, so swap them for a placeholder during the main substitution.
        $bodge = chr(1);
        $line = preg_replace('#([[:alnum:]])' . $mark . '([[:alnum:]])#i', '\\1' . $bodge . '\\2', $line);

        $regex = '#(^| |[(.,])' . $mark . '([^' . $mark . ']*)' . $mark . '([^[:alnum:]]|$)#i';
        $line = preg_replace($regex, '\\1' . $open . '\\2' . $close . '\\3', $line);

        // BODGE: back we go. $mark is re-inserted exactly as it was passed in,
        // so an escaped mark such as "\*" comes back with its backslash.
        return preg_replace('#' . $bodge . '#i', $mark, $line);
    }

    /**
     * Replace paired marks (bold, italic etc.) with an HTML tag.
     *
     * @param string $line the line to process
     * @param string $mark the magic character (regex-escaped, must not contain "#")
     * @param string $tag  the HTML tag name to insert, without angle brackets
     * @return string the processed line
     */
    public function do_replace($line, $mark, $tag) {
        return $this->replace_marked_span($line, $mark, '<' . $tag . '>', '</' . $tag . '>');
    }

    /**
     * Replace paired marks with a Markdown token.
     *
     * MARKDOWN version does not generate HTML tags, just a straight replace:
     * the same token is used for the opening and the closing mark.
     *
     * @param string $line the line to process
     * @param string $mark the magic character (regex-escaped, must not contain "#")
     * @param string $tag  the Markdown token to insert
     * @return string the processed line
     */
    public function do_replace_markdown($line, $mark, $tag) {
        return $this->replace_marked_span($line, $mark, $tag, $tag);
    }

    /**
     * Replace paired marks for subscript and superscript.
     *
     * Slightly different from do_replace(): the marks are matched anywhere in
     * the line, with no condition on the surrounding characters.
     *
     * @param string $line the line to process
     * @param string $mark the magic character (regex-escaped, must not contain
     *                     an unescaped "/")
     * @param string $tag  the HTML tag name to insert, without angle brackets
     * @return string the processed line
     */
    public function do_replace_sub($line, $mark, $tag) {
        $regex = '/' . $mark . '([^' . $mark . ']*)' . $mark . '/i';
        $replace = '<' . $tag . '>\\1</' . $tag . '>';

        return preg_replace($regex, $replace, $line);
    }

    /**
     * Handle a line with a list character on it.
     *
     * The number of leading list characters gives the nesting depth. Closing
     * tags are emitted for levels that have ended and opening tags for levels
     * that have started; $list_state, $list_depth and $list_backtrack are
     * updated to match.
     *
     * @param string $line  the line, starting with one or more list characters
     * @param bool   $blank true for a blank line, which implies a drop to level 0
     * @return string the closing/opening tags followed by the formatted item,
     *                or only the tags when $blank is true
     */
    public function do_list($line, $blank = false) {
        if (!$blank && (string) $line === '') {
            // There is no first character to read, so treat it like a blank line.
            $blank = true;
        }

        // Get the magic character and then delete it from the line if not blank.
        if ($blank) {
            $listchar = '';
            $count = 0;
        } else {
            // Square-bracket offset: the "{0}" form is not valid in PHP 8.
            $listchar = $line[0];
            $count = strspn($line, $listchar);
            $line = preg_replace('/^[' . $listchar . ']+ /i', '', $line);
        }

        // Find what sort of list this character represents:
        // list tag, list close tag, item tag, item close tag, list style.
        $styles = array(
            '*' => array('', '', '*', '', LIST_UNORDERED),
            '#' => array('', '', '1.', '', LIST_ORDERED),
            ';' => array('<dl>', '</dl>', '<dd>', '</dd>', LIST_DEFINITION),
            ':' => array('<dl>', '</dl>', '<dt>', '</dt>', LIST_DEFINITION),
        );
        if (isset($styles[$listchar])) {
            list($list_tag, $list_close_tag, $item_tag, $item_close_tag, $list_style) = $styles[$listchar];
        } else {
            $list_tag = '';
            $list_close_tag = '';
            $item_tag = '';
            $item_close_tag = '';
            $list_style = LIST_NONE;
        }

        $tags = '';

        // If depth has reduced, do the number of closes needed to restore the level.
        for ($i = $this->list_depth; $i > $count; $i--) {
            $tags .= array_pop($this->list_backtrack);
        }

        // If depth has increased, do the number of opens needed to balance.
        for ($i = $this->list_depth; $i < $count; $i++) {
            array_push($this->list_backtrack, $list_close_tag);
            $tags .= $list_tag;
        }

        // List state is now the same as the style, and depth the same as count.
        $this->list_state = $list_style;
        $this->list_depth = $count;

        if ($blank) {
            return $tags;
        }

        // One space of indent per nesting level below the first.
        $indent = substr("                      ", 1, $count - 1);

        return $tags . $indent . $item_tag . ' ' . $line . $item_close_tag;
    }

    /**
     * Return a line after the various formatting replacements have been made.
     *
     * The order of the replacements is vital to stop them interfering with
     * each other.
     *
     * Reads the global $CFG (wwwroot, libdir) and calls get_file_url() with
     * the course id stored by convert().
     *
     * @param string $line a single line of Wiki-like text
     * @return string the line converted to Markdown (with some inline HTML)
     */
    public function line_replace($line) {
        global $CFG;

        // ---- (at least) means a <hr />
        // MARKDOWN: no change so leave.

        // Is this a list line (starts with * # ; :)?
        if (preg_match('/^([*]+|[#]+|[;]+|[:]+) /i', $line)) {
            $line = $this->do_list($line);
        }

        // Typographic conventions.
        // MARKDOWN: no equivalent, so convert to entities. The pairs are
        // applied in the order listed.
        $line = str_replace(
            array('...', '(R)', '(r)', '(TM)', '(tm)', '(C)', '1/4', '1/2', '3/4'),
            array(' &#8230; ', '&#174;', '&#174;', '&#8482;', '&#8482;', '&#169;', '&#188;', '&#189;', '&#190;'),
            $line
        );
        // (digits) x (digits) - multiply.
        $line = preg_replace('/([[:digit:]]+[[:space:]]*)x([[:space:]]*[[:digit:]]+)/i', '\\1&#215;\\2', $line);

        // Do formatting tags.
        // NOTE: The / replacement *has* to come before the ones that add HTML
        //       tags, or it will screw up the "/" in their closing tags.
        // MARKDOWN: only bold and italic change, the rest are just HTML.
        $line = $this->do_replace_markdown($line, '\*', '**');
        $line = $this->do_replace_markdown($line, '/', '*');
        $line = $this->do_replace($line, '\+', 'ins');
        $line = $this->do_replace_sub($line, '~', 'sub');
        $line = $this->do_replace_sub($line, '\^', 'sup');
        $line = $this->do_replace($line, '%', 'code');
        $line = $this->do_replace($line, '@', 'cite');

        // The URL patterns below contain literal "/" characters, so they use
        // "~" as the delimiter; with "/" as delimiter they fail to compile.

        // Convert urls into proper links with optional link text: URL(text).
        $line = preg_replace(
            '~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i',
            '\\1[\\5](\\2://\\3\\4)',
            $line
        );
        $line = preg_replace(
            '~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])\(([^)]+)\)~i',
            '\\1[\\5](http://www.\\2\\3)',
            $line
        );

        // Make bare urls (with and without a scheme) into proper links.
        $line = preg_replace(
            '~([[:space:]]|^)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])~i',
            '\\1<\\2://\\3\\4>',
            $line
        );
        $line = preg_replace(
            '~([[:space:]])www\.([^[:space:]]*)([[:alnum:]#?/&=])~i',
            '\\1<http://www.\\2\\3>',
            $line
        );

        // Make email addresses into mailtos: address(text).
        // MARKDOWN doesn't quite support this, so do it as HTML.
        $line = preg_replace(
            '/([[:space:]]|^)([[:alnum:]._-]+@[[:alnum:]._-]+)\(([^)]+)\)/i',
            '\\1<a href="mailto:\\2">\\3</a>',
            $line
        );

        // !# at the beginning of a line means a heading.
        // MARKDOWN: the value (1-6) becomes the number of hashes.
        if (preg_match('/^!([1-6]) (.*)$/i', $line, $regs)) {
            $hashes = substr('##########', 0, (int) $regs[1]);
            $line = preg_replace('/^!([1-6]) (.*)$/i', $hashes . ' \\2', $line);
        }

        // Acronym handling, example HTML(Hypertext Markup Language).
        // MARKDOWN: no equivalent, so just leave as HTML.
        $line = preg_replace('/([A-Z]+)\(([^)]+)\)/', '<acronym title="\\2">\\1</acronym>', $line);

        // Replace resource link module:id(Description Text).
        // MARKDOWN: change to MD web link style.
        $line = preg_replace(
            '/ ([a-zA-Z]+):([0-9]+)\(([^)]+)\)/i',
            ' [\\3](' . $CFG->wwwroot . '/mod/\\1/view.php?id=\\2) ',
            $line
        );

        require_once($CFG->libdir . '/filelib.php');
        $coursefileurl = get_file_url($this->courseid);

        // Replace picture resource link.
        $line = preg_replace(
            '#/([a-zA-Z0-9./_-]+)(png|gif|jpg)\(([^)]+)\)#i',
            '![\\3](' . $coursefileurl . '/\\1\\2)',
            $line
        );

        // Replace file resource link.
        $line = preg_replace(
            '#file:/([[:alnum:]/._-]+)\(([^)]+)\)#i',
            '[\\2](' . $coursefileurl . '/\\1)',
            $line
        );

        return $line;
    }

    /**
     * Main entry point for processing Wiki-like text.
     *
     * @param string $content  text with Wiki-like formatting
     * @param mixed  $courseid course id, stored and later passed to get_file_url()
     * @return string text with Markdown formatting
     */
    public function convert($content, $courseid) {
        // Initialisation stuff.
        $this->output = '';
        $this->block_state = STATE_NONE;
        $this->list_state = LIST_NONE;
        $this->list_depth = 0;
        $this->list_backtrack = array();
        $this->spelling_on = false;
        $this->courseid = $courseid;

        // Split content into an array of single lines.
        $lines = explode("\n", $content);
        $buffer = '';

        foreach ($lines as $line) {
            // A blank line ends the current block.
            if (preg_match("/^[[:blank:]\r]*$/i", $line)) {
                $buffer .= $this->close_block($this->block_state);
                $this->block_state = STATE_NONE;
                continue;
            }

            if ($this->block_state == STATE_NONE) {
                // The start of the line defines the block type.
                if (preg_match('/^> /i', $line)) {
                    // Blockquote.
                    $buffer .= $this->line_replace($line) . "\n";
                    $this->block_state = STATE_BLOCKQUOTE;
                } else if (preg_match('/^  /i', $line)) {
                    // Preformatted text.
                    // MARKDOWN: no real equivalent, so just use <pre>.
                    $buffer .= "<pre>\n" . $this->line_replace($line) . "\n";
                    $this->block_state = STATE_PREFORM;
                } else if (preg_match('/^\% /i', $line)) {
                    // Preformatted text - no processing.
                    // MARKDOWN: this is the MD code form of a paragraph.
                    $buffer .= '    ' . preg_replace('/^\%/i', '', $line) . "\n";
                    $this->block_state = STATE_NOTIKI;
                } else {
                    // Ordinary paragraph.
                    $buffer .= $this->line_replace($line) . "\n";
                    $this->block_state = STATE_PARAGRAPH;
                }
                continue;
            }

            if ($this->block_state == STATE_NOTIKI) {
                // Inside a no-format block: indent the raw line, no replacements.
                $buffer .= '    ' . $line . "\n";
            } else if ($this->block_state == STATE_PARAGRAPH
                    || $this->block_state == STATE_BLOCKQUOTE
                    || $this->block_state == STATE_PREFORM) {
                $buffer .= $this->line_replace($line) . "\n";
            }
        }

        // Close off any block level tags.
        $buffer .= $this->close_block($this->block_state);

        return $buffer;
    }
}