|
Moodle
2.2.1
http://www.collinsharper.com
|
<?php

/**
 * Lexer that builds an HTML Purifier token list by driving an XML_HTMLSax3
 * parser and collecting the events it reports through the handler methods
 * below.
 *
 * The handler methods are public only because the parser has to call them;
 * they are registered by name in tokenizeHTML().
 */
class HTMLPurifier_Lexer_PEARSax3 extends HTMLPurifier_Lexer
{

    /**
     * Tokens accumulated during the current tokenizeHTML() run.
     */
    protected $tokens = array();

    /**
     * True when the most recently emitted token was an empty (self-closing)
     * element; used to swallow the extra close event that follows it.
     */
    protected $last_token_was_empty;

    /**
     * Error handler that was active before tokenizeHTML() installed
     * muteStrictErrorHandler(); null when there was none.
     */
    private $parent_handler;

    /**
     * Names of the elements that are currently open, innermost last.
     */
    private $stack = array();

    /**
     * Tokenizes an HTML string.
     *
     * @param string $string  HTML to tokenize
     * @param mixed  $config  passed through to normalize()
     * @param mixed  $context passed through to normalize()
     * @return array list of HTMLPurifier_Token_* objects
     */
    public function tokenizeHTML($string, $config, $context) {

        // Start from a clean slate. The element stack must be reset together
        // with the token list, otherwise elements left open by a previous
        // document would influence how comments are classified in this one.
        $this->tokens = array();
        $this->stack  = array();
        $this->last_token_was_empty = false;

        // normalize() is not defined in this class (presumably inherited).
        $string = $this->normalize($string, $config, $context);

        $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler'));

        try {
            $parser = new XML_HTMLSax3();
            $parser->set_object($this);
            $parser->set_element_handler('openHandler', 'closeHandler');
            $parser->set_data_handler('dataHandler');
            $parser->set_escape_handler('escapeHandler');

            // doesn't seem to work correctly for attributes
            $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1);

            $parser->parse($string);
        } catch (Exception $e) {
            // Do not leave our handler installed globally when parsing fails.
            // (catch + rethrow rather than "finally", which this file's
            // PHP 5 era syntax does not otherwise rely on.)
            restore_error_handler();
            throw $e;
        }

        restore_error_handler();

        return $this->tokens;

    }

    /**
     * Open tag event handler.
     *
     * @param object $parser parser that raised the event (unused)
     * @param string $name   element name
     * @param array  $attrs  attribute name => raw attribute value
     * @param bool   $closed true when the tag is self-closing
     * @return bool always true
     */
    public function openHandler(&$parser, $name, $attrs, $closed) {
        // entities are not resolved in attrs
        // (parseData() is not defined in this class; presumably inherited)
        foreach ($attrs as $key => $attr) {
            $attrs[$key] = $this->parseData($attr);
        }
        if ($closed) {
            $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs);
            $this->last_token_was_empty = true;
            // An empty element never stays open, and closeHandler() ignores
            // the close event that follows it, so it must not be pushed onto
            // the stack: nothing would ever pop it again.
        } else {
            $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs);
            $this->stack[] = $name;
        }
        return true;
    }

    /**
     * Close tag event handler.
     *
     * @param object $parser parser that raised the event (unused)
     * @param string $name   element name
     * @return bool always true
     */
    public function closeHandler(&$parser, $name) {
        // HTMLSax3 seems to always send empty tags an extra close tag
        // check and ignore if you see it:
        // [TESTME] to make sure it doesn't overreach
        if ($this->last_token_was_empty) {
            $this->last_token_was_empty = false;
            return true;
        }
        $this->tokens[] = new HTMLPurifier_Token_End($name);
        if (!empty($this->stack)) {
            array_pop($this->stack);
        }
        return true;
    }

    /**
     * Character data event handler.
     *
     * @param object $parser parser that raised the event (unused)
     * @param string $data   text content
     * @return bool always true
     */
    public function dataHandler(&$parser, $data) {
        $this->last_token_was_empty = false;
        $this->tokens[] = new HTMLPurifier_Token_Text($data);
        return true;
    }

    /**
     * Escaped text event handler; only data starting with "--" (a comment)
     * produces a token.
     *
     * @param object $parser parser that raised the event (unused)
     * @param string $data   escape contents, including the comment dashes
     * @return bool always true
     */
    public function escapeHandler(&$parser, $data) {
        if (strpos($data, '--') === 0) {
            // remove trailing and leading double-dashes
            // (the cast guards against substr() returning false instead of
            // an empty string on older PHP versions when $data is just "--")
            $data = (string) substr($data, 2);
            if (strlen($data) >= 2 && substr($data, -2) == "--") {
                $data = substr($data, 0, -2);
            }
            $depth = count($this->stack);
            if ($depth > 0 && $this->stack[$depth - 1] == "style") {
                // a comment directly inside <style> is kept as text
                $this->tokens[] = new HTMLPurifier_Token_Text($data);
            } else {
                $this->tokens[] = new HTMLPurifier_Token_Comment($data);
            }
            $this->last_token_was_empty = false;
        }
        // CDATA is handled elsewhere, but if it was handled here:
        //if (strpos($data, '[CDATA[') === 0) {
        //    $this->tokens[] = new HTMLPurifier_Token_Text(
        //        substr($data, 7, strlen($data) - 9) );
        //}
        return true;
    }

    /**
     * Error handler installed while parsing: swallows E_STRICT errors and
     * forwards everything else to the handler that was active before.
     *
     * @param int         $errno      error level
     * @param string      $errstr     error message
     * @param string|null $errfile    file the error was raised in
     * @param int|null    $errline    line the error was raised on
     * @param array|null  $errcontext symbol table at the point of the error
     * @return mixed nothing for E_STRICT; false when there is no previous
     *               handler (so PHP's built-in handler runs); otherwise
     *               whatever the previous handler returns
     */
    public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) {
        if ($errno == E_STRICT) return;
        // set_error_handler() returns null when no user handler was
        // installed before ours; returning false hands the error back to
        // PHP's standard handler instead of calling an invalid callback.
        if (!is_callable($this->parent_handler)) {
            return false;
        }
        return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext);
    }

}

// vim: et sw=4 sts=4