|
Moodle
2.2.1
http://www.collinsharper.com
|
<?php

// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.

/**
 * Search query parsing and SQL generation.
 *
 * Provides a token class, a lexer/parser pair that turns a user search
 * string into a list of tokens, and two functions that turn such a token
 * list into an SQL condition plus its named parameters.
 */

defined('MOODLE_INTERNAL') || die();

// The search lexer extends the generic Lexer class.
require_once($CFG->libdir.'/lexer.php');

// Token types emitted by search_parser and consumed by the SQL generators.
define("TOKEN_USER","0");
define("TOKEN_META","1");
define("TOKEN_EXACT","2");
define("TOKEN_NEGATE","3");
define("TOKEN_STRING","4");
define("TOKEN_USERID","5");
define("TOKEN_DATEFROM","6");
define("TOKEN_DATETO","7");
define("TOKEN_INSTANCE","8");

/**
 * A single parsed search term: a TOKEN_* type plus its (sanitized) value.
 */
class search_token {
    private $value;
    private $type;

    /**
     * Store the token type and the sanitized token value.
     *
     * @param string $type one of the TOKEN_* constants
     * @param string $value raw text captured from the search string
     */
    public function __construct($type, $value) {
        $this->type = $type;
        $this->value = $this->sanitize($value);
    }

    /**
     * Old-style constructor name, kept only so existing explicit calls keep
     * working. PHP 8 no longer treats a method named after the class as a
     * constructor, so the real work lives in __construct().
     *
     * @param string $type one of the TOKEN_* constants
     * @param string $value raw text captured from the search string
     */
    public function search_token($type, $value) {
        self::__construct($type, $value);
    }

    /**
     * Try to clean up user input to avoid potential security issues.
     * Currently this only HTML-escapes the string.
     *
     * @param string $userstring raw user input
     * @return string the escaped string
     */
    public function sanitize($userstring) {
        return htmlspecialchars($userstring);
    }

    /**
     * @return string the sanitized token value
     */
    public function getValue() {
        return $this->value;
    }

    /**
     * @return string the TOKEN_* type of this token
     */
    public function getType() {
        return $this->type;
    }
}


/**
 * Lexer for search strings.
 *
 * Registers, for every supported construct, an entry pattern that moves
 * from the base "accept" state into a dedicated state, and an exit pattern
 * that returns to "accept". The state names match the handler methods of
 * search_parser.
 */
class search_lexer extends Lexer {

    /**
     * Set up the state machine and the pattern matches for its transitions.
     *
     * @param search_parser $parser the parser whose handler methods receive the matched text
     */
    public function __construct(&$parser) {

        // Initialise the parent lexer through the same method the original
        // code called.
        $this->Lexer($parser);

        // Each row is: entry pattern (matched while in "accept"), the state
        // it enters, and the pattern that exits that state again.
        // The rows are registered in this order, which is the order the
        // entry patterns are added to the "accept" state; the catch-all
        // plain word pattern therefore stays last.
        $transitions = array(
            // datefrom:foo
            array("datefrom:\S+", "indatefrom", "\s"),
            // dateto:foo
            array("dateto:\S+", "indateto", "\s"),
            // instance:foo
            array("instance:\S+", "ininstance", "\s"),
            // userid:foo
            array("userid:\S+", "inuserid", "\s"),
            // user:foo
            array("user:\S+", "inusername", "\s"),
            // subject:foo (handled as a "meta" token)
            array("subject:\S+", "inmeta", "\s"),
            // Required exact match strings (+foo).
            array("\+\S+", "inrequired", "\s"),
            // Excluded strings (-foo).
            array("\-\S+", "inexcluded", "\s"),
            // Quoted strings: grab everything up to the closing quote.
            array("\"[^\"]+", "inquotedstring", "\""),
            // Ordinary, non-quoted words: everything up to whitespace.
            array("\S+", "plainstring", "\s"),
        );

        foreach ($transitions as $transition) {
            list($entrypattern, $state, $exitpattern) = $transition;
            $this->addEntryPattern($entrypattern, "accept", $state);
            $this->addExitPattern($exitpattern, $state);
        }
    }

    /**
     * Old-style constructor name, kept only so existing explicit calls keep
     * working; see __construct().
     *
     * @param search_parser $parser the parser whose handler methods receive the matched text
     */
    public function search_lexer(&$parser) {
        self::__construct($parser);
    }
}


/**
 * Parser that collects search_token objects while the lexer runs.
 *
 * Apart from get_parsed_array(), the public methods are state handlers
 * named after the lexer states; they are part of the state machine for the
 * parse operation and should not be called directly.
 */
class search_parser {
    /** @var search_token[] tokens emitted so far */
    private $tokens = array();

    /**
     * Called by the code that is interested in the result of the parse operation.
     *
     * @return search_token[] the tokens emitted so far (empty array if none)
     */
    public function get_parsed_array() {
        return $this->tokens;
    }

    /**
     * Emit a token for $content with its leading prefix removed.
     *
     * Nothing is emitted when $content is not longer than the prefix, which
     * is the case on state exit or when the parameter is missing.
     *
     * @param string $content text handed over for the current state
     * @param int $prefixlength number of leading characters to strip
     * @param string $type TOKEN_* type of the token to emit
     * @param bool $trim whether to trim the remainder before storing it
     * @return bool always true
     */
    private function add_token_after_prefix($content, $prefixlength, $type, $trim) {
        if (strlen($content) <= $prefixlength) {
            return true;
        }
        $remainder = substr($content, $prefixlength);
        if ($trim) {
            $remainder = trim($remainder);
        }
        $this->tokens[] = new search_token($type, $remainder);
        return true;
    }

    /**
     * Base state. No output emitted.
     *
     * @return bool always true
     */
    public function accept() {
        return true;
    }

    /**
     * State for handling datefrom:foo constructs. Potentially emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function indatefrom($content) {
        // Strip off the "datefrom:" part (9 characters).
        return $this->add_token_after_prefix($content, 9, TOKEN_DATEFROM, true);
    }

    /**
     * State for handling dateto:foo constructs. Potentially emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function indateto($content) {
        // Strip off the "dateto:" part (7 characters).
        return $this->add_token_after_prefix($content, 7, TOKEN_DATETO, true);
    }

    /**
     * State for handling instance:foo constructs. Potentially emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function ininstance($content) {
        // Strip off the "instance:" part (9 characters).
        return $this->add_token_after_prefix($content, 9, TOKEN_INSTANCE, true);
    }

    /**
     * State for handling userid:foo constructs. Potentially emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function inuserid($content) {
        // Strip off the "userid:" part (7 characters).
        return $this->add_token_after_prefix($content, 7, TOKEN_USERID, true);
    }

    /**
     * State for handling user:foo constructs. Potentially emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function inusername($content) {
        // Strip off the "user:" part (5 characters).
        return $this->add_token_after_prefix($content, 5, TOKEN_USER, true);
    }

    /**
     * State for handling subject:foo constructs, which produce TOKEN_META
     * tokens. Potentially emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function inmeta($content) {
        // Strip off the "subject:" part (8 characters).
        return $this->add_token_after_prefix($content, 8, TOKEN_META, true);
    }

    /**
     * State entered when we've seen a required string (+foo). Potentially
     * emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function inrequired($content) {
        // Strip off the + sign; the remainder is stored untrimmed.
        return $this->add_token_after_prefix($content, 1, TOKEN_EXACT, false);
    }

    /**
     * State entered when we've seen an excluded string (-foo). Potentially
     * emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function inexcluded($content) {
        // Strip off the - sign; the remainder is stored untrimmed.
        return $this->add_token_after_prefix($content, 1, TOKEN_NEGATE, false);
    }

    /**
     * State entered when we've seen a quoted string. Potentially emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function inquotedstring($content) {
        // Strip off the opening quote; the remainder is stored untrimmed.
        return $this->add_token_after_prefix($content, 1, TOKEN_STRING, false);
    }

    /**
     * State entered when we've seen an ordinary, non-quoted word. Potentially
     * emits a token.
     *
     * @param string $content matched text
     * @return bool always true
     */
    public function plainstring($content) {
        if (trim($content) === '') { // State exit.
            return true;
        }
        // Add the string, as received, to the parsed token array.
        $this->tokens[] = new search_token(TOKEN_STRING, $content);
        return true;
    }
}

/**
 * Build a MySQL fulltext (MATCH ... AGAINST ... IN BOOLEAN MODE) condition
 * from a parsed search.
 *
 * Falls back to search_generate_SQL() when the database family is not
 * MySQL, or when the current language is one of ja, th, zh_cn, zh_tw.
 * Text tokens are folded into boolean-mode search strings; all other token
 * types are delegated to search_generate_SQL() and ANDed to the result.
 *
 * @param search_token[] $parsetree tokens as returned by search_parser::get_parsed_array()
 * @param string $datafield field searched for text tokens
 * @param string $metafield field searched for meta (subject:) tokens; also added to the text MATCH
 * @param string $mainidfield passed through to search_generate_SQL()
 * @param string $useridfield passed through to search_generate_SQL()
 * @param string $userfirstnamefield passed through to search_generate_SQL()
 * @param string $userlastnamefield passed through to search_generate_SQL()
 * @param string $timefield passed through to search_generate_SQL()
 * @param string $instancefield passed through to search_generate_SQL()
 * @return array|string array($sql, $params); an empty string when there are
 *      no tokens or an unknown token type is found
 */
function search_generate_text_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                             $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
    global $DB;
    static $p = 0; // Counter keeping the :sgtN placeholder names unique across calls.

    // Fulltext search is only generated for MySQL, and not for the languages
    // listed here; everything else uses the generic generator.
    $nonseparatedlangs = array('ja', 'th', 'zh_cn', 'zh_tw');
    if ($DB->get_db_family() != 'mysql' || in_array(current_language(), $nonseparatedlangs)) {
        return search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                                   $userfirstnamefield, $userlastnamefield, $timefield, $instancefield);
    }

    // empty() rather than count() so that a null parse tree (no tokens were
    // ever emitted) is handled without a warning or TypeError.
    if (empty($parsetree)) {
        return "";
    }

    $nontexttokens = array();
    $datasearchclause = '';
    $metasearchclause = '';

    foreach ($parsetree as $token) {
        $value = $token->getValue();

        switch ($token->getType()) {
            case TOKEN_STRING:
                if (strpos($value, ' ') !== false) {
                    // Values containing a space are searched as a quoted phrase.
                    $datasearchclause .= '"' . $value . '" ';
                } else {
                    // Single words are required and prefix-matched.
                    $datasearchclause .= '+' . $value . '* ';
                }
                break;
            case TOKEN_EXACT:
                $datasearchclause .= '+' . $value . ' ';
                break;
            case TOKEN_NEGATE:
                $datasearchclause .= '-' . $value . '* ';
                break;
            case TOKEN_META:
                $metasearchclause .= '+' . $value . '* ';
                break;
            case TOKEN_USER:
            case TOKEN_USERID:
            case TOKEN_INSTANCE:
            case TOKEN_DATETO:
            case TOKEN_DATEFROM:
                // Not expressible as fulltext; handled by search_generate_SQL() below.
                $nontexttokens[] = $token;
                break;
            default:
                return '';
        }
    }

    $params = array();
    $nontextsql = '';
    if (!empty($nontexttokens)) {
        list($nontextsql, $params) = search_generate_SQL($nontexttokens, $datafield, $metafield, $mainidfield, $useridfield,
                                                         $userfirstnamefield, $userlastnamefield, $timefield, $instancefield);
    }

    // Conditions are collected here and joined with AND at the end.
    $conditions = array();

    if (!empty($datasearch_clause = $datasearchclause) && !empty($datafield)) {
        $matchfields = $datafield;
        if (!empty($metafield)) {
            $matchfields .= ', ' . $metafield;
        }
        // The placeholder must NOT be wrapped in quotes: the database layer
        // supplies the quoting when it binds the value.
        $name = 'sgt' . $p++;
        $conditions[] = 'MATCH (' . $matchfields . ') AGAINST (:' . $name . ' IN BOOLEAN MODE)';
        $params[$name] = trim($datasearch_clause);
    }

    if (!empty($metasearchclause) && !empty($metafield)) {
        $name = 'sgt' . $p++;
        $conditions[] = 'MATCH (' . $metafield . ') AGAINST (:' . $name . ' IN BOOLEAN MODE)';
        $params[$name] = trim($metasearchclause);
    }

    if (!empty($nontextsql)) {
        $conditions[] = $nontextsql;
    }

    return array(implode(' AND ', $conditions), $params);
}

/**
 * Build a generic SQL condition (LIKE / regular expression / comparisons)
 * from a parsed search. One clause is generated per token and all clauses
 * are joined with AND.
 *
 * When the database does not support regular expressions, leading and
 * trailing + and - characters are trimmed from every value and exact-match
 * tokens are treated as plain string tokens.
 *
 * NOTE(review): TOKEN_STRING and TOKEN_NEGATE always reference $metafield;
 * callers presumably pass a non-empty meta field - confirm.
 *
 * @param search_token[] $parsetree tokens as returned by search_parser::get_parsed_array()
 * @param string $datafield field searched for string, exact and negated tokens
 * @param string $metafield second field searched for those tokens, and the field for meta tokens
 * @param string $mainidfield compared for equality with $useridfield in user: clauses
 * @param string $useridfield field compared against userid: values
 * @param string $userfirstnamefield field searched for user: values
 * @param string $userlastnamefield field searched for user: values
 * @param string $timefield field compared against datefrom: / dateto: values
 * @param string $instancefield field compared against instance: values
 * @return array|string array($sql, $params); an empty string when there are
 *      no tokens or an unknown token type is found
 */
function search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                             $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
    global $DB;
    static $p = 0; // Counter keeping the :sqN placeholder names unique across calls.

    // empty() rather than count() so that a null parse tree (no tokens were
    // ever emitted) is handled without a warning or TypeError.
    if (empty($parsetree)) {
        return "";
    }

    $regexsupported = $DB->sql_regex_supported();
    $regexp = $regexsupported ? $DB->sql_regex(true) : '';

    $params = array();
    // One entry per generated clause. Collecting them (instead of appending
    // ' AND ' before every token) avoids a dangling AND when a token yields
    // no clause, as TOKEN_META does when $metafield is empty.
    $clauses = array();

    foreach ($parsetree as $token) {
        $type = $token->getType();
        $value = $token->getValue();

        if (!$regexsupported) {
            // Without regular expressions exact matching is not available:
            // degrade to a plain substring search.
            $value = trim($value, '+-');
            if ($type == TOKEN_EXACT) {
                $type = TOKEN_STRING;
            }
        }

        // Two names are reserved per token even when only one is used.
        $name1 = 'sq'.$p++;
        $name2 = 'sq'.$p++;

        switch ($type) {
            case TOKEN_STRING:
                $clauses[] = "((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false)."))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_EXACT:
                // Whole-word match using word boundary markers.
                $clauses[] = "(($datafield $regexp :$name1) OR ($metafield $regexp :$name2))";
                $params[$name1] = "[[:<:]]".$value."[[:>:]]";
                $params[$name2] = "[[:<:]]".$value."[[:>:]]";
                break;
            case TOKEN_META:
                if ($metafield != '') {
                    $clauses[] = "(".$DB->sql_like($metafield, ":$name1", false).")";
                    $params[$name1] = "%$value%";
                }
                break;
            case TOKEN_USER:
                $clauses[] = "(($mainidfield = $useridfield) AND ((".$DB->sql_like($userfirstnamefield, ":$name1", false).") OR (".$DB->sql_like($userlastnamefield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_USERID:
                $clauses[] = "($useridfield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_INSTANCE:
                $clauses[] = "($instancefield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATETO:
                $clauses[] = "($timefield <= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATEFROM:
                $clauses[] = "($timefield >= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_NEGATE:
                $clauses[] = "(NOT ((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            default:
                return '';
        }
    }

    return array(implode(' AND ', $clauses), $params);
}