Moodle  2.2.1
http://www.collinsharper.com
C:/xampp/htdocs/moodle/lib/searchlib.php
Go to the documentation of this file.
00001 <?php
00002 
00003 // This file is part of Moodle - http://moodle.org/
00004 //
00005 // Moodle is free software: you can redistribute it and/or modify
00006 // it under the terms of the GNU General Public License as published by
00007 // the Free Software Foundation, either version 3 of the License, or
00008 // (at your option) any later version.
00009 //
00010 // Moodle is distributed in the hope that it will be useful,
00011 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 // GNU General Public License for more details.
00014 //
00015 // You should have received a copy of the GNU General Public License
00016 // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
00017 
00025 defined('MOODLE_INTERNAL') || die();
00026 
00028 require_once($CFG->libdir.'/lexer.php');
00029 
// Token type identifiers carried by every search_token.
// The values are numeric strings, not integers.
define('TOKEN_USER',     '0'); // user:foo
define('TOKEN_META',     '1'); // subject:foo
define('TOKEN_EXACT',    '2'); // +foo
define('TOKEN_NEGATE',   '3'); // -foo
define('TOKEN_STRING',   '4'); // plain word or "quoted phrase"
define('TOKEN_USERID',   '5'); // userid:foo
define('TOKEN_DATEFROM', '6'); // datefrom:foo
define('TOKEN_DATETO',   '7'); // dateto:foo
define('TOKEN_INSTANCE', '8'); // instance:foo
00041 
/**
 * One token produced while parsing a search string.
 *
 * Stores a token type (callers pass one of the TOKEN_* constants) together
 * with the token text, which is run through sanitize() on construction.
 */
class search_token {
    /** @var string Token text after sanitize(). */
    private $value;
    /** @var string Token type as given to the constructor. */
    private $type;

    /**
     * Creates a token.
     *
     * A real __construct() is required: a method merely named after the
     * class is no longer treated as a constructor by PHP 8, which would
     * leave both properties null.
     *
     * @param string $type  Token type.
     * @param string $value Raw token text; stored sanitized.
     */
    public function __construct($type, $value) {
        $this->type = $type;
        $this->value = $this->sanitize($value);
    }

    /**
     * Legacy PHP 4 style constructor, kept so code that calls it explicitly
     * keeps working.
     *
     * @param string $type  Token type.
     * @param string $value Raw token text; stored sanitized.
     */
    public function search_token($type, $value) {
        self::__construct($type, $value);
    }

    /**
     * Cleans up user input by converting HTML special characters to entities.
     *
     * @param string $userstring Raw text.
     * @return string Text as returned by htmlspecialchars().
     */
    public function sanitize($userstring) {
        return htmlspecialchars($userstring);
    }

    /**
     * @return string The sanitized token text.
     */
    public function getValue() {
        return $this->value;
    }

    /**
     * @return string The token type.
     */
    public function getType() {
        return $this->type;
    }
}
00072 
00073 
/**
 * Lexer configured with the patterns of the search syntax.
 *
 * Every construct is registered as an entry pattern on the base "accept"
 * state plus an exit pattern on the construct's own state. The state names
 * match the handler methods of search_parser; presumably the parent Lexer
 * dispatches matched text to the same-named method on $parser (lexer.php is
 * not visible here - confirm).
 */
class search_lexer extends Lexer {

    /**
     * Sets up the state machine and its pattern matches.
     *
     * A real __construct() is required: a method merely named after the
     * class is no longer treated as a constructor by PHP 8, so no pattern
     * would ever be registered.
     *
     * @param object $parser Parser handed to the parent Lexer.
     */
    public function __construct(&$parser) {
        // The parent class may offer a modern constructor or only the
        // PHP 4 style one used by the original code; support both.
        if (method_exists('Lexer', '__construct')) {
            parent::__construct($parser);
        } else {
            $this->Lexer($parser);
        }

        // Each row: entry pattern (matched in "accept"), state entered,
        // exit pattern (matched in that state, returning to "accept").
        // Rows are registered in order, so the specific keyword patterns
        // come before the catch-all plain word pattern at the end.
        $constructs = array(
            // datefrom:foo
            array("datefrom:\S+", "indatefrom", "\s"),
            // dateto:foo
            array("dateto:\S+", "indateto", "\s"),
            // instance:foo
            array("instance:\S+", "ininstance", "\s"),
            // userid:foo
            array("userid:\S+", "inuserid", "\s"),
            // user:foo
            array("user:\S+", "inusername", "\s"),
            // subject:foo (the meta field)
            array("subject:\S+", "inmeta", "\s"),
            // +foo : required exact match
            array("\+\S+", "inrequired", "\s"),
            // -foo : excluded string
            array("\-\S+", "inexcluded", "\s"),
            // "foo bar" : everything up to the closing quote
            array("\"[^\"]+", "inquotedstring", "\""),
            // Ordinary non-quoted word, up to the next whitespace
            array("\S+", "plainstring", "\s"),
        );

        foreach ($constructs as $construct) {
            list($entrypattern, $state, $exitpattern) = $construct;
            $this->addEntryPattern($entrypattern, "accept", $state);
            $this->addExitPattern($exitpattern, $state);
        }
    }

    /**
     * Legacy PHP 4 style constructor, kept so code that calls it explicitly
     * keeps working.
     *
     * @param object $parser Parser handed to the parent Lexer.
     */
    public function search_lexer(&$parser) {
        self::__construct($parser);
    }
}
00197 
00198 
00199 
/**
 * Collects search_token objects while a search string is being lexed.
 *
 * Apart from get_parsed_array(), the public methods are state handlers of
 * the parse state machine and should not be called directly. Each handler
 * receives the text matched for its state and always returns true. Content
 * too short to hold a parameter (a state exit or a missing parameter) emits
 * nothing.
 */
class search_parser {
    /**
     * @var array Tokens emitted so far. Starts as an empty array so that
     *            get_parsed_array() never returns null, which count() would
     *            reject on PHP 8.
     */
    private $tokens = array();

    /**
     * Called by the code that is interested in the result of the parse.
     *
     * @return array List of search_token objects, empty if none were emitted.
     */
    public function get_parsed_array() {
        return $this->tokens;
    }

    /**
     * Base state. No output emitted.
     *
     * @return bool Always true.
     */
    public function accept() {
        return true;
    }

    /**
     * Handles datefrom:foo constructs.
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function indatefrom($content) {
        return $this->emit_remainder($content, 9, TOKEN_DATEFROM, true);
    }

    /**
     * Handles dateto:foo constructs.
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function indateto($content) {
        return $this->emit_remainder($content, 7, TOKEN_DATETO, true);
    }

    /**
     * Handles instance:foo constructs.
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function ininstance($content) {
        return $this->emit_remainder($content, 9, TOKEN_INSTANCE, true);
    }

    /**
     * Handles userid:foo constructs.
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function inuserid($content) {
        return $this->emit_remainder($content, 7, TOKEN_USERID, true);
    }

    /**
     * Handles user:foo constructs.
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function inusername($content) {
        return $this->emit_remainder($content, 5, TOKEN_USER, true);
    }

    /**
     * Handles meta constructs; the stripped prefix is 8 bytes long, the
     * length of "subject:".
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function inmeta($content) {
        return $this->emit_remainder($content, 8, TOKEN_META, true);
    }

    /**
     * Handles required strings (+foo); the + sign is stripped.
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function inrequired($content) {
        return $this->emit_remainder($content, 1, TOKEN_EXACT, false);
    }

    /**
     * Handles excluded strings (-foo); the - sign is stripped.
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function inexcluded($content) {
        return $this->emit_remainder($content, 1, TOKEN_NEGATE, false);
    }

    /**
     * Handles quoted strings; the opening quote is stripped.
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function inquotedstring($content) {
        return $this->emit_remainder($content, 1, TOKEN_STRING, false);
    }

    /**
     * Handles ordinary, non-quoted words.
     *
     * @param string $content Matched text.
     * @return bool Always true.
     */
    public function plainstring($content) {
        if (trim($content) === '') { // State exit.
            return true;
        }
        $this->tokens[] = new search_token(TOKEN_STRING, $content);
        return true;
    }

    /**
     * Strips a fixed-length prefix from $content and emits the remainder as
     * a token, unless nothing follows the prefix.
     *
     * @param string $content      Matched text.
     * @param int    $prefixlength Number of leading bytes to drop.
     * @param string $type         Token type to emit.
     * @param bool   $trim         Whether to trim() the remainder first.
     * @return bool Always true.
     */
    private function emit_remainder($content, $prefixlength, $type, $trim) {
        if (strlen($content) <= $prefixlength) { // State exit or missing parameter.
            return true;
        }
        $remainder = substr($content, $prefixlength);
        if ($trim) {
            $remainder = trim($remainder);
        }
        $this->tokens[] = new search_token($type, $remainder);
        return true;
    }
}
00342 
/**
 * Builds a search condition using MySQL full-text search
 * (MATCH ... AGAINST ... IN BOOLEAN MODE).
 *
 * Delegates entirely to search_generate_SQL() when the database family is
 * not mysql, or when the current language is one of ja, th, zh_cn, zh_tw.
 * Otherwise text tokens (string, exact, negate, meta) are turned into
 * boolean-mode search expressions, and the remaining token types are passed
 * to search_generate_SQL() and ANDed onto the result.
 *
 * @param array  $parsetree          Tokens offering getType() and getValue().
 * @param string $datafield          Column searched for text tokens.
 * @param string $metafield          Column searched for meta tokens; also
 *                                   added to the data MATCH when not empty.
 * @param string $mainidfield        Forwarded to search_generate_SQL().
 * @param string $useridfield        Forwarded to search_generate_SQL().
 * @param string $userfirstnamefield Forwarded to search_generate_SQL().
 * @param string $userlastnamefield  Forwarded to search_generate_SQL().
 * @param string $timefield          Forwarded to search_generate_SQL().
 * @param string $instancefield      Forwarded to search_generate_SQL().
 * @return array|string array($sql, $params), or an empty string when there
 *                      are no tokens or a token has an unknown type.
 */
function search_generate_text_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                             $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
    global $DB;
    static $p = 0; // Keeps placeholder names unique across calls.

    // Fall back to the generic generator for other databases and for the
    // languages listed here.
    $nonseparatedlangs = array('ja', 'th', 'zh_cn', 'zh_tw');
    if ($DB->get_db_family() != 'mysql' || in_array(current_language(), $nonseparatedlangs, true)) {
        return search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                                   $userfirstnamefield, $userlastnamefield, $timefield, $instancefield);
    }

    // empty() also covers a null parse tree, which count() rejects on PHP 8.
    if (empty($parsetree)) {
        return "";
    }

    $non_text_tokens = array();
    $datasearch_clause = '';
    $metasearch_clause = '';

    foreach ($parsetree as $token) {
        $value = $token->getValue();

        switch ($token->getType()) {
            case TOKEN_STRING:
                if (strpos($value, ' ') !== false) {
                    // Values containing a space are searched as a phrase.
                    $datasearch_clause .= '"' . $value . '" ';
                } else {
                    $datasearch_clause .= '+' . $value . '* ';
                }
                break;
            case TOKEN_EXACT:
                $datasearch_clause .= '+' . $value . ' ';
                break;
            case TOKEN_NEGATE:
                $datasearch_clause .= '-' . $value . '* ';
                break;
            case TOKEN_META:
                $metasearch_clause .= '+' . $value . '* ';
                break;
            case TOKEN_USER:
            case TOKEN_USERID:
            case TOKEN_INSTANCE:
            case TOKEN_DATETO:
            case TOKEN_DATEFROM:
                // Not text searches: handled by search_generate_SQL() below.
                $non_text_tokens[] = $token;
                break;
            default:
                return '';
        }
    }

    $params = array();
    $SQLString = '';
    if (!empty($non_text_tokens)) {
        list($SQLString, $sparams) = search_generate_SQL($non_text_tokens, $datafield, $metafield, $mainidfield, $useridfield,
                                         $userfirstnamefield, $userlastnamefield, $timefield, $instancefield);
        $params = array_merge($params, $sparams);
    }

    $clauses = array();

    // The placeholders below must NOT be wrapped in quotes: a placeholder
    // inside an SQL string literal is not a bound parameter.
    if (!empty($datasearch_clause) && !empty($datafield)) {
        $fields = empty($metafield) ? $datafield : $datafield . ', ' . $metafield;
        $name = 'sgt' . $p++;
        $clauses[] = 'MATCH (' . $fields . ') AGAINST (:' . $name . ' IN BOOLEAN MODE)';
        $params[$name] = trim($datasearch_clause);
    }

    if (!empty($metasearch_clause) && !empty($metafield)) {
        $name = 'sgt' . $p++;
        $clauses[] = 'MATCH (' . $metafield . ') AGAINST (:' . $name . ' IN BOOLEAN MODE)';
        $params[$name] = trim($metasearch_clause);
    }

    if (!empty($SQLString)) {
        $clauses[] = $SQLString;
    }

    return array(implode(' AND ', $clauses), $params);
}
00485 
/**
 * Builds a portable search condition from parsed search tokens.
 *
 * Each token contributes one parenthesised clause; the clauses are joined
 * with AND. Values are always passed as named parameters.
 *
 * When the database does not support regular expressions, leading and
 * trailing + and - characters are trimmed from every value and exact
 * tokens are treated as plain string tokens.
 *
 * @param array  $parsetree          Tokens offering getType() and getValue().
 * @param string $datafield          Column matched by string/exact/negate tokens.
 * @param string $metafield          Second column matched by those tokens, and
 *                                   the only one matched by meta tokens.
 * @param string $mainidfield        Compared for equality with $useridfield
 *                                   in user tokens.
 * @param string $useridfield        Column compared with userid token values.
 * @param string $userfirstnamefield Column matched by user tokens.
 * @param string $userlastnamefield  Column matched by user tokens.
 * @param string $timefield          Column compared with datefrom/dateto values.
 * @param string $instancefield      Column compared with instance token values.
 * @return array|string array($sql, $params), or an empty string when there
 *                      are no tokens or a token has an unknown type.
 */
function search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                             $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
    global $DB;
    static $p = 0; // Keeps placeholder names unique across calls.

    // empty() also covers a null parse tree, which count() rejects on PHP 8.
    if (empty($parsetree)) {
        return "";
    }

    // Loop-invariant, so asked once.
    $regexsupported = $DB->sql_regex_supported();
    $REGEXP = $regexsupported ? $DB->sql_regex(true) : null;

    $params = array();
    // Clauses are collected and joined at the end, so a token that
    // contributes nothing cannot leave a dangling or leading AND behind.
    $clauses = array();

    foreach ($parsetree as $token) {
        $type = $token->getType();
        $value = $token->getValue();

        if (!$regexsupported) {
            $value = trim($value, '+-');
            if ($type == TOKEN_EXACT) {
                $type = TOKEN_STRING;
            }
        }

        // Two names are reserved per token whether or not both are used.
        $name1 = 'sq'.$p++;
        $name2 = 'sq'.$p++;

        switch ($type) {
            case TOKEN_STRING:
                $clauses[] = "((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false)."))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_EXACT:
                // NOTE(review): [[:<:]] and [[:>:]] are word-boundary markers
                // that not every regex engine accepts - confirm against the
                // databases in use.
                $clauses[] = "(($datafield $REGEXP :$name1) OR ($metafield $REGEXP :$name2))";
                $params[$name1] = "[[:<:]]".$value."[[:>:]]";
                $params[$name2] = "[[:<:]]".$value."[[:>:]]";
                break;
            case TOKEN_META:
                // Without a meta field this token adds no condition at all.
                if ($metafield != '') {
                    $clauses[] = "(".$DB->sql_like($metafield, ":$name1", false).")";
                    $params[$name1] = "%$value%";
                }
                break;
            case TOKEN_USER:
                $clauses[] = "(($mainidfield = $useridfield) AND ((".$DB->sql_like($userfirstnamefield, ":$name1", false).") OR (".$DB->sql_like($userlastnamefield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_USERID:
                $clauses[] = "($useridfield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_INSTANCE:
                $clauses[] = "($instancefield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATETO:
                $clauses[] = "($timefield <= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATEFROM:
                $clauses[] = "($timefield >= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_NEGATE:
                $clauses[] = "(NOT ((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            default:
                return '';
        }
    }

    return array(implode(' AND ', $clauses), $params);
}
 All Data Structures Namespaces Files Functions Variables Enumerations