Moodle  2.2.1
http://www.collinsharper.com
C:/xampp/htdocs/moodle/mod/wiki/diff/difflib.php
Go to the documentation of this file.
00001 <?php
00010 // Standard diff
00012  
00027 function ouwiki_diff_internal($file1,$file2) {
00028         // Basic variables
00029         $n=count($file2);
00030         $m=count($file1);
00031     
00032     // Special-case for empty file2 which otherwise causes error
00033     if($n==0) 
00034     {
00035         $result=array();
00036         for($i=1;$i<=$m;$i++) 
00037         {
00038             $result[$i]=0;
00039         }
00040         return $result;
00041     }
00042         
00043         // Step 1   Build list of elements
00045         
00046         $V=array(); 
00047         for($j=1;$j<=$n;$j++) {
00048                 $V[$j]=new StdClass;
00049                 $V[$j]->serial=$j;
00050                 $V[$j]->hash=crc32($file2[$j]);
00051         }
00052     
00053         // Step 2   Sort by hash,serial
00055         
00056         usort($V,"ouwiki_diff_sort_v");
00057     
00058     // Make it start from 1 again
00059     array_unshift($V,'bogus');
00060     unset($V[0]);
00061         
00062         // $V is now an array including the line number 'serial' and hash
00063         // of each line in file 2, sorted by hash and then serial.      
00064         
00065         // Step 3   Equivalence classes
00067         
00068         $E=array();
00069         $E[0]=new StdClass;
00070         $E[0]->serial=0;
00071         $E[0]->last=true;
00072         for($j=1;$j<=$n;$j++) {
00073                 $E[$j]=new StdClass;
00074                 $E[$j]->serial=$V[$j]->serial;
00075                 $E[$j]->last=$j===$n || $V[$j]->hash!==$V[$j+1]->hash;
00076         }
00077 
00078         // E is now an array sorted the same way as $V which includes 
00079         // the line number 'serial' and whether or not that is the 'last'
00080         // line in the given equivalence class, i.e. set of identical lines
00081         
00082         // Step 4   For each line in file1, finds start of equivalence class
00084     $P=array();
00085         for($i=1;$i<=$m;$i++) {
00086                 // Find matching last entry from equivalence list    
00087                 $P[$i]=ouwiki_diff_find_last($V,$E,crc32($file1[$i]));
00088         }
00089         
00090         // P is now an array that finds the index (within $V) of the *first*
00091         // matching line in $V (referencing file 2, but not a line number,
00092         // because sorted in $V order) for each line in file 1. In other words
00093         // if you were to start at the P-value in $V and continue through, you
00094         // would find all the lines from file 2 that are equal to the given line 
00095         // from file 1. 
00096         
00097         // Step 5   Initialise vector of candidates
00099         
00100         // I do not trust PHP references further than I can throw them (preferably
00101         // at the idiot who came up with the idea) so I am using a separate array
00102         // to store candidates and all references are integers into that.
00103         
00104         $candidates=array();
00105         $candidates[0]=new StdClass;
00106         $candidates[0]->a=0;
00107         $candidates[0]->b=0;
00108         $candidates[0]->previous=null;
00109         $candidates[1]=new StdClass;
00110         $candidates[1]->a=$m+1; 
00111         $candidates[1]->b=$n+1;
00112         $candidates[1]->previous=null;
00113         
00114         $K=array();
00115         $K[0]=0; // Ref to candidate 0
00116         $K[1]=1; // Ref to candidate 1
00117         $k=0;
00118         
00119         // Step 6   Merge stage
00121         
00122         for($i=1;$i<=$m;$i++) {
00123                 if($P[$i]!==0) {
00124                         ouwiki_diff_merge($K,$k,$i,$E,$P[$i],$candidates);
00125                 }
00126         }
00127         
00128         // Step 7
00130         
00131         $J=array();
00132         for($i=1;$i<=$m;$i++) {
00133                 $J[$i]=0;
00134         }
00135         
00136         // Step 8   Follow candidate chain to make nice representation
00138     
00139         $index=$K[$k];
00140         while(!is_null($index)) {
00141         // Stop when we reach the first, dummy candidate
00142         if($candidates[$index]->a!=0) {
00143                     $J[$candidates[$index]->a]=$candidates[$index]->b;
00144         }
00145                 $index=$candidates[$index]->previous;
00146         }       
00147         
00148         // Step 9   Get rid of 'jackpots' (hash collisions)
00150         
00151         for($i=1;$i<=$m;$i++) {
00152                 if($J[$i]!=0 && $file1[$i]!=$file2[$J[$i]]) {
00153                         $J[$i]=0;
00154                 }
00155         }
00156         
00157         // Done! (Maybe.)
00158         return $J;
00159 }
00160 
00161 // Functions needed by parts of the algorithm
00163 
00164 // Merge, from step 7 (Appendix A.3)
00165 function ouwiki_diff_merge(&$K,&$k,$i,&$E,$p,&$candidates) {
00166         $r=0;
00167         $c=$K[0];
00168         
00169         while(true) {
00170             $j=$E[$p]->serial; // Paper says 'i' but this is wrong (OCR)
00171             
00172             // Binary search in $K from $r to $k
00173             $min=$r;
00174             $max=$k+1;
00175             
00176                 while(true) {
00177                         $try = (int)(($min+$max)/2);
00178                         if($candidates[$K[$try]]->b >= $j) {
00179                                 $max=$try;
00180                         } else if($candidates[$K[$try+1]]->b <= $j) {
00181                                 $min=$try+1;
00182                         } else { // $try is less and $try+1 is more
00183                             $s=$try;
00184                                 break;
00185                         }
00186                         if($max<=$min) {
00187                             $s=-1;
00188                             break;
00189                         }
00190                 }
00191 
00192                 if($s>-1) {
00193                         if($candidates[$K[$s+1]]->b > $j) {
00194                                 // Create new candidate
00195                                 $index=count($candidates);
00196                                 $candidates[$index]=new StdClass;
00197                                 $candidates[$index]->a=$i;
00198                                 $candidates[$index]->b=$j;
00199                                 $candidates[$index]->previous=$K[$s];
00200                                 $K[$r]=$c;
00201                                 $r=$s+1;
00202                                 $c=$index; // Or should this go before?
00203                         }       
00204                     
00205                     if($s===$k) {
00206                         $K[$k+2]=$K[$k+1];
00207                         $k++;
00208                     break;
00209                     }               
00210                 }
00211                 
00212                 if($E[$p]->last) {
00213                     break;
00214                 }
00215                 
00216                 $p++;
00217         }
00218         $K[$r]=$c;
00219         
00220 }
00221 
00222 // From Step 2
00223 function ouwiki_diff_sort_v($a,$b) {
00224     if($a->hash < $b->hash) {
00225         return -1;
00226     } else if($a->hash > $b->hash) {
00227         return 1;    
00228     } else if($a->serial < $b->serial) {
00229         return -1;
00230     } else if($a->serial > $b->serial) {
00231         return 1;
00232     } else {
00233         return 0;
00234     }
00235 }
00236 
00237 // From Step 4
00238 function ouwiki_diff_find_last(&$V,&$E,$hash) {
00239     // Binary search in $V until we find something with $hash
00240     
00241     // Min = 1, array is 1-indexed
00242     $min=1;
00243     // Max = 1 higher than highest key
00244     end($V);
00245     $max=key($V)+1;
00246     while(true) {
00247         $try = (int)(($min+$max)/2);
00248         if($V[$try]->hash > $hash) {
00249             $max=$try;
00250         } else if($V[$try]->hash < $hash) {
00251             $min=$try+1;
00252         } else { // Equal
00253             break;
00254         }
00255         if($max<=$min) {
00256                 // No matching line
00257             return 0;
00258         }
00259     }
00260     
00261     // Now check back in $E to find the first line of that equivalence class
00262     for($j=$try;!$E[$j-1]->last;$j--) ;
00263         return $j;
00264 }
00265 
00267 
00268 
00273 class ouwiki_line {
00275     var $words=array();
00276     
00282     function ouwiki_line($data,$linepos) {
00283         // 1. Turn things we don't want into spaces (so that positioning stays same)
00284         
00285         // Whitespace replaced with space
00286         $data=preg_replace('/\s/',' ',$data);
00287         
00288         // Various ways of writing non-breaking space replaced with space
00289         // Note that using a single param for replace only works because all
00290         // the search strings are 6 characters long
00291         $data=str_replace(array('&nbsp;','&#xA0;','&#160;'),'      ',$data);
00292         
00293         // Tags replaced with equal number of spaces
00294         $data=preg_replace_callback('/<.*?'.'>/',create_function(
00295             '$matches','return preg_replace("/./"," ",$matches[0]);'),$data);
00296             
00297         // 2. Analyse string so that each space-separated thing 
00298         // is counted as a 'word' (note these may not be real words,
00299         // for instance words may include punctuation at either end)
00300         $pos=0;
00301         while(true) {
00302             // Find a non-space
00303             $strlendata = strlen($data);
00304             for(;$pos < $strlendata && substr($data,$pos,1)===' ';$pos++) ;
00305             if($pos==$strlendata) {
00306                 // No more content
00307                 break;
00308             }
00309             
00310             // Aaaand find the next space after that
00311             $space2=strpos($data,' ',$pos);
00312             if($space2===false) {
00313                 // No more spaces? Everything left must be a word
00314                 $this->words[]=new ouwiki_word(substr($data,$pos),$pos+$linepos);
00315                 break;
00316             } else {
00317                 $this->words[]=new ouwiki_word(substr($data,$pos,$space2-$pos),$pos+$linepos);
00318                 $pos=$space2;
00319             }
00320         }
00321     }
00322     
00326     function get_as_string() {
00327         $result='';
00328         foreach($this->words as $word) {
00329             if($result!=='') {
00330                 $result.=' ';
00331             }
00332             $result.=$word->word;
00333         }
00334         return $result;
00335     }
00336     
00342     function get_as_strings($lines) {
00343         $strings=array();
00344         foreach($lines as $key=>$value) {
00345             $strings[$key]=$value->get_as_string();        
00346         }
00347         return $strings;
00348     }
00349     
00350     
00354     function is_empty() {
00355         return count($this->words)===0;
00356     }
00357 }
00358 
00365 class ouwiki_word {
00367     var $word;
00369     var $start;
00370     
00371     function ouwiki_word($word,$start) {
00372         $this->word=$word;
00373         $this->start=$start;
00374     }
00375 }
00376 
00382 function ouwiki_diff_html_to_lines($content) {
00383     // These functions are a pain mostly because PHP preg_* don't provide
00384     // proper information as to the start/end position of matches. As a
00385     // consequence there is a lot of hackery going down. At every point we
00386     // replace things with spaces rather than getting rid, in order to store
00387     // positions within original content.
00388     
00389     // Get rid of all script, style, object tags (that might contain non-text
00390     // outside tags)
00391     $content=preg_replace_callback(
00392         '^(<script .*?</script>)|(<object .*?</object>)|(<style .*?</style>)^i',create_function(
00393             '$matches','return preg_replace("/./"," ",$matches[0]);'),$content); 
00394     
00395     // Get rid of all ` symbols as we are going to use these for a marker later.
00396     $content=preg_replace('/[`]/',' ',$content);
00397     
00398     // Put line breaks on block tags. Mark each line break with ` symbol
00399     $blocktags=array('p','div','h1','h2','h3','h4','h5','h6','td','li');
00400     $taglist='';
00401     foreach($blocktags as $blocktag) {
00402         if($taglist!=='') {
00403             $taglist.='|';
00404         }
00405         $taglist.="<$blocktag>|<\\/$blocktag>";
00406     }
00407     $content=preg_replace_callback('/(('.$taglist.')\s*)+/i',create_function(
00408         '$matches','return "`".preg_replace("/./"," ",substr($matches[0],1));'),$content);
00409         
00410     // Now go through splitting each line
00411     $lines=array(); $index=1;
00412     $pos=0;
00413     while($pos<strlen($content)) {
00414         $nextline=strpos($content,'`',$pos);
00415         if($nextline===false) {
00416             // No more line breaks? Take content to end
00417             $nextline=strlen($content);
00418         }
00419         
00420         $linestr=substr($content,$pos,$nextline-$pos);
00421         $line=new ouwiki_line($linestr,$pos);
00422         if(!$line->is_empty()) {
00423             $lines[$index++]=$line;
00424         }
00425         $pos=$nextline+1;
00426     }
00427     return $lines; 
00428 }
00429 
00434 class ouwiki_change_range {
00435     var $file1start,$file1count;
00436     var $file2start,$file2count;
00437 }
00438 
00442 class ouwiki_changes {
00443     
00445     var $adds;
00446     
00448     var $deletes;
00449     
00451     var $changes;
00452     
00458     function ouwiki_changes($diff,$count2) {
00459         // Find deleted lines
00460         $this->deletes=self::internal_find_deletes($diff,$count2);
00461         
00462         // Added lines work the same way after the comparison is
00463         // reversed.
00464         $this->adds=self::internal_find_deletes(
00465             ouwiki_diff_internal_flip($diff,$count2),count($diff));
00466         
00467         // Changed ranges are all the other lines from file 1 that
00468         // weren't found in file 2 but aren't deleted, and the 
00469         // corresponding lines from file 2 (between the equivalent
00470         // 'found' lines).        
00471         $this->changes=array();
00472         $matchbefore=0;
00473         $inrange=-1; $lastrange=-1;
00474         foreach($diff as $index1=>$index2) {
00475             // Changed line if this isn't in 'deleted' section and
00476             // doesn't have a match in file2.
00477             if($index2===0 && !in_array($index1,$this->deletes)) {
00478                 if($inrange===-1) {
00479                     // Not already in a range, start a new one at array end
00480                     $inrange=count($this->changes);
00481                     $this->changes[$inrange]=new ouwiki_change_range;
00482                     $this->changes[$inrange]->file1start=$index1;
00483                     $this->changes[$inrange]->file1count=1;                    
00484                     $this->changes[$inrange]->file2start=$matchbefore+1; // Last valid from file2                    
00485                     $this->changes[$inrange]->file2count=0;
00486                     $lastrange=$inrange;
00487                 } else {
00488                     // One more line that gets added to the range
00489                     $this->changes[$inrange]->file1count++;
00490                 }
00491             } else {
00492                 // Not in a range any more
00493                 $inrange=-1;
00494                 // If we have a line match...
00495                 if($index2!==0) {
00496                     // Remember this line as next range must start after it
00497                     $matchbefore=$index2;
00498                     // If last range is still looking for a number, fill that in too
00499                     if($lastrange!==-1) {
00500                         $this->changes[$lastrange]->file2count=$index2
00501                             -$this->changes[$lastrange]->file2start;
00502                         $lastrange=-1;
00503                     }
00504                 }
00505             }
00506         }
00507         // Unfinished range in file2 gets end of file
00508         if($lastrange!==-1) {
00509             $this->changes[$lastrange]->file2count=$count2
00510                 -$this->changes[$lastrange]->file2start+1;
00511         }
00512     }
00513 
00522     function internal_find_deletes($diff,$count2) {
00523         $deletes=array();
00524         
00525         // 1. Create a new array that includes the lowest-valued
00526         //    index2 value below each run of 0s.
00527         //    I.e. if our array is say 1,2,0,0,0,3,0 then the
00528         //    resulting array will be -,-,3,3,3,-,0
00529         $squidges=array();
00530         $lowest=0;
00531         $countdiff = count($diff);
00532         for($index1=$countdiff;$index1>=1;$index1--) {
00533             $index2=$diff[$index1];
00534             if($index2===0) {
00535                 $squidges[$index1]=$lowest;
00536             } else {
00537                 $lowest=$index2;
00538             }
00539         }
00540         
00541         // 2. OK now we can use this new array to work out 
00542         //    items that are known to be deleted because we
00543         //    have matching items either side        
00544         $highest=0;
00545         foreach($diff as $index1=>$index2) {
00546             if($index2===0) {
00547                 if($highest===$count2 || $highest+1===$squidges[$index1]) {
00548                     // Yep! Definitely deleted.
00549                     $deletes[]=$index1;                    
00550                 } 
00551             } else {
00552                 $highest=$index2;                
00553             }
00554         }
00555         return $deletes;        
00556     }
00557 }
00558 
00566 function ouwiki_diff_internal_flip($diff,$count2) {
00567     $flip=array();
00568     for($i=1;$i<=$count2;$i++) {
00569         $flip[$i]=0;
00570     }
00571     foreach($diff as $index1=>$index2) {
00572         if($index2!==0) {
00573             $flip[$index2]=$index1;
00574         }
00575     }
00576     return $flip;
00577 }
00578 
00587 function ouwiki_diff_words($lines1,$lines2) {
00588     // Prepare arrays
00589     $deleted=array();
00590     $added=array();
00591     // Get line difference
00592     $linediff=ouwiki_diff(
00593         ouwiki_line::get_as_strings($lines1),
00594         ouwiki_line::get_as_strings($lines2));
00595         
00596     // Handle lines that were entirely deleted
00597     foreach($linediff->deletes as $deletedline) {
00598         $deleted = array_merge($deleted, $lines1[$deletedline]->words);
00599     }
00600     // And ones that were entirely added
00601     foreach($linediff->adds as $addedline) {
00602         $added = array_merge($added, $lines2[$addedline]->words);
00603     }
00604     
00605     // Changes get diffed at the individual-word level
00606     foreach($linediff->changes as $changerange) {
00607         // Build list of all words in each side of the range
00608         $file1words=array();
00609         for($index=$changerange->file1start;
00610             $index<$changerange->file1start+$changerange->file1count;$index++) {
00611             foreach($lines1[$index]->words as $word) {
00612                 $file1words[]=$word;
00613             }
00614         }
00615         $file2words=array();
00616         for($index=$changerange->file2start;
00617             $index<$changerange->file2start+$changerange->file2count;$index++) {
00618             foreach($lines2[$index]->words as $word) {
00619                 $file2words[]=$word;
00620             }
00621         }
00622                 
00623         // Make arrays 1-based
00624         array_unshift($file1words,'dummy');
00625         unset($file1words[0]);
00626         array_unshift($file2words,'dummy');
00627         unset($file2words[0]);
00628         
00629         // Convert word lists into plain strings
00630         $file1strings=array();
00631         foreach($file1words as $index=>$word) {
00632             $file1strings[$index]=$word->word;
00633         }
00634         $file2strings=array();
00635         foreach($file2words as $index=>$word) {
00636             $file2strings[$index]=$word->word;
00637         }
00638         
00639         // Run diff on strings
00640         $worddiff=ouwiki_diff($file1strings,$file2strings);
00641         foreach($worddiff->adds as $index) {
00642             $added[]=$file2words[$index];
00643         }
00644         foreach($worddiff->deletes as $index) {
00645             $deleted[]=$file1words[$index];
00646         }
00647         foreach($worddiff->changes as $changerange) {
00648             for($index=$changerange->file1start;
00649                 $index<$changerange->file1start+$changerange->file1count;$index++) {
00650                 $deleted[]=$file1words[$index];
00651             }
00652             for($index=$changerange->file2start;
00653                 $index<$changerange->file2start+$changerange->file2count;$index++) {
00654                 $added[]=$file2words[$index];
00655             }
00656         }
00657     }
00658     
00659     return array($deleted,$added);
00660 }
00661 
00669 function ouwiki_diff($file1,$file2) {
00670     return new ouwiki_changes(ouwiki_diff_internal($file1,$file2),count($file2));
00671 }
00672 
00680 function ouwiki_diff_add_markers($html,$words,$markerclass,$beforetext,$aftertext) {
00681     // Sort words by start position
00682     usort($words, create_function('$a,$b','return $a->start-$b->start;'));
00683     
00684     // Add marker for each word. We use an odd tag name which will
00685     // be replaced by span later, this for ease of replacing 
00686     $spanstart="<ouwiki_diff_add_markers>";
00687     $pos=0;
00688     $result='';
00689     foreach($words as $word) {
00690         // Add everything up to the word
00691         $result.=substr($html,$pos,$word->start-$pos);
00692         // Add word
00693         $result.=$spanstart.$word->word.'</ouwiki_diff_add_markers>';
00694         // Update position
00695         $pos=$word->start+strlen($word->word);
00696     }
00697 
00698     // Add everything after last word
00699     $result.=substr($html,$pos);
00700     
00701     // If we end a marker then immediately start one, get rid of
00702     // both the end and start
00703     $result=preg_replace('^</ouwiki_diff_add_markers>(\s*)<ouwiki_diff_add_markers>^','$1',$result);
00704     
00705     // Turn markers into proper span
00706     $result=preg_replace('^<ouwiki_diff_add_markers>^',$beforetext.'<span class="'.$markerclass.'">',$result);
00707     $result=preg_replace('^</ouwiki_diff_add_markers>^','</span>'.$aftertext,$result);
00708     
00709     return $result;
00710 }
00711 
00718 function ouwiki_diff_html($html1,$html2) {
00719     $lines1=ouwiki_diff_html_to_lines($html1);
00720     $lines2=ouwiki_diff_html_to_lines($html2);
00721     list($deleted,$added)=ouwiki_diff_words($lines1,$lines2);
00722     $result1=ouwiki_diff_add_markers($html1,$deleted,'ouw_deleted',
00723         '<strong class="accesshide">'.get_string('deletedbegins','wiki').'</strong>',
00724         '<strong class="accesshide">'.get_string('deletedends','wiki').'</strong>');
00725     $result2=ouwiki_diff_add_markers($html2,$added,'ouw_added',
00726         '<strong class="accesshide">'.get_string('addedbegins','wiki').'</strong>',
00727         '<strong class="accesshide">'.get_string('addedends','wiki').'</strong>');
00728     return array($result1,$result2);    
00729 }
00730 
 All Data Structures Namespaces Files Functions Variables Enumerations