Moodle  2.2.1
http://www.collinsharper.com
C:/xampp/htdocs/moodle/repository/url/locallib.php
Go to the documentation of this file.
00001 <?php
00002 
00038 /*
00039  * This is a BSD License approved by the Open Source Initiative (OSI).
00040  * See:  http://www.opensource.org/licenses/bsd-license.php
00041  */
00042 
/**
 * Combine a base URL and a relative URL into an absolute URL.
 *
 * The relative URL is split into components; whatever it is missing
 * (scheme, authority, path, query) is taken from the base URL, and
 * dot segments in the resulting path are removed.
 *
 * @param string $baseUrl     absolute URL to resolve against; it must have
 *                            both a scheme and a host
 * @param string $relativeUrl URL to resolve (may already be absolute)
 * @return string|false       the joined absolute URL, or FALSE when either
 *                            URL cannot be parsed or the base is not absolute
 */
function url_to_absolute( $baseUrl, $relativeUrl )
{
    // If relative URL has a scheme, clean path and return.
    $r = split_url( $relativeUrl );
    if ( $r === FALSE )
        return FALSE;
    if ( !empty( $r['scheme'] ) )
    {
        if ( !empty( $r['path'] ) && $r['path'][0] == '/' )
            $r['path'] = url_remove_dot_segments( $r['path'] );
        return join_url( $r );
    }

    // Make sure the base URL is absolute.
    $b = split_url( $baseUrl );
    if ( $b === FALSE || empty( $b['scheme'] ) || empty( $b['host'] ) )
        return FALSE;
    $r['scheme'] = $b['scheme'];

    // If relative URL has an authority, clean path and return.
    if ( isset( $r['host'] ) )
    {
        if ( !empty( $r['path'] ) )
            $r['path'] = url_remove_dot_segments( $r['path'] );
        return join_url( $r );
    }
    unset( $r['port'] );
    unset( $r['user'] );
    unset( $r['pass'] );

    // Copy base authority.
    $r['host'] = $b['host'];
    if ( isset( $b['port'] ) ) $r['port'] = $b['port'];
    if ( isset( $b['user'] ) ) $r['user'] = $b['user'];
    if ( isset( $b['pass'] ) ) $r['pass'] = $b['pass'];

    // If relative URL has no path, use base path (and base query, unless
    // the relative URL brought its own).
    if ( empty( $r['path'] ) )
    {
        if ( !empty( $b['path'] ) )
            $r['path'] = $b['path'];
        if ( !isset( $r['query'] ) && isset( $b['query'] ) )
            $r['query'] = $b['query'];
        return join_url( $r );
    }

    // If relative URL path doesn't start with /, merge with base path.
    if ( $r['path'][0] != '/' )
    {
        // The base may have no path at all ("http://host"); treat that as
        // an empty path instead of reading an undefined array key.
        $basePath = isset( $b['path'] ) ? $b['path'] : '';

        // Keep everything before the last '/' of the base path.
        $base = mb_strrchr( $basePath, '/', TRUE, 'UTF-8' );
        if ( $base === FALSE ) $base = '';
        $r['path'] = $base . '/' . $r['path'];
    }
    $r['path'] = url_remove_dot_segments( $r['path'] );
    return join_url( $r );
}
00120 
/**
 * Remove "." and ".." segments (and empty segments) from a URL path.
 *
 * A leading '/' is preserved. A trailing '/' is preserved, and one is added
 * when a non-root result came from a path whose final segment was "." or
 * ".." (RFC 3986 section 5.2.4: "/b/c/.." becomes "/b/", not "/b").
 *
 * The path is split on the ASCII '/' byte, which is safe for UTF-8 text
 * because that byte never occurs inside a multi-byte sequence.
 *
 * @param string $path URL path, absolute or relative
 * @return string      the path with dot segments removed
 */
function url_remove_dot_segments( $path )
{
    $path = (string) $path;
    if ( $path === '' )
        return '';              // Nothing to clean; avoids reading $path[0]

    $outSegs = array( );
    $lastSeg = '';
    foreach ( explode( '/', $path ) as $seg )
    {
        $lastSeg = $seg;
        if ( $seg === '' || $seg === '.' )
            continue;
        if ( $seg === '..' )
            array_pop( $outSegs );  // No-op when already at the top
        else
            $outSegs[] = $seg;
    }

    $outPath = implode( '/', $outSegs );
    if ( $path[0] === '/' )
        $outPath = '/' . $outPath;

    // An empty final segment means the input ended with '/'.
    $endsWithSlash = ( $lastSeg === '' );
    // A final "." or ".." names a directory, so the result needs a '/'.
    $endsWithDot   = ( $lastSeg === '.' || $lastSeg === '..' );

    if ( $outPath !== '/' &&
        ( $endsWithSlash || ( $endsWithDot && $outPath !== '' ) ) )
        $outPath .= '/';
    return $outPath;
}
00160 
/**
 * Split a URL into its components using an RFC 3986 style regular expression.
 *
 * Only components present in the URL appear in the result. Possible keys:
 * 'scheme' (lower-cased), 'user', 'pass', 'host', 'port', 'path', 'query',
 * 'fragment'. An authority with no host ("file:///x") yields 'host' => ''.
 * An empty URL yields an empty array.
 *
 * @param string  $url    URL to split (absolute or relative)
 * @param boolean $decode when TRUE (default), percent-decode user, pass,
 *                        path, host name, query and fragment
 * @return array|false    associative array of components, or FALSE when the
 *                        URL does not match the grammar
 */
function split_url( $url, $decode=TRUE )
{
    // Character sets from RFC3986.
    $xunressub     = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
    $xpchar        = $xunressub . ':@%';

    // Scheme from RFC3986.  The hyphen is escaped so that "+-." is not
    // read as a character range (which would also admit ',').
    $xscheme        = '([a-zA-Z][a-zA-Z\d+.\-]*)';

    // User info (user + password) from RFC3986.
    $xuserinfo     = '((['  . $xunressub . '%]*)' .
                     '(:([' . $xunressub . ':%]*))?)';

    // IPv4 from RFC3986 (without digit constraints).
    $xipv4         = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

    // IPv6 from RFC2732 (without digit and grouping constraints).
    $xipv6         = '(\[([a-fA-F\d.:]+)\])';

    // Host name from RFC1035.  Technically, must start with a letter.
    // Relax that restriction to better parse URL structure, then
    // leave host name validation to application.
    $xhost_name    = '([a-zA-Z\d-.%]+)';

    // Authority from RFC3986.  Skip IP future.
    $xhost         = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
    $xport         = '(\d*)';
    $xauthority    = '((' . $xuserinfo . '@)?' . $xhost .
                     '?(:' . $xport . ')?)';

    // Path from RFC3986.  Blend absolute & relative for efficiency.
    $xslash_seg    = '(/[' . $xpchar . ']*)';
    $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
    $xpath_rel     = '([' . $xpchar . ']+' . $xslash_seg . '*)';
    $xpath_abs     = '(/(' . $xpath_rel . ')?)';
    $xapath        = '(' . $xpath_authabs . '|' . $xpath_abs .
                     '|' . $xpath_rel . ')';

    // Query and fragment from RFC3986.
    $xqueryfrag    = '([' . $xpchar . '/?' . ']*)';

    // URL.
    $xurl          = '^(' . $xscheme . ':)?' .  $xapath . '?' .
                     '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';


    // Split the URL into components.
    if ( !preg_match( '!' . $xurl . '!', $url, $m ) )
        return FALSE;

    // Capture groups: 2 scheme, 5 "//authority", 7 "userinfo@", 9 user,
    // 10 ":pass", 11 pass, 13 host name, 14 IPv4, 16 IPv6 (no brackets),
    // 17 ":port", 18 port, 19 path after authority, 21 absolute path,
    // 25 relative path, 27 "?query", 28 query, 29 "#fragment", 30 fragment.
    // Unmatched groups are '' or missing, so test against '' rather than
    // with empty(), which would also discard a literal "0".
    $parts      = array( );
    $decodeHost = FALSE;

    if ( isset( $m[2] ) && $m[2] !== '' )
        $parts['scheme'] = strtolower( $m[2] );

    if ( isset( $m[7] ) && $m[7] !== '' )
        $parts['user'] = isset( $m[9] ) ? $m[9] : '';
    if ( isset( $m[10] ) && $m[10] !== '' )
        $parts['pass'] = $m[11];

    if ( isset( $m[13] ) && $m[13] !== '' )
    {
        $parts['host'] = $m[13];
        $decodeHost    = TRUE;      // Only host names are percent-decoded
    }
    else if ( isset( $m[14] ) && $m[14] !== '' )
        $parts['host'] = $m[14];
    else if ( isset( $m[16] ) && $m[16] !== '' )
        $parts['host'] = $m[16];
    else if ( isset( $m[5] ) && $m[5] !== '' )
        $parts['host'] = '';        // "//" present but no host
    if ( isset( $m[17] ) && $m[17] !== '' )
        $parts['port'] = $m[18];

    if ( isset( $m[19] ) && $m[19] !== '' )
        $parts['path'] = $m[19];
    else if ( isset( $m[21] ) && $m[21] !== '' )
        $parts['path'] = $m[21];
    else if ( isset( $m[25] ) && $m[25] !== '' )
        $parts['path'] = $m[25];

    if ( isset( $m[27] ) && $m[27] !== '' )
        $parts['query'] = $m[28];
    if ( isset( $m[29] ) && $m[29] !== '' )
        $parts['fragment'] = $m[30];

    if ( !$decode )
        return $parts;

    if ( $decodeHost )
        $parts['host'] = rawurldecode( $parts['host'] );
    foreach ( array( 'user', 'pass', 'path', 'query', 'fragment' ) as $key )
    {
        if ( !empty( $parts[$key] ) )
            $parts[$key] = rawurldecode( $parts[$key] );
    }
    return $parts;
}
00338 
/**
 * Build a URL string from an array of components.
 *
 * Recognised keys: 'scheme', 'user', 'pass', 'host', 'port', 'path',
 * 'query', 'fragment'. The "//authority" part is emitted only when 'host'
 * is set (it may be ''). A host that looks like a bare IPv6 address is
 * wrapped in brackets.
 *
 * @param array   $parts  URL components
 * @param boolean $encode when TRUE (default), percent-encode user, pass,
 *                        host name, path, query and fragment
 * @return string         the assembled URL
 */
function join_url( $parts, $encode=TRUE )
{
    if ( $encode )
    {
        // Characters RFC 3986 allows literally in a query or fragment.
        // rawurlencode() escapes them; restoring them keeps "a=1&b=2"
        // readable as key/value pairs instead of "a%3D1%26b%3D2".
        $keepLiteral = array(
            '%21' => '!', '%24' => '$', '%26' => '&', '%27' => "'",
            '%28' => '(', '%29' => ')', '%2A' => '*', '%2B' => '+',
            '%2C' => ',', '%3B' => ';', '%3D' => '=', '%3A' => ':',
            '%40' => '@', '%2F' => '/', '%3F' => '?',
        );

        if ( isset( $parts['user'] ) )
            $parts['user']     = rawurlencode( $parts['user'] );
        if ( isset( $parts['pass'] ) )
            $parts['pass']     = rawurlencode( $parts['pass'] );
        // Leave IP literals (bracketed or bare) untouched; encode names.
        // The whole host must match, not merely its first or last part.
        if ( isset( $parts['host'] ) &&
            !preg_match( '!^(\[[\da-f.:]+\]|[\da-f.:]+)$!ui', $parts['host'] ) )
            $parts['host']     = rawurlencode( $parts['host'] );
        if ( !empty( $parts['path'] ) )
            $parts['path']     = preg_replace( '!%2F!ui', '/',
                rawurlencode( $parts['path'] ) );
        if ( isset( $parts['query'] ) )
            $parts['query']    = strtr(
                rawurlencode( $parts['query'] ), $keepLiteral );
        if ( isset( $parts['fragment'] ) )
            $parts['fragment'] = strtr(
                rawurlencode( $parts['fragment'] ), $keepLiteral );
    }

    $url = '';
    if ( !empty( $parts['scheme'] ) )
        $url .= $parts['scheme'] . ':';
    if ( isset( $parts['host'] ) )
    {
        $url .= '//';
        if ( isset( $parts['user'] ) )
        {
            $url .= $parts['user'];
            if ( isset( $parts['pass'] ) )
                $url .= ':' . $parts['pass'];
            $url .= '@';
        }
        if ( preg_match( '!^[\da-f]*:[\da-f.:]+$!ui', $parts['host'] ) )
            $url .= '[' . $parts['host'] . ']';     // IPv6
        else
            $url .= $parts['host'];                 // IPv4 or name
        if ( isset( $parts['port'] ) )
            $url .= ':' . $parts['port'];
        // A path following an authority must start with '/'.
        if ( !empty( $parts['path'] ) && $parts['path'][0] != '/' )
            $url .= '/';
    }
    if ( !empty( $parts['path'] ) )
        $url .= $parts['path'];
    if ( isset( $parts['query'] ) )
        $url .= '?' . $parts['query'];
    if ( isset( $parts['fragment'] ) )
        $url .= '#' . $parts['fragment'];
    return $url;
}
/**
 * Extract URL-valued attributes, meta http-equiv URLs and CSS URLs from
 * HTML (or WML) text.
 *
 * The result is keyed by element name, then attribute name, each holding a
 * list of the raw attribute values found:
 *   $urls['a']['href'][]         values of <a href=...>
 *   $urls['meta']['http-equiv'][] URLs from matching <meta http-equiv=...>
 *   $urls['style'][...]          whatever extract_css_urls() returns for
 *                                style attributes and <style> bodies, merged
 * Keys with no matches are absent; with no matches at all the result is an
 * empty array. Values are returned as written, without resolving or
 * decoding them.
 *
 * The "archive" attribute holds a URL list and is split into separate
 * entries: space-separated on <object>, comma-separated on <applet>.
 *
 * @param string $text HTML text to scan
 * @return array       nested array of extracted URLs, as described above
 */
function extract_html_urls( $text )
{
    $match_elements = array(
        // HTML
        array('element'=>'a',           'attribute'=>'href'),           // 2.0
        array('element'=>'a',           'attribute'=>'urn'),            // 2.0
        array('element'=>'base',        'attribute'=>'href'),           // 2.0
        array('element'=>'form',        'attribute'=>'action'),         // 2.0
        array('element'=>'img',         'attribute'=>'src'),            // 2.0
        array('element'=>'link',        'attribute'=>'href'),           // 2.0

        array('element'=>'applet',      'attribute'=>'code'),           // 3.2
        array('element'=>'applet',      'attribute'=>'codebase'),       // 3.2
        array('element'=>'area',        'attribute'=>'href'),           // 3.2
        array('element'=>'body',        'attribute'=>'background'),     // 3.2
        array('element'=>'img',         'attribute'=>'usemap'),         // 3.2
        array('element'=>'input',       'attribute'=>'src'),            // 3.2

        array('element'=>'applet',      'attribute'=>'archive'),        // 4.01
        array('element'=>'applet',      'attribute'=>'object'),         // 4.01
        array('element'=>'blockquote',  'attribute'=>'cite'),           // 4.01
        array('element'=>'del',         'attribute'=>'cite'),           // 4.01
        array('element'=>'frame',       'attribute'=>'longdesc'),       // 4.01
        array('element'=>'frame',       'attribute'=>'src'),            // 4.01
        array('element'=>'head',        'attribute'=>'profile'),        // 4.01
        array('element'=>'iframe',      'attribute'=>'longdesc'),       // 4.01
        array('element'=>'iframe',      'attribute'=>'src'),            // 4.01
        array('element'=>'img',         'attribute'=>'longdesc'),       // 4.01
        array('element'=>'input',       'attribute'=>'usemap'),         // 4.01
        array('element'=>'ins',         'attribute'=>'cite'),           // 4.01
        array('element'=>'object',      'attribute'=>'archive'),        // 4.01
        array('element'=>'object',      'attribute'=>'classid'),        // 4.01
        array('element'=>'object',      'attribute'=>'codebase'),       // 4.01
        array('element'=>'object',      'attribute'=>'data'),           // 4.01
        array('element'=>'object',      'attribute'=>'usemap'),         // 4.01
        array('element'=>'q',           'attribute'=>'cite'),           // 4.01
        array('element'=>'script',      'attribute'=>'src'),            // 4.01

        array('element'=>'audio',       'attribute'=>'src'),            // 5.0
        array('element'=>'command',     'attribute'=>'icon'),           // 5.0
        array('element'=>'embed',       'attribute'=>'src'),            // 5.0
        array('element'=>'event-source','attribute'=>'src'),            // 5.0
        array('element'=>'html',        'attribute'=>'manifest'),       // 5.0
        array('element'=>'source',      'attribute'=>'src'),            // 5.0
        array('element'=>'video',       'attribute'=>'src'),            // 5.0
        array('element'=>'video',       'attribute'=>'poster'),         // 5.0

        array('element'=>'bgsound',     'attribute'=>'src'),            // Extension
        array('element'=>'body',        'attribute'=>'credits'),        // Extension
        array('element'=>'body',        'attribute'=>'instructions'),   // Extension
        array('element'=>'body',        'attribute'=>'logo'),           // Extension
        array('element'=>'div',         'attribute'=>'href'),           // Extension
        array('element'=>'div',         'attribute'=>'src'),            // Extension
        array('element'=>'embed',       'attribute'=>'code'),           // Extension
        array('element'=>'embed',       'attribute'=>'pluginspage'),    // Extension
        array('element'=>'html',        'attribute'=>'background'),     // Extension
        array('element'=>'ilayer',      'attribute'=>'src'),            // Extension
        array('element'=>'img',         'attribute'=>'dynsrc'),         // Extension
        array('element'=>'img',         'attribute'=>'lowsrc'),         // Extension
        array('element'=>'input',       'attribute'=>'dynsrc'),         // Extension
        array('element'=>'input',       'attribute'=>'lowsrc'),         // Extension
        array('element'=>'table',       'attribute'=>'background'),     // Extension
        array('element'=>'td',          'attribute'=>'background'),     // Extension
        array('element'=>'th',          'attribute'=>'background'),     // Extension
        array('element'=>'layer',       'attribute'=>'src'),            // Extension
        array('element'=>'xml',         'attribute'=>'src'),            // Extension

        array('element'=>'button',      'attribute'=>'action'),         // Forms 2.0
        array('element'=>'datalist',    'attribute'=>'data'),           // Forms 2.0
        array('element'=>'form',        'attribute'=>'data'),           // Forms 2.0
        array('element'=>'input',       'attribute'=>'action'),         // Forms 2.0
        array('element'=>'select',      'attribute'=>'data'),           // Forms 2.0

        // XHTML
        array('element'=>'html',        'attribute'=>'xmlns'),

        // WML
        array('element'=>'access',      'attribute'=>'path'),           // 1.3
        array('element'=>'card',        'attribute'=>'onenterforward'), // 1.3
        array('element'=>'card',        'attribute'=>'onenterbackward'),// 1.3
        array('element'=>'card',        'attribute'=>'ontimer'),        // 1.3
        array('element'=>'go',          'attribute'=>'href'),           // 1.3
        array('element'=>'option',      'attribute'=>'onpick'),         // 1.3
        array('element'=>'template',    'attribute'=>'onenterforward'), // 1.3
        array('element'=>'template',    'attribute'=>'onenterbackward'),// 1.3
        array('element'=>'template',    'attribute'=>'ontimer'),        // 1.3
        array('element'=>'wml',         'attribute'=>'xmlns'),          // 2.0
    );

    $match_metas = array(
        'content-base',
        'content-location',
        'referer',
        'location',
        'refresh',
    );

    $urls = array( );

    // Extract all elements (the text between '<' and '>' of opening tags).
    if ( !preg_match_all( '/<([a-z][^>]*)>/iu', $text, $matches ) )
        return array( );
    $elements = $matches[1];

    // Attribute value: group 3 is a double-quoted value's content, group 4
    // an unquoted value.  Group 4 is only present in a match when the
    // unquoted alternative was the one that matched.
    $value_pattern = '=(("([^"]*)")|([^\s]*))';

    // Match elements and attributes
    foreach ( $match_elements as $match_element )
    {
        $name = $match_element['element'];
        $attr = $match_element['attribute'];

        // The attribute must start right after whitespace so that e.g.
        // "data-src" is not mistaken for "src"; the 's' flag lets tags
        // that span several lines match.
        $pattern = '/^' . preg_quote( $name, '/' ) . '\s(?:.*\s)?' .
            preg_quote( $attr, '/' ) . $value_pattern . '/isu';

        if ( $attr == 'archive' && $name == 'object' )
            $split_pattern = '/\s+/u';      // Space-separated URL list
        else if ( $attr == 'archive' && $name == 'applet' )
            $split_pattern = '/,\s*/u';     // Comma-separated URL list
        else
            $split_pattern = NULL;          // Single URL

        foreach ( $elements as $element )
        {
            if ( !preg_match( $pattern, $element, $match ) )
                continue;
            $m = isset( $match[4] ) ? $match[4] : $match[3];
            if ( $split_pattern === NULL )
            {
                $urls[$name][$attr][] = $m;
                continue;
            }
            $msplit = preg_split( $split_pattern, $m, -1, PREG_SPLIT_NO_EMPTY );
            foreach ( $msplit as $ms )
                $urls[$name][$attr][] = $ms;
        }
    }

    // Match meta http-equiv elements
    $content_pattern = '/content'  . $value_pattern . '/iu';
    $refresh_pattern = '/\d*;\s*(url=)?(.*)$/iu';
    foreach ( $match_metas as $match_meta )
    {
        $attr_pattern = '/http-equiv="?' . $match_meta . '"?/iu';
        foreach ( $elements as $element )
        {
            if ( !preg_match( '/^meta/iu', $element ) ||
                !preg_match( $attr_pattern, $element ) ||
                !preg_match( $content_pattern, $element, $match ) )
                continue;
            $m = isset( $match[4] ) ? $match[4] : $match[3];
            if ( $match_meta != 'refresh' )
                $urls['meta']['http-equiv'][] = $m;
            else if ( preg_match( $refresh_pattern, $m, $match ) )
                $urls['meta']['http-equiv'][] = $match[2];  // After "N;url="
        }
    }

    // Match style attributes
    $urls['style'] = array( );
    $style_pattern = '/style' . $value_pattern . '/iu';
    foreach ( $elements as $element )
    {
        if ( !preg_match( $style_pattern, $element, $match ) )
            continue;
        $m = isset( $match[4] ) ? $match[4] : $match[3];
        $style_urls = extract_css_urls( $m );
        if ( !empty( $style_urls ) )
            $urls['style'] = array_merge_recursive(
                $urls['style'], $style_urls );
    }

    // Match style bodies
    if ( preg_match_all( '/<style[^>]*>(.*?)<\/style>/siu', $text, $style_bodies ) )
    {
        foreach ( $style_bodies[1] as $style_body )
        {
            $style_urls = extract_css_urls( $style_body );
            if ( !empty( $style_urls ) )
                $urls['style'] = array_merge_recursive(
                    $urls['style'], $style_urls );
        }
    }
    if ( empty($urls['style']) )
        unset( $urls['style'] );

    return $urls;
}
/**
 * Extract URLs from CSS text.
 *
 * Recognises @import "..." / @import '...', @import url(...) and plain
 * url(...) (with or without quotes inside the parentheses). Backslash
 * escapes inside a URL are resolved to the escaped character.
 *
 * @param string $text CSS text (a style sheet or a style attribute value)
 * @return array       up to two keys, each a list of URL strings:
 *                     'import' for @import rules and 'property' for other
 *                     url(...) uses; a key is absent when it has no URLs
 */
function extract_css_urls( $text )
{
    $urls = array( );

    // A URL is a run of ordinary characters and backslash escapes.  The
    // possessive quantifiers stop the regex engine from retrying every way
    // of splitting that run when the closing quote or parenthesis is
    // missing, which would otherwise exhaust the backtrack limit and make
    // the whole search fail.
    $url_pattern     = '((?:[^\\\\\'", \(\)]++|\\\\.)*+)';
    $urlfunc_pattern = 'url\(\s*[\'"]?' . $url_pattern . '[\'"]?\s*\)';
    $pattern         = '/(' .
         '(@import\s*[\'"]' . $url_pattern     . '[\'"])' .
        '|(@import\s*'      . $urlfunc_pattern . ')'      .
        '|('                . $urlfunc_pattern . ')'      .  ')/iu';
    if ( !preg_match_all( $pattern, $text, $matches ) )
        return $urls;

    // Capture group holding the URL for each alternative, in report order:
    //   3: @import '...' / @import "..."
    //   5: @import url(...)
    //   7: url(...)
    $groups = array( 3 => 'import', 5 => 'import', 7 => 'property' );
    foreach ( $groups as $index => $kind )
    {
        foreach ( $matches[$index] as $match )
        {
            if ( !empty( $match ) )
                $urls[$kind][] =
                    preg_replace( '/\\\\(.)/u', '\\1', $match );
        }
    }

    return $urls;
}
 All Data Structures Namespaces Files Functions Variables Enumerations