|
Moodle
2.2.1
http://www.collinsharper.com
|
<?php

/*
 * This is a BSD License approved by the Open Source Initiative (OSI).
 * See:  http://www.opensource.org/licenses/bsd-license.php
 */

/**
 * Combine a base URL and a relative URL into an absolute URL.
 *
 * The relative URL is resolved against the base URL in the manner of
 * RFC 3986 section 5.2: a relative URL that carries its own scheme or
 * authority overrides the base, an empty relative path inherits the base
 * path (and the base query when the relative URL has none), and any other
 * relative path is merged with the base path before "." and ".." segments
 * are removed.
 *
 * Both URLs are split and re-joined WITHOUT percent-decoding. A decode
 * followed by a re-encode is lossy: it would turn "?x=1&y=2" into
 * "?x%3D1%26y%3D2" and ";" in a path into "%3B".
 *
 * @param string $baseUrl     Absolute URL (scheme and host are required).
 * @param string $relativeUrl URL to resolve; may itself be absolute.
 * @return string|false       The absolute URL, or FALSE when either URL
 *                            cannot be parsed or the base is not absolute.
 */
function url_to_absolute( $baseUrl, $relativeUrl )
{
    // If relative URL has a scheme, clean path and return.
    $r = split_url( $relativeUrl, FALSE );
    if ( $r === FALSE )
        return FALSE;
    $hasPath = isset( $r['path'] ) && $r['path'] !== '';
    if ( !empty( $r['scheme'] ) )
    {
        if ( $hasPath && $r['path'][0] === '/' )
            $r['path'] = url_remove_dot_segments( $r['path'] );
        return join_url( $r, FALSE );
    }

    // Make sure the base URL is absolute.
    $b = split_url( $baseUrl, FALSE );
    if ( $b === FALSE || empty( $b['scheme'] ) || empty( $b['host'] ) )
        return FALSE;
    $r['scheme'] = $b['scheme'];

    // If relative URL has an authority, clean path and return.
    if ( isset( $r['host'] ) )
    {
        if ( $hasPath )
            $r['path'] = url_remove_dot_segments( $r['path'] );
        return join_url( $r, FALSE );
    }
    unset( $r['port'], $r['user'], $r['pass'] );

    // Copy base authority.
    $r['host'] = $b['host'];
    foreach ( array( 'port', 'user', 'pass' ) as $key )
    {
        if ( isset( $b[$key] ) )
            $r[$key] = $b[$key];
    }

    // If relative URL has no path, use base path (and base query when the
    // relative URL does not bring its own).
    if ( !$hasPath )
    {
        if ( isset( $b['path'] ) && $b['path'] !== '' )
            $r['path'] = $b['path'];
        if ( !isset( $r['query'] ) && isset( $b['query'] ) )
            $r['query'] = $b['query'];
        return join_url( $r, FALSE );
    }

    // If relative URL path doesn't start with /, merge with base path:
    // keep everything in the base path before its last "/".
    if ( $r['path'][0] !== '/' )
    {
        $basePath = isset( $b['path'] ) ? $b['path'] : '';
        $cut      = strrpos( $basePath, '/' );
        $baseDir  = ( $cut === FALSE ) ? '' : substr( $basePath, 0, $cut );
        $r['path'] = $baseDir . '/' . $r['path'];
    }
    $r['path'] = url_remove_dot_segments( $r['path'] );
    return join_url( $r, FALSE );
}

/**
 * Remove "." and ".." segments (and empty segments) from a URL path.
 *
 * A leading "/" is preserved. The result ends with "/" when the input
 * ended with "/", or when its final segment was "." or ".." following a
 * "/" (RFC 3986 section 5.2.4: "/b/c/.." becomes "/b/", not "/b").
 *
 * "/" is a single ASCII byte that never occurs inside a multi-byte UTF-8
 * sequence, so plain byte-wise string functions are safe here.
 *
 * @param string $path URL path.
 * @return string      The path with dot segments removed; '' for ''.
 */
function url_remove_dot_segments( $path )
{
    $path = (string) $path;
    if ( $path === '' )
        return '';

    $inSegs  = explode( '/', $path );
    $outSegs = array( );
    foreach ( $inSegs as $seg )
    {
        if ( $seg === '' || $seg === '.' )
            continue;
        if ( $seg === '..' )
            array_pop( $outSegs );  // No-op when already at the root.
        else
            $outSegs[] = $seg;
    }

    $outPath = implode( '/', $outSegs );
    if ( $path[0] === '/' )
        $outPath = '/' . $outPath;

    // Decide whether the result names a "directory" and needs a trailing /.
    $last      = $inSegs[count( $inSegs ) - 1];
    $endsAsDir = ( $last === '' ) ||
        ( count( $inSegs ) > 1 && ( $last === '.' || $last === '..' ) );
    if ( $endsAsDir && $outPath !== '/' )
        $outPath .= '/';
    return $outPath;
}

/**
 * Split a URL into its components.
 *
 * The returned associative array contains only the components present in
 * the URL, in this order: 'scheme' (lower-cased), 'user', 'pass', 'host',
 * 'port', 'path', 'query', 'fragment'. An authority that is present but
 * has no host (e.g. "file:///x") yields 'host' => ''. IPv6 hosts are
 * returned without the surrounding brackets.
 *
 * @param string $url    URL to parse (absolute or relative).
 * @param bool   $decode When TRUE (default) user, pass, path, query,
 *                       fragment and named hosts are rawurldecode()d.
 * @return array|false   Component array, or FALSE if the URL does not
 *                       match the grammar below.
 */
function split_url( $url, $decode=TRUE )
{
    // Character sets from RFC3986.
    $xunressub     = 'a-zA-Z\d\-._~\!$&\'()*+,;=';
    $xpchar        = $xunressub . ':@%';

    // Scheme from RFC3986. The hyphen is escaped so that "+-." is not read
    // as a range (which would also admit ",").
    $xscheme       = '([a-zA-Z][a-zA-Z\d+.\-]*)';

    // User info (user + password) from RFC3986.
    $xuserinfo     = '(([' . $xunressub . '%]*)' .
                     '(:([' . $xunressub . ':%]*))?)';

    // IPv4 from RFC3986 (without digit constraints).
    $xipv4         = '(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})';

    // IPv6 from RFC2732 (without digit and grouping constraints).
    $xipv6         = '(\[([a-fA-F\d.:]+)\])';

    // Host name from RFC1035. Technically, must start with a letter.
    // Relax that restriction to better parse URL structure, then
    // leave host name validation to application.
    // The hyphen is escaped: PCRE2 (PHP >= 7.3) rejects "\d-." as an
    // invalid range, which made every URL fail to parse.
    $xhost_name    = '([a-zA-Z\d.%\-]+)';

    // Authority from RFC3986. Skip IP future.
    $xhost         = '(' . $xhost_name . '|' . $xipv4 . '|' . $xipv6 . ')';
    $xport         = '(\d*)';
    $xauthority    = '((' . $xuserinfo . '@)?' . $xhost .
                     '?(:' . $xport . ')?)';

    // Path from RFC3986. Blend absolute & relative for efficiency.
    $xslash_seg    = '(/[' . $xpchar . ']*)';
    $xpath_authabs = '((//' . $xauthority . ')((/[' . $xpchar . ']*)*))';
    $xpath_rel     = '([' . $xpchar . ']+' . $xslash_seg . '*)';
    $xpath_abs     = '(/(' . $xpath_rel . ')?)';
    $xapath        = '(' . $xpath_authabs . '|' . $xpath_abs .
                     '|' . $xpath_rel . ')';

    // Query and fragment from RFC3986.
    $xqueryfrag    = '([' . $xpchar . '/?' . ']*)';

    // URL.
    $xurl          = '^(' . $xscheme . ':)?' . $xapath . '?' .
                     '(\?' . $xqueryfrag . ')?(#' . $xqueryfrag . ')?$';

    // Split the URL into components.
    if ( !preg_match( '!' . $xurl . '!', (string) $url, $m ) )
        return FALSE;

    // preg_match() omits trailing groups that did not participate; pad so
    // every group index used below exists and unmatched groups are ''.
    $m += array_fill( 0, 31, '' );

    // Group map: 2 scheme, 5 "//authority", 7 "userinfo@", 9 user,
    // 10 ":pass", 11 pass, 13 host name, 14 IPv4, 16 IPv6 (no brackets),
    // 17 ":port", 18 port, 19 path after authority, 21 absolute path,
    // 25 relative path, 27 "?query", 28 query, 29 "#fragment", 30 fragment.
    $parts = array( );
    if ( $m[2] !== '' )
        $parts['scheme'] = strtolower( $m[2] );

    if ( $m[7] !== '' )
        $parts['user'] = $m[9];
    if ( $m[10] !== '' )
        $parts['pass'] = $m[11];

    $hostIsName = FALSE;
    if ( $m[13] !== '' )
    {
        $parts['host'] = $m[13];
        $hostIsName    = TRUE;
    }
    else if ( $m[14] !== '' )
        $parts['host'] = $m[14];
    else if ( $m[16] !== '' )
        $parts['host'] = $m[16];
    else if ( $m[5] !== '' )
        $parts['host'] = '';        // Authority present but host empty.
    if ( $m[17] !== '' )
        $parts['port'] = $m[18];

    if ( $m[19] !== '' )
        $parts['path'] = $m[19];
    else if ( $m[21] !== '' )
        $parts['path'] = $m[21];
    else if ( $m[25] !== '' )
        $parts['path'] = $m[25];

    if ( $m[27] !== '' )
        $parts['query'] = $m[28];
    if ( $m[29] !== '' )
        $parts['fragment'] = $m[30];

    if ( !$decode )
        return $parts;

    foreach ( array( 'user', 'pass', 'path', 'query', 'fragment' ) as $key )
    {
        if ( isset( $parts[$key] ) )
            $parts[$key] = rawurldecode( $parts[$key] );
    }
    // Only named hosts can carry percent-escapes; IP literals are left alone.
    if ( $hostIsName )
        $parts['host'] = rawurldecode( $parts['host'] );
    return $parts;
}

/**
 * Build a URL string from the components produced by split_url().
 *
 * Recognised keys: 'scheme', 'user', 'pass', 'host', 'port', 'path',
 * 'query', 'fragment'. The authority ("//...") is emitted only when 'host'
 * is set; 'user'/'pass'/'port' are ignored without it. A host that looks
 * like a bare IPv6 address is wrapped in brackets.
 *
 * With $encode TRUE each component is rawurlencode()d as a whole ("/" is
 * kept in the path). That also escapes "=" and "&" inside the query, so
 * callers holding components that are already percent-encoded (for example
 * from split_url( $url, FALSE )) should pass $encode = FALSE.
 *
 * @param array $parts  Associative array of URL components.
 * @param bool  $encode Whether to percent-encode the components first.
 * @return string       The assembled URL.
 */
function join_url( $parts, $encode=TRUE )
{
    $hasPath = isset( $parts['path'] ) && $parts['path'] !== '';

    if ( $encode )
    {
        foreach ( array( 'user', 'pass', 'query', 'fragment' ) as $key )
        {
            if ( isset( $parts[$key] ) )
                $parts[$key] = rawurlencode( $parts[$key] );
        }
        // Leave IP literals (bracketed or bare) untouched; encode names.
        if ( isset( $parts['host'] ) &&
            !preg_match( '!^(\[[\da-f.:]+\]|[\da-f.:]+)$!i', $parts['host'] ) )
            $parts['host'] = rawurlencode( $parts['host'] );
        if ( $hasPath )
            $parts['path'] = str_replace( '%2F', '/',
                rawurlencode( $parts['path'] ) );
    }

    $url = '';
    if ( !empty( $parts['scheme'] ) )
        $url .= $parts['scheme'] . ':';
    if ( isset( $parts['host'] ) )
    {
        $url .= '//';
        if ( isset( $parts['user'] ) )
        {
            $url .= $parts['user'];
            if ( isset( $parts['pass'] ) )
                $url .= ':' . $parts['pass'];
            $url .= '@';
        }
        if ( preg_match( '!^[\da-f]*:[\da-f.:]+$!i', $parts['host'] ) )
            $url .= '[' . $parts['host'] . ']';     // IPv6
        else
            $url .= $parts['host'];                 // IPv4 or name
        if ( isset( $parts['port'] ) )
            $url .= ':' . $parts['port'];
        // A path following an authority must start with "/".
        if ( $hasPath && $parts['path'][0] !== '/' )
            $url .= '/';
    }
    if ( $hasPath )
        $url .= $parts['path'];
    if ( isset( $parts['query'] ) )
        $url .= '?' . $parts['query'];
    if ( isset( $parts['fragment'] ) )
        $url .= '#' . $parts['fragment'];
    return $url;
}

/**
 * Extract URLs from HTML (and WML) markup.
 *
 * The text is scanned with regular expressions, not an HTML parser. The
 * result is a nested array:
 *
 *   $urls[element][attribute][]  URLs from known URL-bearing attributes
 *   $urls['meta']['http-equiv'][] URLs from <meta http-equiv=... content=...>
 *   $urls['style']['import'][]    \ URLs found by extract_css_urls() in
 *   $urls['style']['property'][]  / style="" attributes and <style> bodies
 *
 * Attribute values may be double-quoted, single-quoted or unquoted.
 * "archive" is a URL list: space-separated on <object> and comma-separated
 * on <applet> (HTML 4.01); every other attribute holds a single URL.
 *
 * @param string $text HTML text (must be valid UTF-8 for matching to work).
 * @return array       Nested array as described above; empty when nothing
 *                     is found.
 */
function extract_html_urls( $text )
{
    $match_elements = array(
        // HTML
        array('element'=>'a',           'attribute'=>'href'),       // 2.0
        array('element'=>'a',           'attribute'=>'urn'),        // 2.0
        array('element'=>'base',        'attribute'=>'href'),       // 2.0
        array('element'=>'form',        'attribute'=>'action'),     // 2.0
        array('element'=>'img',         'attribute'=>'src'),        // 2.0
        array('element'=>'link',        'attribute'=>'href'),       // 2.0

        array('element'=>'applet',      'attribute'=>'code'),       // 3.2
        array('element'=>'applet',      'attribute'=>'codebase'),   // 3.2
        array('element'=>'area',        'attribute'=>'href'),       // 3.2
        array('element'=>'body',        'attribute'=>'background'), // 3.2
        array('element'=>'img',         'attribute'=>'usemap'),     // 3.2
        array('element'=>'input',       'attribute'=>'src'),        // 3.2

        array('element'=>'applet',      'attribute'=>'archive'),    // 4.01
        array('element'=>'applet',      'attribute'=>'object'),     // 4.01
        array('element'=>'blockquote',  'attribute'=>'cite'),       // 4.01
        array('element'=>'del',         'attribute'=>'cite'),       // 4.01
        array('element'=>'frame',       'attribute'=>'longdesc'),   // 4.01
        array('element'=>'frame',       'attribute'=>'src'),        // 4.01
        array('element'=>'head',        'attribute'=>'profile'),    // 4.01
        array('element'=>'iframe',      'attribute'=>'longdesc'),   // 4.01
        array('element'=>'iframe',      'attribute'=>'src'),        // 4.01
        array('element'=>'img',         'attribute'=>'longdesc'),   // 4.01
        array('element'=>'input',       'attribute'=>'usemap'),     // 4.01
        array('element'=>'ins',         'attribute'=>'cite'),       // 4.01
        array('element'=>'object',      'attribute'=>'archive'),    // 4.01
        array('element'=>'object',      'attribute'=>'classid'),    // 4.01
        array('element'=>'object',      'attribute'=>'codebase'),   // 4.01
        array('element'=>'object',      'attribute'=>'data'),       // 4.01
        array('element'=>'object',      'attribute'=>'usemap'),     // 4.01
        array('element'=>'q',           'attribute'=>'cite'),       // 4.01
        array('element'=>'script',      'attribute'=>'src'),        // 4.01

        array('element'=>'audio',       'attribute'=>'src'),        // 5.0
        array('element'=>'command',     'attribute'=>'icon'),       // 5.0
        array('element'=>'embed',       'attribute'=>'src'),        // 5.0
        array('element'=>'event-source','attribute'=>'src'),        // 5.0
        array('element'=>'html',        'attribute'=>'manifest'),   // 5.0
        array('element'=>'source',      'attribute'=>'src'),        // 5.0
        array('element'=>'video',       'attribute'=>'src'),        // 5.0
        array('element'=>'video',       'attribute'=>'poster'),     // 5.0

        array('element'=>'bgsound',     'attribute'=>'src'),        // Extension
        array('element'=>'body',        'attribute'=>'credits'),    // Extension
        array('element'=>'body',        'attribute'=>'instructions'), // Extension
        array('element'=>'body',        'attribute'=>'logo'),       // Extension
        array('element'=>'div',         'attribute'=>'href'),       // Extension
        array('element'=>'div',         'attribute'=>'src'),        // Extension
        array('element'=>'embed',       'attribute'=>'code'),       // Extension
        array('element'=>'embed',       'attribute'=>'pluginspage'), // Extension
        array('element'=>'html',        'attribute'=>'background'), // Extension
        array('element'=>'ilayer',      'attribute'=>'src'),        // Extension
        array('element'=>'img',         'attribute'=>'dynsrc'),     // Extension
        array('element'=>'img',         'attribute'=>'lowsrc'),     // Extension
        array('element'=>'input',       'attribute'=>'dynsrc'),     // Extension
        array('element'=>'input',       'attribute'=>'lowsrc'),     // Extension
        array('element'=>'table',       'attribute'=>'background'), // Extension
        array('element'=>'td',          'attribute'=>'background'), // Extension
        array('element'=>'th',          'attribute'=>'background'), // Extension
        array('element'=>'layer',       'attribute'=>'src'),        // Extension
        array('element'=>'xml',         'attribute'=>'src'),        // Extension

        array('element'=>'button',      'attribute'=>'action'),     // Forms 2.0
        array('element'=>'datalist',    'attribute'=>'data'),       // Forms 2.0
        array('element'=>'form',        'attribute'=>'data'),       // Forms 2.0
        array('element'=>'input',       'attribute'=>'action'),     // Forms 2.0
        array('element'=>'select',      'attribute'=>'data'),       // Forms 2.0

        // XHTML
        array('element'=>'html',        'attribute'=>'xmlns'),

        // WML
        array('element'=>'access',      'attribute'=>'path'),       // 1.3
        array('element'=>'card',        'attribute'=>'onenterforward'),  // 1.3
        array('element'=>'card',        'attribute'=>'onenterbackward'), // 1.3
        array('element'=>'card',        'attribute'=>'ontimer'),    // 1.3
        array('element'=>'go',          'attribute'=>'href'),       // 1.3
        array('element'=>'option',      'attribute'=>'onpick'),     // 1.3
        array('element'=>'template',    'attribute'=>'onenterforward'),  // 1.3
        array('element'=>'template',    'attribute'=>'onenterbackward'), // 1.3
        array('element'=>'template',    'attribute'=>'ontimer'),    // 1.3
        array('element'=>'wml',         'attribute'=>'xmlns'),      // 2.0
    );

    $match_metas = array(
        'content-base',
        'content-location',
        'referer',
        'location',
        'refresh',
    );

    $urls = array( );

    // Extract all elements (the text between "<" and ">" of opening tags).
    if ( !preg_match_all( '/<([a-z][^>]*)>/iu', $text, $matches ) )
        return array( );
    $elements = $matches[1];

    // Attribute value: "double quoted", 'single quoted' or unquoted.
    // Exactly one of the three capture groups holds the value.
    $value_pattern = '\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s]*))';
    $pick_value = function ( $match )
    {
        for ( $i = 1; $i <= 3; $i++ )
        {
            if ( isset( $match[$i] ) && $match[$i] !== '' )
                return $match[$i];
        }
        return '';
    };

    // Match elements and attributes
    foreach ( $match_elements as $match_element )
    {
        $name = $match_element['element'];
        $attr = $match_element['attribute'];

        // The attribute name must be preceded by whitespace so that "src"
        // does not match inside "data-src"; /s lets tags span lines.
        $pattern = '/^' . preg_quote( $name, '/' ) . '\s(?:.*\s)?' .
            preg_quote( $attr, '/' ) . $value_pattern . '/isu';

        if ( $attr == 'archive' && $name == 'object' )
            $split_pattern = '/\s+/u';      // Space-separated URL list
        else if ( $attr == 'archive' && $name == 'applet' )
            $split_pattern = '/\s*,\s*/u';  // Comma-separated URL list
        else
            $split_pattern = NULL;          // Single URL

        foreach ( $elements as $element )
        {
            if ( !preg_match( $pattern, $element, $match ) )
                continue;
            $value = $pick_value( $match );
            if ( $split_pattern === NULL )
            {
                $urls[$name][$attr][] = $value;
                continue;
            }
            $pieces = preg_split( $split_pattern, trim( $value ), -1,
                PREG_SPLIT_NO_EMPTY );
            foreach ( $pieces as $piece )
                $urls[$name][$attr][] = $piece;
        }
    }

    // Match meta http-equiv elements
    $content_pattern = '/\scontent' . $value_pattern . '/isu';
    $refresh_pattern = '/\d*;\s*(url=)?(.*)$/iu';
    foreach ( $match_metas as $match_meta )
    {
        $attr_pattern = '/http-equiv\s*=\s*["\']?' .
            preg_quote( $match_meta, '/' ) . '["\']?/iu';
        foreach ( $elements as $element )
        {
            if ( !preg_match( '/^meta/iu', $element ) ||
                !preg_match( $attr_pattern, $element ) ||
                !preg_match( $content_pattern, $element, $match ) )
                continue;
            $value = $pick_value( $match );
            if ( $match_meta != 'refresh' )
                $urls['meta']['http-equiv'][] = $value;
            else if ( preg_match( $refresh_pattern, $value, $refresh ) )
                $urls['meta']['http-equiv'][] = $refresh[2];
        }
    }

    // Match style attributes
    $urls['style'] = array( );
    $style_pattern = '/\sstyle' . $value_pattern . '/isu';
    foreach ( $elements as $element )
    {
        if ( !preg_match( $style_pattern, $element, $match ) )
            continue;
        $style_urls = extract_css_urls( $pick_value( $match ) );
        if ( !empty( $style_urls ) )
            $urls['style'] = array_merge_recursive(
                $urls['style'], $style_urls );
    }

    // Match style bodies
    if ( preg_match_all( '/<style[^>]*>(.*?)<\/style>/siu', $text, $style_bodies ) )
    {
        foreach ( $style_bodies[1] as $style_body )
        {
            $style_urls = extract_css_urls( $style_body );
            if ( !empty( $style_urls ) )
                $urls['style'] = array_merge_recursive(
                    $urls['style'], $style_urls );
        }
    }
    if ( empty( $urls['style'] ) )
        unset( $urls['style'] );

    return $urls;
}

/**
 * Extract URLs from CSS text.
 *
 * Recognises  @import '...' / @import "..." ,  @import url(...)  and plain
 * url(...) (with or without quotes inside the parentheses). Backslash
 * escapes inside a URL are removed from the returned value.
 *
 * @param string $text CSS text (must be valid UTF-8 for matching to work).
 * @return array       Zero, one or both of:
 *                       'import'   => list of @import URLs
 *                       'property' => list of url(...) property URLs
 */
function extract_css_urls( $text )
{
    $urls = array( );

    // A run of ordinary characters and backslash escapes. The quantifiers
    // are possessive: none of the characters they consume can satisfy what
    // follows (a quote, white space or ")"), so giving them back can never
    // produce a match, and refusing to do so prevents exponential
    // backtracking on an unterminated or comma-containing url( ... .
    $url_pattern     = '((?:[^\\\\\'", ()]++|\\\\.)*+)';
    $urlfunc_pattern = 'url\(\s*[\'"]?' . $url_pattern . '[\'"]?\s*\)';
    $pattern         = '/(' .
         '(@import\s*[\'"]' . $url_pattern     . '[\'"])' .
        '|(@import\s*'      . $urlfunc_pattern . ')'      .
        '|('                . $urlfunc_pattern . ')'      .  ')/iu';
    if ( !preg_match_all( $pattern, $text, $matches ) )
        return $urls;

    // Capture group holding the URL for each form, and where it is filed:
    //   3: @import '...' / @import "..."
    //   5: @import url(...) / url('...') / url("...")
    //   7: url(...) / url('...') / url("...")
    $groups = array( 3 => 'import', 5 => 'import', 7 => 'property' );
    foreach ( $groups as $group => $kind )
    {
        foreach ( $matches[$group] as $match )
        {
            if ( $match !== '' )
                $urls[$kind][] = preg_replace( '/\\\\(.)/u', '\\1', $match );
        }
    }

    return $urls;
}