|
Moodle
2.2.1
http://www.collinsharper.com
|
<?php
//============================================================+
// File name   : tcpdf_parser.php
// Version     : 1.0.000
// Begin       : 2011-05-23
// Last Update : 2011-07-14
// Author      : Nicola Asuni - Tecnick.com S.r.l - Via Della Pace, 11 - 09044 - Quartucciu (CA) - ITALY - www.tecnick.com - info@tecnick.com
// License     : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3 + YOU CAN'T REMOVE ANY TCPDF COPYRIGHT NOTICE OR LINK FROM THE GENERATED PDF DOCUMENTS.
// -------------------------------------------------------------------
// Copyright (C) 2011-2011 Nicola Asuni - Tecnick.com S.r.l.
//
// This file is part of TCPDF software library.
//
// TCPDF is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version. Additionally,
// YOU CAN'T REMOVE ANY TCPDF COPYRIGHT NOTICE OR LINK FROM THE
// GENERATED PDF DOCUMENTS.
//
// TCPDF is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the License
// along with TCPDF. If not, see
// <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
//
// See LICENSE.TXT file for more information.
// -------------------------------------------------------------------
//
// Description : This is a PHP class for parsing PDF documents.
//
//============================================================+

// include class for decoding filters
require_once(dirname(__FILE__).'/tcpdf_filters.php');

/**
 * Parser for PDF documents held in memory.
 *
 * The constructor reads the cross-reference table(s) and trailer of the
 * document, then parses every in-use indirect object listed there. The result
 * is exposed through getParsedData().
 *
 * Every parsed element is an array of the form array(type, value, offset),
 * where offset is the position in the raw data right after the element.
 * Stream elements parsed with decoding enabled carry a fourth item holding the
 * result of decodeStream().
 *
 * NOTE(review): indirect references and object headers are tagged 'ojbref' and
 * 'ojb' (historical misspelling). The tags are kept as they are because they
 * are part of the data returned by getParsedData().
 */
class TCPDF_PARSER {

	/**
	 * Raw content of the PDF document; emptied once parsing is complete.
	 */
	private $pdfdata = '';

	/**
	 * Cross-reference data: 'xref' maps "[object number]_[generation number]"
	 * to a byte offset, 'trailer' holds the values read from the trailer.
	 */
	protected $xref = array();

	/**
	 * Parsed indirect objects, indexed by "[object number]_[generation number]".
	 */
	protected $objects = array();

	/**
	 * Instance of TCPDF_FILTERS used to decode stream filters.
	 */
	private $FilterDecoders;

	/**
	 * Offsets of the cross-reference tables already processed, used to stop
	 * a chain of /Prev entries that loops back on itself.
	 */
	private $xrefOffsets = array();

// -----------------------------------------------------------------------------

	/**
	 * Parse a PDF document and store its cross-reference data and objects.
	 * @param $data (string) PDF data to parse.
	 */
	public function __construct($data) {
		if (empty($data)) {
			$this->Error('Empty PDF data.');
		}
		$this->pdfdata = $data;
		// initialize class for decoding filters
		$this->FilterDecoders = new TCPDF_FILTERS();
		// get xref and trailer data
		$this->xref = $this->getXrefData();
		// parse all document objects
		$this->objects = array();
		foreach ($this->xref['xref'] as $obj => $offset) {
			if (!isset($this->objects[$obj])) {
				$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
			}
		}
		// release some memory
		$this->pdfdata = '';
	}

	/**
	 * Return the parsed data.
	 * @return (array) array(xref data, parsed objects).
	 */
	public function getParsedData() {
		return array($this->xref, $this->objects);
	}

	/**
	 * Read a cross-reference table and its trailer, following /Prev entries.
	 * @param $offset (int) Position of the cross-reference table to read; 0 means "use the last startxref of the document".
	 * @param $xref (array) Cross-reference data collected so far; existing entries are never overwritten.
	 * @return (array) Cross-reference data with the keys 'xref' and (when a trailer was found) 'trailer'.
	 */
	protected function getXrefData($offset=0, $xref=array()) {
		// the constructor iterates over this key, so it must always exist
		if (!isset($xref['xref'])) {
			$xref['xref'] = array();
		}
		$offset = intval($offset);
		$pdflen = strlen($this->pdfdata);
		if ($offset == 0) {
			// find last startxref
			if (preg_match_all('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER) == 0) {
				$this->Error('Unable to find startxref');
			}
			$matches = array_pop($matches);
			$startxref = intval($matches[1]);
		} else {
			// a /Prev entry already holds the position of the table
			$startxref = $offset;
		}
		// check xref position (strict comparison: strpos() returns false when nothing is found)
		if (($startxref < 0) || ($startxref >= $pdflen) || (strpos($this->pdfdata, 'xref', $startxref) !== $startxref)) {
			$this->Error('Unable to find xref');
		}
		// never process the same table twice (protects against circular /Prev chains)
		if (isset($this->xrefOffsets[$startxref])) {
			return $xref;
		}
		$this->xrefOffsets[$startxref] = true;
		// extract xref data (object indexes and offsets)
		$offset = min(($startxref + 5), $pdflen);
		// the table ends where its trailer begins
		$trailerpos = strpos($this->pdfdata, 'trailer', $offset);
		// initialize object number
		$obj_num = 0;
		while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			if (($trailerpos !== false) && ($matches[0][1] > $trailerpos)) {
				// the match belongs to a later part of the file, not to this table
				break;
			}
			$offset = (strlen($matches[0][0]) + $matches[0][1]);
			if ($matches[3][0] == 'n') {
				// create unique object index: [object number]_[generation number]
				$index = $obj_num.'_'.intval($matches[2][0]);
				// check if object already exist
				if (!isset($xref['xref'][$index])) {
					// store object offset position
					$xref['xref'][$index] = intval($matches[1][0]);
				}
				++$obj_num;
				$offset += 2;
			} elseif ($matches[3][0] == 'f') {
				// free entry: only advance the object counter
				++$obj_num;
				$offset += 2;
			} else {
				// subsection header: first object number of the subsection
				$obj_num = intval($matches[1][0]);
			}
		}
		// get trailer data
		if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			$trailer_data = $matches[1][0];
			if (!isset($xref['trailer'])) {
				// get only the last updated version
				$xref['trailer'] = array();
				// parse trailer_data
				if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['size'] = intval($matches[1]);
				}
				if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $matches[1];
					$xref['trailer']['id'][1] = $matches[2];
				}
			}
			if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				// get previous xref
				$xref = $this->getXrefData(intval($matches[1]), $xref);
			}
		} else {
			$this->Error('Unable to find trailer');
		}
		return $xref;
	}

	/**
	 * Read one raw object (token) starting at the given position.
	 * @param $offset (int) Position in the raw data where the object starts.
	 * @return (array) array(type, value, offset after the object). The type is an empty string when the end of the data is reached or the token is not recognized; in that case nothing is consumed.
	 */
	protected function getRawObject($offset=0) {
		$objtype = ''; // object type to be returned
		$objval = ''; // object value to be returned
		$pdflen = strlen($this->pdfdata);
		if ($offset >= $pdflen) {
			// nothing left to read
			return array($objtype, $objval, $offset);
		}
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		if ($offset >= $pdflen) {
			// only white space was left
			return array($objtype, $objval, $offset);
		}
		// get first char
		$char = $this->pdfdata[$offset];
		// get object type
		switch ($char) {
			case '%': { // \x25 PERCENT SIGN
				// skip comment and search for next token
				$next = strcspn($this->pdfdata, "\r\n", $offset);
				if ($next > 0) {
					$offset += $next;
					return $this->getRawObject($offset);
				}
				break;
			}
			case '/': { // \x2F SOLIDUS
				// name object
				$objtype = $char;
				++$offset;
				if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
					$objval = $matches[1]; // unescaped value
					$offset += strlen($objval);
				}
				break;
			}
			case '(': // \x28 LEFT PARENTHESIS
			case ')': { // \x29 RIGHT PARENTHESIS
				// literal string object
				$objtype = $char;
				++$offset;
				$strpos = $offset;
				if ($char == '(') {
					// scan up to the matching closing parenthesis
					$open_bracket = 1;
					while ($open_bracket > 0) {
						if (!isset($this->pdfdata[$strpos])) {
							break;
						}
						$ch = $this->pdfdata[$strpos];
						switch ($ch) {
							case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
								// skip next character
								++$strpos;
								break;
							}
							case '(': { // LEFT PARENTHESIS (28h)
								++$open_bracket;
								break;
							}
							case ')': { // RIGHT PARENTHESIS (29h)
								--$open_bracket;
								break;
							}
						}
						++$strpos;
					}
					$objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
					$offset = $strpos;
				}
				break;
			}
			case '[': // \x5B LEFT SQUARE BRACKET
			case ']': { // \x5D RIGHT SQUARE BRACKET
				// array object
				$objtype = $char;
				++$offset;
				if ($char == '[') {
					// get array content
					$objval = array();
					while (true) {
						// get element
						$element = $this->getRawObject($offset);
						$offset = $element[2];
						// stop at the closing delimiter, or when nothing more can be read
						if (($element[0] === ']') || ($element[0] === '')) {
							break;
						}
						$objval[] = $element;
					}
				}
				break;
			}
			case '<': // \x3C LESS-THAN SIGN
			case '>': { // \x3E GREATER-THAN SIGN
				if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
					// dictionary object
					$objtype = $char.$char;
					$offset += 2;
					if ($char == '<') {
						// get dictionary content (flat list of keys and values)
						$objval = array();
						while (true) {
							// get element
							$element = $this->getRawObject($offset);
							$offset = $element[2];
							// stop at the closing delimiter, or when nothing more can be read
							if (($element[0] === '>>') || ($element[0] === '')) {
								break;
							}
							$objval[] = $element;
						}
					}
				} else {
					// hexadecimal string object
					$objtype = $char;
					++$offset;
					if (($char == '<') && (preg_match('/^([0-9A-Fa-f]+)[>]/iU', substr($this->pdfdata, $offset), $matches) == 1)) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
					}
				}
				break;
			}
			default: {
				if (substr($this->pdfdata, $offset, 6) == 'endobj') {
					// indirect object
					$objtype = 'endobj';
					$offset += 6;
				} elseif (substr($this->pdfdata, $offset, 4) == 'null') {
					// null object
					$objtype = 'null';
					$offset += 4;
					$objval = 'null';
				} elseif (substr($this->pdfdata, $offset, 4) == 'true') {
					// boolean true object
					$objtype = 'boolean';
					$offset += 4;
					$objval = 'true';
				} elseif (substr($this->pdfdata, $offset, 5) == 'false') {
					// boolean false object
					$objtype = 'boolean';
					$offset += 5;
					$objval = 'false';
				} elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
					// start stream object
					$objtype = 'stream';
					$offset += 6;
					if (preg_match('/^[\r\n]+(.*)[\r\n]*endstream/isU', substr($this->pdfdata, $offset), $matches) == 1) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
					}
				} elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
					// end stream object
					$objtype = 'endstream';
					$offset += 9;
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// indirect object reference
					$objtype = 'ojbref';
					$offset += strlen($matches[0]);
					$objval = intval($matches[1]).'_'.intval($matches[2]);
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// object start
					$objtype = 'ojb';
					$objval = intval($matches[1]).'_'.intval($matches[2]);
					$offset += strlen($matches[0]);
				} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
					// numeric object
					$objtype = 'numeric';
					$objval = substr($this->pdfdata, $offset, $numlen);
					$offset += $numlen;
				}
				break;
			}
		}
		return array($objtype, $objval, $offset);
	}

	/**
	 * Read the content of an indirect object.
	 * @param $obj_ref (string) Object reference in the form "[object number]_[generation number]".
	 * @param $offset (int) Position in the raw data where the object header is expected.
	 * @param $decoding (boolean) If true, a stream that directly follows a dictionary is decoded with decodeStream() and the result is stored as the fourth item of the stream element.
	 * @return (array) List of the elements found between the object header and "endobj"; array('null', 'null', $offset) when the object header is not at the given position.
	 */
	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
		$obj = explode('_', $obj_ref);
		if (($obj === false) || (count($obj) != 2)) {
			$this->Error('Invalid object reference: '.$obj_ref);
			return;
		}
		$offset = intval($offset);
		$objref = $obj[0].' '.$obj[1].' obj';
		// strict comparison: strpos() returns false when nothing is found
		if (($offset < 0) || ($offset >= strlen($this->pdfdata)) || (strpos($this->pdfdata, $objref, $offset) !== $offset)) {
			// an indirect reference to an undefined object shall be considered a reference to the null object
			return array('null', 'null', $offset);
		}
		// starting position of object content
		$offset += strlen($objref);
		// get array of object content
		$objdata = array();
		while (true) {
			// get element
			$element = $this->getRawObject($offset);
			$offset = $element[2];
			// stop at the closing delimiter, or when nothing more can be read
			if (($element[0] === 'endobj') || ($element[0] === '')) {
				break;
			}
			// decode stream using stream's dictionary information
			$last = (count($objdata) - 1);
			if ($decoding && ($element[0] == 'stream') && isset($objdata[$last][0]) && ($objdata[$last][0] == '<<')) {
				$element[3] = $this->decodeStream($objdata[$last][1], $element[1]);
			}
			$objdata[] = $element;
		}
		// return raw object content
		return $objdata;
	}

	/**
	 * Resolve an element: a reference to an indirect object is replaced by the content of that object.
	 * @param $obj (array) Element as returned by getRawObject().
	 * @return (array) Content of the referenced object when $obj is a reference to a known object, $obj itself otherwise.
	 */
	protected function getObjectVal($obj) {
		// getRawObject() tags references as 'ojbref'; the correct spelling is accepted as well
		if (($obj[0] == 'objref') || ($obj[0] == 'ojbref')) {
			// reference to indirect object
			if (isset($this->objects[$obj[1]])) {
				// this object has been already parsed
				return $this->objects[$obj[1]];
			} elseif (isset($this->xref['xref'][$obj[1]])) {
				// parse new object
				$this->objects[$obj[1]] = $this->getIndirectObject($obj[1], $this->xref['xref'][$obj[1]], false);
				return $this->objects[$obj[1]];
			}
		}
		return $obj;
	}

	/**
	 * Decode a stream using the /Length and /Filter entries of its dictionary.
	 * @param $sdic (array) Content of the stream dictionary (flat list of elements).
	 * @param $stream (string) Raw stream data.
	 * @return (array) array(decoded stream, names of the filters that could not be applied).
	 */
	protected function decodeStream($sdic, $stream) {
		// get stream length and filters
		$slength = strlen($stream);
		$filters = array();
		foreach ($sdic as $k => $v) {
			if ($v[0] == '/') {
				if (($v[1] == 'Length') && isset($sdic[($k + 1)]) && ($sdic[($k + 1)][0] == 'numeric')) {
					// get declared stream length
					$declength = intval($sdic[($k + 1)][1]);
					if ($declength < $slength) {
						$stream = substr($stream, 0, $declength);
						$slength = $declength;
					}
				} elseif (($v[1] == 'Filter') && isset($sdic[($k + 1)])) {
					// resolve indirect object
					// NOTE(review): a resolved reference is a list of elements, not a single element, so the checks below may not match it - confirm
					$objval = $this->getObjectVal($sdic[($k + 1)]);
					if ($objval[0] == '/') {
						// single filter
						$filters[] = $objval[1];
					} elseif ($objval[0] == '[') {
						// array of filters
						foreach ($objval[1] as $flt) {
							if ($flt[0] == '/') {
								$filters[] = $flt[1];
							}
						}
					}
				}
			}
		}
		// decode the stream
		$available_filters = $this->FilterDecoders->getAvailableFilters();
		$remaining_filters = array();
		foreach ($filters as $filter) {
			if (in_array($filter, $available_filters)) {
				$stream = $this->FilterDecoders->decodeFilter($filter, $stream);
			} else {
				// add missing filter to array
				$remaining_filters[] = $filter;
			}
		}
		return array($stream, $remaining_filters);
	}

	/**
	 * Print an error message and terminate the script.
	 * @param $msg (string) Error message.
	 */
	public function Error($msg) {
		// exit program and print error
		die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
	}

} // END OF TCPDF_PARSER CLASS

//============================================================+
// END OF FILE
//============================================================+