Moodle  2.2.1
http://www.collinsharper.com
C:/xampp/htdocs/moodle/lib/tcpdf/tcpdf_parser.php
Go to the documentation of this file.
00001 <?php
00002 //============================================================+
00003 // File name   : tcpdf_parser.php
00004 // Version     : 1.0.000
00005 // Begin       : 2011-05-23
00006 // Last Update : 2011-07-14
00007 // Author      : Nicola Asuni - Tecnick.com S.r.l - Via Della Pace, 11 - 09044 - Quartucciu (CA) - ITALY - www.tecnick.com - info@tecnick.com
00008 // License     : http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT GNU-LGPLv3 + YOU CAN'T REMOVE ANY TCPDF COPYRIGHT NOTICE OR LINK FROM THE GENERATED PDF DOCUMENTS.
00009 // -------------------------------------------------------------------
00010 // Copyright (C) 2011-2011  Nicola Asuni - Tecnick.com S.r.l.
00011 //
00012 // This file is part of TCPDF software library.
00013 //
00014 // TCPDF is free software: you can redistribute it and/or modify it
00015 // under the terms of the GNU Lesser General Public License as
00016 // published by the Free Software Foundation, either version 3 of the
00017 // License, or (at your option) any later version. Additionally,
00018 // YOU CAN'T REMOVE ANY TCPDF COPYRIGHT NOTICE OR LINK FROM THE
00019 // GENERATED PDF DOCUMENTS.
00020 //
00021 // TCPDF is distributed in the hope that it will be useful, but
00022 // WITHOUT ANY WARRANTY; without even the implied warranty of
00023 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
00024 // See the GNU Lesser General Public License for more details.
00025 //
00026 // You should have received a copy of the License
00027 // along with TCPDF. If not, see
00028 // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
00029 //
00030 // See LICENSE.TXT file for more information.
00031 // -------------------------------------------------------------------
00032 //
00033 // Description : This is a PHP class for parsing PDF documents.
00034 //
00035 //============================================================+
00036 
00045 // include class for decoding filters
00046 require_once(dirname(__FILE__).'/tcpdf_filters.php');
00047 
/**
 * Minimal parser for PDF documents.
 *
 * The parser reads the cross-reference table(s) and trailer of a PDF held in
 * memory and tokenizes every in-use indirect object listed there. The result
 * is exposed through getParsedData().
 *
 * Every parsed element is an array of the form
 *   array(0 => type, 1 => value, 2 => offset-after-element [, 3 => decoded stream])
 * where type is one of: '/', '(', ')', '[', ']', '<<', '>>', '<', '>',
 * 'numeric', 'boolean', 'null', 'stream', 'endstream', 'endobj',
 * 'ojbref' (indirect reference) and 'ojb' (object header). The last two
 * spellings are historical and are kept so existing consumers of the parsed
 * data keep working.
 */
class TCPDF_PARSER {

	/**
	 * Raw content of the PDF document (emptied once parsing is complete).
	 * @var string
	 */
	private $pdfdata = '';

	/**
	 * Cross-reference data: 'xref' maps "[object]_[generation]" to a byte
	 * offset, 'trailer' holds the values extracted from the newest trailer.
	 * @var array
	 */
	protected $xref = array();

	/**
	 * Parsed indirect objects, indexed by "[object]_[generation]".
	 * @var array
	 */
	protected $objects = array();

	/**
	 * Helper object used to decode stream filters.
	 * @var TCPDF_FILTERS
	 */
	private $FilterDecoders;

	/**
	 * Offsets of the cross-reference sections already processed; used to
	 * stop following /Prev links that form a cycle.
	 * @var array
	 */
	private $xref_visited = array();

// -----------------------------------------------------------------------------

	/**
	 * Parse a PDF document held in a string.
	 *
	 * @param string $data Raw PDF document content.
	 */
	public function __construct($data) {
		if (empty($data)) {
			$this->Error('Empty PDF data.');
		}
		$this->pdfdata = $data;
		// initialize class for decoding filters
		$this->FilterDecoders = new TCPDF_FILTERS();
		// get xref and trailer data
		$this->xref_visited = array();
		$this->xref = $this->getXrefData();
		// parse all document objects
		$this->objects = array();
		if (isset($this->xref['xref']) && is_array($this->xref['xref'])) {
			foreach ($this->xref['xref'] as $obj => $offset) {
				// an object may already have been parsed while resolving a reference
				if (!isset($this->objects[$obj])) {
					$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
				}
			}
		}
		// release some memory
		$this->pdfdata = '';
	}

	/**
	 * Return the result of the parsing.
	 *
	 * @return array array(0 => cross-reference data, 1 => parsed objects).
	 */
	public function getParsedData() {
		return array($this->xref, $this->objects);
	}

	/**
	 * Tell whether a keyword is located exactly at the given byte position.
	 *
	 * @param string $keyword Text to look for.
	 * @param int $pos Byte position inside the document.
	 * @return bool True when the document contains $keyword at $pos.
	 */
	private function isKeywordAt($keyword, $pos) {
		if (($pos < 0) || ($pos >= strlen($this->pdfdata))) {
			return false;
		}
		return (substr($this->pdfdata, $pos, strlen($keyword)) === $keyword);
	}

	/**
	 * Read the cross-reference table and trailer, following /Prev links.
	 *
	 * @param int $offset 0 to start from the last "startxref" of the document;
	 *        otherwise the position of a cross-reference section (a /Prev
	 *        value). If no "xref" keyword is found at a non-zero offset, the
	 *        last "startxref" located after that offset is used instead.
	 * @param array $xref Cross-reference data collected so far; entries that
	 *        already exist are never overwritten, so newer sections win.
	 * @return array Updated cross-reference data ('xref' and 'trailer' keys).
	 */
	protected function getXrefData($offset=0, $xref=array()) {
		$offset = intval($offset);
		if (($offset > 0) && $this->isKeywordAt('xref', $offset)) {
			// the caller already points at a cross-reference section (/Prev)
			$startxref = $offset;
		} else {
			// find last startxref
			if (preg_match_all('/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_SET_ORDER, $offset) == 0) {
				$this->Error('Unable to find startxref');
				return $xref;
			}
			$matches = array_pop($matches);
			$startxref = intval($matches[1]);
		}
		// check xref position
		if (!$this->isKeywordAt('xref', $startxref)) {
			$this->Error('Unable to find xref');
			return $xref;
		}
		// never process the same section twice (protects against /Prev cycles)
		if (isset($this->xref_visited[$startxref])) {
			return $xref;
		}
		$this->xref_visited[$startxref] = true;
		// extract xref data (object indexes and offsets)
		$offset = $startxref + 5;
		// initialize object number
		$obj_num = 0;
		$whitespace = "\x00\x09\x0a\x0c\x0d\x20";
		while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			// The pattern is not anchored to $offset: if anything other than
			// white space had to be skipped to reach the match, the match
			// belongs to a later part of the file and this section is over.
			$gap = $matches[0][1] - $offset;
			if (($gap > 0) && (strspn($this->pdfdata, $whitespace, $offset, $gap) != $gap)) {
				break;
			}
			$offset = (strlen($matches[0][0]) + $matches[0][1]);
			if ($matches[3][0] == 'n') {
				// create unique object index: [object number]_[generation number]
				$index = $obj_num.'_'.intval($matches[2][0]);
				// check if object already exist
				if (!isset($xref['xref'][$index])) {
					// store object offset position
					$xref['xref'][$index] = intval($matches[1][0]);
				}
				++$obj_num;
				// skip the two-byte end-of-line marker of the entry
				$offset += 2;
			} elseif ($matches[3][0] == 'f') {
				// free entry: only advance the object counter
				++$obj_num;
				$offset += 2;
			} else {
				// subsection header: first object number of the subsection
				$obj_num = intval($matches[1][0]);
			}
		}
		// get trailer data
		if (preg_match('/trailer[\s]*<<(.*)>>[\s]*[\r\n]+startxref[\s]*[\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
			$trailer_data = $matches[1][0];
			if (!isset($xref['trailer'])) {
				// get only the last updated version
				$xref['trailer'] = array();
				// parse trailer_data
				if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['size'] = intval($matches[1]);
				}
				if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
				}
				if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
					$xref['trailer']['id'] = array();
					$xref['trailer']['id'][0] = $matches[1];
					$xref['trailer']['id'][1] = $matches[2];
				}
			}
			if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
				// get previous xref
				$xref = $this->getXrefData(intval($matches[1]), $xref);
			}
		} else {
			$this->Error('Unable to find trailer');
		}
		return $xref;
	}

	/**
	 * Read the next object (token) starting at the given position.
	 *
	 * @param int $offset Position in the document where the search starts.
	 * @return array array(0 => type, 1 => value, 2 => offset after the object).
	 *         The type is an empty string when no known object is found.
	 */
	protected function getRawObject($offset=0) {
		$objtype = ''; // object type to be returned
		$objval = ''; // object value to be returned
		// skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
		$offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
		if (!isset($this->pdfdata[$offset])) {
			// end of data reached
			return array($objtype, $objval, $offset);
		}
		// get first char
		$char = $this->pdfdata[$offset];
		// get object type
		switch ($char) {
			case '%': { // \x25 PERCENT SIGN
				// skip comment and search for next token
				$next = strcspn($this->pdfdata, "\r\n", $offset);
				if ($next > 0) {
					$offset += $next;
					return $this->getRawObject($offset);
				}
				break;
			}
			case '/': { // \x2F SOLIDUS
				// name object
				$objtype = $char;
				++$offset;
				if (preg_match('/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/', substr($this->pdfdata, $offset, 256), $matches) == 1) {
					$objval = $matches[1]; // unescaped value
					$offset += strlen($objval);
				}
				break;
			}
			case '(':   // \x28 LEFT PARENTHESIS
			case ')': { // \x29 RIGHT PARENTHESIS
				// literal string object
				$objtype = $char;
				++$offset;
				$strpos = $offset;
				if ($char == '(') {
					// scan up to the matching closing parenthesis
					$open_bracket = 1;
					while ($open_bracket > 0) {
						if (!isset($this->pdfdata[$strpos])) {
							break;
						}
						$ch = $this->pdfdata[$strpos];
						switch ($ch) {
							case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
								// skip next character
								++$strpos;
								break;
							}
							case '(': { // LEFT PARENTHESIS (28h)
								++$open_bracket;
								break;
							}
							case ')': { // RIGHT PARENTHESIS (29h)
								--$open_bracket;
								break;
							}
						}
						++$strpos;
					}
					// raw (still escaped) content, without the closing parenthesis
					$objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
					$offset = $strpos;
				}
				break;
			}
			case '[':   // \x5B LEFT SQUARE BRACKET
			case ']': { // \x5D RIGHT SQUARE BRACKET
				// array object
				$objtype = $char;
				++$offset;
				if ($char == '[') {
					// get array content
					$objval = array();
					do {
						$previous = $offset;
						// get element
						$element = $this->getRawObject($offset);
						$offset = $element[2];
						$objval[] = $element;
						// stop on the closing delimiter, or when no progress is made (truncated or unknown data)
					} while (($element[0] != ']') && ($offset != $previous));
					// remove closing delimiter
					array_pop($objval);
				}
				break;
			}
			case '<':   // \x3C LESS-THAN SIGN
			case '>': { // \x3E GREATER-THAN SIGN
				if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
					// dictionary object
					$objtype = $char.$char;
					$offset += 2;
					if ($char == '<') {
						// get dictionary content (flat list of keys and values)
						$objval = array();
						do {
							$previous = $offset;
							// get element
							$element = $this->getRawObject($offset);
							$offset = $element[2];
							$objval[] = $element;
							// stop on the closing delimiter, or when no progress is made (truncated or unknown data)
						} while (($element[0] != '>>') && ($offset != $previous));
						// remove closing delimiter
						array_pop($objval);
					}
				} else {
					// hexadecimal string object
					$objtype = $char;
					++$offset;
					if (($char == '<') && (preg_match('/^([0-9A-Fa-f]+)[>]/iU', substr($this->pdfdata, $offset), $matches) == 1)) {
						$objval = $matches[1];
						$offset += strlen($matches[0]);
					}
				}
				break;
			}
			default: {
				if (substr($this->pdfdata, $offset, 6) == 'endobj') {
					// indirect object
					$objtype = 'endobj';
					$offset += 6;
				} elseif (substr($this->pdfdata, $offset, 4) == 'null') {
					// null object
					$objtype = 'null';
					$offset += 4;
					$objval = 'null';
				} elseif (substr($this->pdfdata, $offset, 4) == 'true') {
					// boolean true object
					$objtype = 'boolean';
					$offset += 4;
					$objval = 'true';
				} elseif (substr($this->pdfdata, $offset, 5) == 'false') {
					// boolean false object
					$objtype = 'boolean';
					$offset += 5;
					$objval = 'false';
				} elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
					// start stream object
					$objtype = 'stream';
					$offset += 6;
					// The keyword must be followed by an end-of-line marker.
					// A CRLF pair is consumed as a whole so that the LF does
					// not end up as the first byte of the stream data.
					$eol = 0;
					if (substr($this->pdfdata, $offset, 2) === "\r\n") {
						$eol = 2;
					} elseif (isset($this->pdfdata[$offset]) && (($this->pdfdata[$offset] == "\n") || ($this->pdfdata[$offset] == "\r"))) {
						$eol = 1;
					}
					if ($eol > 0) {
						$start = $offset + $eol;
						$end = stripos($this->pdfdata, 'endstream', $start);
						if ($end !== false) {
							// drop the end-of-line marker(s) that precede "endstream"
							$objval = rtrim(substr($this->pdfdata, $start, ($end - $start)), "\r\n");
							$offset = $end + 9;
						}
					}
				} elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
					// end stream object
					$objtype = 'endstream';
					$offset += 9;
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// indirect object reference (historical type spelling kept)
					$objtype = 'ojbref';
					$offset += strlen($matches[0]);
					$objval = intval($matches[1]).'_'.intval($matches[2]);
				} elseif (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($this->pdfdata, $offset, 33), $matches) == 1) {
					// object start (historical type spelling kept)
					$objtype = 'ojb';
					$objval = intval($matches[1]).'_'.intval($matches[2]);
					$offset += strlen($matches[0]);
				} elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
					// numeric object
					$objtype = 'numeric';
					$objval = substr($this->pdfdata, $offset, $numlen);
					$offset += $numlen;
				}
				break;
			}
		}
		return array($objtype, $objval, $offset);
	}

	/**
	 * Parse the content of an indirect object.
	 *
	 * @param string $obj_ref Object index: "[object number]_[generation number]".
	 * @param int $offset Position of the object header ("N G obj") in the document.
	 * @param bool $decoding If true, streams are decoded with the filters
	 *        named in the dictionary that precedes them; the result is stored
	 *        at index 3 of the stream element.
	 * @return array|null List of the elements found in the object; a single
	 *         'null' element (not a list) if no object header is found at
	 *         $offset; null if $obj_ref is malformed.
	 */
	protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
		$obj = explode('_', $obj_ref);
		if (count($obj) != 2) {
			$this->Error('Invalid object reference: '.$obj_ref);
			return;
		}
		$offset = intval($offset);
		$objref = $obj[0].' '.$obj[1].' obj';
		if (!$this->isKeywordAt($objref, $offset)) {
			// an indirect reference to an undefined object shall be considered a reference to the null object
			return array('null', 'null', $offset);
		}
		// starting position of object content
		$offset += strlen($objref);
		// get array of object content
		$objdata = array();
		$i = 0; // object main index
		do {
			$previous = $offset;
			// get element
			$element = $this->getRawObject($offset);
			$offset = $element[2];
			// decode stream using stream's dictionary information
			if ($decoding && ($element[0] == 'stream') && (isset($objdata[($i - 1)][0])) && ($objdata[($i - 1)][0] == '<<')) {
				$element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
			}
			$objdata[$i] = $element;
			++$i;
			// stop on "endobj", or when no progress is made (truncated or unknown data)
		} while (($element[0] != 'endobj') && ($offset != $previous));
		// remove closing delimiter
		array_pop($objdata);
		// return raw object content
		return $objdata;
	}

	/**
	 * Resolve an element that may be a reference to an indirect object.
	 *
	 * @param array $obj Parsed element.
	 * @return array The content of the referenced object (a list of elements)
	 *         when $obj is a reference to a known object; $obj itself otherwise.
	 */
	protected function getObjectVal($obj) {
		// getRawObject() tags references as 'ojbref'; the correct spelling is accepted too
		if (($obj[0] == 'objref') || ($obj[0] == 'ojbref')) {
			$index = $obj[1];
			// reference to indirect object
			if (isset($this->objects[$index])) {
				// this object has been already parsed
				return $this->objects[$index];
			} elseif (isset($this->xref['xref'][$index])) {
				// parse new object
				$this->objects[$index] = $this->getIndirectObject($index, $this->xref['xref'][$index], false);
				return $this->objects[$index];
			}
		}
		return $obj;
	}

	/**
	 * Decode a stream using the information of its dictionary.
	 *
	 * @param array $sdic Content of the stream dictionary (flat list of elements).
	 * @param string $stream Raw stream data.
	 * @return array array(0 => stream data after applying every available
	 *         filter, 1 => names of the filters that could not be applied).
	 */
	protected function decodeStream($sdic, $stream) {
		// get stream length and filters
		$slength = strlen($stream);
		$filters = array();
		foreach ($sdic as $k => $v) {
			if ($v[0] == '/') {
				if (($v[1] == 'Length') && (isset($sdic[($k + 1)])) && ($sdic[($k + 1)][0] == 'numeric')) {
					// get declared stream length
					$declength = intval($sdic[($k + 1)][1]);
					if ($declength < $slength) {
						$stream = substr($stream, 0, $declength);
						$slength = $declength;
					}
				} elseif (($v[1] == 'Filter') && (isset($sdic[($k + 1)]))) {
					// resolve indirect object
					$objval = $this->getObjectVal($sdic[($k + 1)]);
					if (isset($objval[0]) && is_array($objval[0])) {
						// a resolved indirect object is a list of elements: the value is the first one
						$objval = $objval[0];
					}
					if (!isset($objval[0])) {
						continue;
					}
					if ($objval[0] == '/') {
						// single filter
						$filters[] = $objval[1];
					} elseif ($objval[0] == '[') {
						// array of filters
						foreach ($objval[1] as $flt) {
							if ($flt[0] == '/') {
								$filters[] = $flt[1];
							}
						}
					}
				}
			}
		}
		// decode the stream
		$remaining_filters = array();
		$available_filters = $this->FilterDecoders->getAvailableFilters();
		foreach ($filters as $filter) {
			if (in_array($filter, $available_filters)) {
				$stream = $this->FilterDecoders->decodeFilter($filter, $stream);
			} else {
				// add missing filter to array
				$remaining_filters[] = $filter;
			}
		}
		return array($stream, $remaining_filters);
	}

	/**
	 * Print an error message and terminate the script.
	 *
	 * @param string $msg Error message.
	 */
	public function Error($msg) {
		// exit program and print error
		die('<strong>TCPDF_PARSER ERROR: </strong>'.$msg);
	}

} // END OF TCPDF_PARSER CLASS
00500 
00501 //============================================================+
00502 // END OF FILE
00503 //============================================================+
 All Data Structures Namespaces Files Functions Variables Enumerations