* All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * @category XML * @package XML_Beautifier * @author Stephan Schmidt * @copyright 2003-2008 Stephan Schmidt * @license http://opensource.org/licenses/bsd-license New BSD License * @version CVS: $Id: Tokenizer.php,v 1.10 2008/08/24 19:44:14 ashnazg Exp $ * @link http://pear.php.net/package/XML_Beautifier */ /** * XML_Parser is needed to parse the document */ require_once 'XML/Parser.php'; /** * Tokenizer for XML_Beautifier * * This class breaks an XML document into separate tokens * that will be rendered by an XML_Beautifier renderer. 
*
 * @category  XML
 * @package   XML_Beautifier
 * @author    Stephan Schmidt
 * @copyright 2003-2008 Stephan Schmidt
 * @license   http://opensource.org/licenses/bsd-license New BSD License
 * @version   Release: 1.2.0
 * @link      http://pear.php.net/package/XML_Beautifier
 * @todo      tokenize DTD
 * @todo      check for xml:space attribute
 */
class XML_Beautifier_Tokenizer extends XML_Parser
{
    /**
     * current depth (number of elements that are currently open)
     * @var    integer
     * @access private
     */
    var $_depth = 0;

    /**
     * stack for all found elements
     *
     * While parsing, the last entry is the innermost open element (if any);
     * tokens found at depth 0 are stored directly in this list.
     *
     * @var    array
     * @access private
     */
    var $_struct = array();

    /**
     * current parsing mode, either "xml" or "doctype"
     * @var    string
     * @access private
     */
    var $_mode = "xml";

    /**
     * indicates, whether parser is in cdata section
     * @var    boolean
     * @access private
     */
    var $_inCDataSection = false;

    /**
     * Tokenize a document
     *
     * @param string  $document filename or XML document
     * @param boolean $isFile   flag to indicate whether
     *                          the first parameter is a file
     *
     * @return mixed the token list (array) on success, otherwise the error
     *               value reported by the parent parser
     */
    function tokenize($document, $isFile = true)
    {
        // NOTE(review): "folding" and XML_Parser() are defined by the parent
        // class, which is not visible here - presumably this turns off case
        // folding and re-runs the parent's PHP4-style constructor.
        $this->folding = false;
        $this->XML_Parser();
        $this->_resetVars();

        if ($isFile === true) {
            // Do not try to parse when the input file could not be set up;
            // hand the original error back to the caller instead.
            $result = $this->setInputFile($document);
            if (!$this->isError($result)) {
                $result = $this->parse();
            }
        } else {
            $result = $this->parseString($document);
        }

        if ($this->isError($result)) {
            return $result;
        }

        return $this->_struct;
    }

    /**
     * Start element handler for XML parser
     *
     * Pushes a new element token on the stack and increases the depth.
     *
     * @param object $parser  XML parser object
     * @param string $element XML element
     * @param array  $attribs attributes of XML tag
     *
     * @return void
     * @access protected
     */
    function startHandler($parser, $element, $attribs)
    {
        $struct = array(
            "type"     => XML_BEAUTIFIER_ELEMENT,
            "tagname"  => $element,
            "attribs"  => $attribs,
            "contains" => XML_BEAUTIFIER_EMPTY,
            // the element itself lives on the current depth,
            // its children one level deeper
            "depth"    => $this->_depth++,
            "children" => array()
        );

        array_push($this->_struct, $struct);
    }

    /**
     * End element handler for XML parser
     *
     * Takes the finished element from the stack and, unless it is a
     * top-level element, attaches it to its parent's children.
     *
     * @param object $parser  XML parser object
     * @param string $element element
     *
     * @return void
     * @access protected
     */
    function endHandler($parser, $element)
    {
        $struct = array_pop($this->_struct);
        if ($struct["depth"] > 0) {
            $parent = array_pop($this->_struct);
            array_push($parent["children"], $struct);
            $parent["contains"] = $parent["contains"] | XML_BEAUTIFIER_ELEMENT;
            array_push($this->_struct, $parent);
        } else {
            // top-level element: it stays on the stack as a finished token
            array_push($this->_struct, $struct);
        }
        $this->_depth--;
    }

    /**
     * Handler for character data
     *
     * Empty data is skipped. The token type depends on whether a CDATA
     * section is currently open.
     *
     * @param object $parser XML parser object
     * @param string $cdata  CDATA
     *
     * @return mixed true if the data was empty and skipped, otherwise nothing
     * @access protected
     */
    function cdataHandler($parser, $cdata)
    {
        if ((string)$cdata === '') {
            return true;
        }

        if ($this->_inCDataSection === true) {
            $type = XML_BEAUTIFIER_CDATA_SECTION;
        } else {
            $type = XML_BEAUTIFIER_CDATA;
        }

        $struct = array(
            "type"  => $type,
            "data"  => $cdata,
            "depth" => $this->_depth
        );

        $this->_appendToParent($struct);
    }

    /**
     * Handler for processing instructions
     *
     * @param object $parser XML parser object
     * @param string $target target
     * @param string $data   data
     *
     * @return void
     * @access protected
     */
    function piHandler($parser, $target, $data)
    {
        $struct = array(
            "type"   => XML_BEAUTIFIER_PI,
            "target" => $target,
            "data"   => $data,
            "depth"  => $this->_depth
        );

        $this->_appendToParent($struct);
    }

    /**
     * Handler for external entities
     *
     * Only the entity name is stored in the token; the remaining
     * parameters are not used.
     *
     * @param object $parser            XML parser object
     * @param string $open_entity_names entity name
     * @param string $base              ?? (unused?)
     * @param string $system_id         ?? (unused?)
     * @param string $public_id         ?? (unused?)
     *
     * @return bool always true
     * @access protected
     * @todo revisit parameter signature... doesn't seem to be correct
     * @todo PEAR CS - need to shorten arg list for 85-char rule
     */
    function entityrefHandler($parser, $open_entity_names, $base, $system_id, $public_id)
    {
        $struct = array(
            "type"  => XML_BEAUTIFIER_ENTITY,
            "name"  => $open_entity_names,
            "depth" => $this->_depth
        );

        $this->_appendToParent($struct);
        return true;
    }

    /**
     * Handler for all other stuff
     *
     * Dispatches the data according to the current parsing mode.
     *
     * @param object $parser XML parser object
     * @param string $data   data
     *
     * @return void
     * @access protected
     */
    function defaultHandler($parser, $data)
    {
        switch ($this->_mode) {
        case "xml":
            $this->_handleXMLDefault($data);
            break;
        case "doctype":
            $this->_handleDoctype($data);
            break;
        }
    }

    /**
     * handler for all data inside the doctype declaration
     *
     * Every chunk is stored as a doctype declaration token. The mode is
     * switched back to "xml" when a chunk containing ">" directly follows
     * a token whose data is "]".
     *
     * @param string $data data
     *
     * @return void
     * @access private
     * @todo improve doctype parsing to split the declaration into separate tokens
     */
    function _handleDoctype($data)
    {
        // strpos() replaces eregi(), which no longer exists in PHP 7+
        if (strpos($data, ">") !== false) {
            $last = $this->_getLastToken();
            if (is_array($last) && isset($last["data"]) && $last["data"] === "]") {
                $this->_mode = "xml";
            }
        }

        $struct = array(
            "type"  => XML_BEAUTIFIER_DT_DECLARATION,
            "data"  => $data,
            "depth" => $this->_depth
        );

        $this->_appendToParent($struct);
    }

    /**
     * handler for all default XML data
     *
     * Recognizes comments, the start and end markers of CDATA sections,
     * the XML declaration and the start of a doctype declaration; anything
     * else is stored as a default token.
     *
     * @param string $data data
     *
     * @return bool always true
     * @access private
     */
    function _handleXMLDefault($data)
    {
        if (strncmp("<!--", $data, 4) == 0) {
            /*
             * handle comment
             */
            $regs    = array();
            $comment = '';
            // "s" modifier: comments may span several lines
            if (preg_match('/<!--(.*)-->/s', $data, $regs)) {
                $comment = trim($regs[1]);
            }

            $struct = array(
                "type"  => XML_BEAUTIFIER_COMMENT,
                "data"  => $comment,
                "depth" => $this->_depth
            );
        } elseif ($data === "<![CDATA[") {
            /*
             * handle start of cdata section
             */
            $this->_inCDataSection = true;
            $struct                = null;
        } elseif ($data === "]]>") {
            /*
             * handle end of cdata section
             */
            $this->_inCDataSection = false;
            $struct                = null;
        } elseif (strncmp("<?", $data, 2) == 0) {
            /*
             * handle XML declaration
             *
             * NOTE(review): this branch had been damaged in the file and
             * was reconstructed; confirm the attribute parsing and the
             * defaults below against the upstream package.
             */
            $matches = array();
            $attribs = array();
            preg_match_all(
                '/([a-zA-Z_]+)\s*=\s*(["\'])(.*?)\2/s',
                $data,
                $matches,
                PREG_SET_ORDER
            );
            foreach ($matches as $match) {
                $attribs[$match[1]] = $match[3];
            }

            if (!isset($attribs["version"])) {
                $attribs["version"] = "1.0";
            }
            if (!isset($attribs["encoding"])) {
                $attribs["encoding"] = "UTF-8";
            }
            if (!isset($attribs["standalone"])) {
                $attribs["standalone"] = true;
            } else {
                $attribs["standalone"] = ($attribs["standalone"] === "yes");
            }

            $struct = array(
                "type"       => XML_BEAUTIFIER_XML_DECLARATION,
                "version"    => $attribs["version"],
                "encoding"   => $attribs["encoding"],
                "standalone" => $attribs["standalone"],
                "depth"      => $this->_depth
            );
        } elseif (strncasecmp("<!DOCTYPE", $data, 9) == 0) {
            /*
             * handle start of doctype declaration; the following chunks
             * are routed to _handleDoctype() by defaultHandler()
             */
            $this->_mode = "doctype";

            $struct = array(
                "type"  => XML_BEAUTIFIER_DT_DECLARATION,
                "data"  => $data,
                "depth" => $this->_depth
            );
        } else {
            /*
             * handle all other data
             */
            $struct = array(
                "type"  => XML_BEAUTIFIER_DEFAULT,
                "data"  => $data,
                "depth" => $this->_depth
            );
        }

        if (!is_null($struct)) {
            $this->_appendToParent($struct);
        }
        return true;
    }

    /**
     * append a struct to the last struct on the stack
     *
     * At depth 0 there is no open element, so the struct is put on the
     * stack itself.
     *
     * @param array $struct structure to append
     *
     * @return bool always true
     * @access private
     */
    function _appendToParent($struct)
    {
        if ($this->_depth > 0) {
            $parent = array_pop($this->_struct);
            array_push($parent["children"], $struct);
            // remember which kinds of tokens the parent contains
            $parent["contains"] = $parent["contains"] | $struct["type"];
            array_push($this->_struct, $parent);
            return true;
        }

        array_push($this->_struct, $struct);
        return true;
    }

    /**
     * get the last token
     *
     * Looks at the last entry on the stack: if it has a "children" array,
     * the last child is returned, otherwise the entry itself. The stack is
     * only read, never modified.
     *
     * @access private
     * @return array|null the last token, or null if the stack is empty or
     *                    the last entry has an empty "children" array
     */
    function _getLastToken()
    {
        $count = count($this->_struct);
        if ($count === 0) {
            return null;
        }

        $parent = $this->_struct[$count - 1];
        if (isset($parent["children"]) && is_array($parent["children"])) {
            $children = array_values($parent["children"]);
            if (count($children) === 0) {
                return null;
            }
            return $children[count($children) - 1];
        }

        return $parent;
    }

    /**
     * reset all used object properties
     *
     * This method is called before parsing a new document
     *
     * @return void
     * @access private
     */
    function _resetVars()
    {
        $this->_depth          = 0;
        $this->_struct         = array();
        $this->_mode           = "xml";
        $this->_inCDataSection = false;
    }
}
?>