. * * Consult LICENSE file for details ************************************************/ class WBXMLDecoder extends WBXMLDefs { private $in; private $version; private $publicid; private $publicstringid; private $charsetid; private $stringtable; private $tagcp = 0; private $attrcp = 0; private $ungetbuffer; private $logStack = array(); private $inputBuffer = ""; private $isWBXML = true; const VERSION = 0x03; /** * WBXML Decode Constructor * * @param stream $input the incoming data stream * * @access public */ public function WBXMLDecoder($input) { // make sure WBXML_DEBUG is defined. It should be at this point if (!defined('WBXML_DEBUG')) define('WBXML_DEBUG', false); $this->in = $input; $this->readVersion(); if (isset($this->version) && $this->version != self::VERSION) { $this->isWBXML = false; return; } $this->publicid = $this->getMBUInt(); if($this->publicid == 0) { $this->publicstringid = $this->getMBUInt(); } $this->charsetid = $this->getMBUInt(); $this->stringtable = $this->getStringTable(); } /** * Returns either start, content or end, and auto-concatenates successive content * * @access public * @return element/value */ public function getElement() { $element = $this->getToken(); switch($element[EN_TYPE]) { case EN_TYPE_STARTTAG: return $element; case EN_TYPE_ENDTAG: return $element; case EN_TYPE_CONTENT: while(1) { $next = $this->getToken(); if($next == false) return false; else if($next[EN_TYPE] == EN_CONTENT) { $element[EN_CONTENT] .= $next[EN_CONTENT]; } else { $this->ungetElement($next); break; } } return $element; } return false; } /** * Get a peek at the next element * * @access public * @return element */ public function peek() { $element = $this->getElement(); $this->ungetElement($element); return $element; } /** * Get the element of a StartTag * * @param $tag * * @access public * @return element/boolean returns false if not available */ public function getElementStartTag($tag) { $element = $this->getToken(); if($element[EN_TYPE] == EN_TYPE_STARTTAG && $element[EN_TAG] == $tag) return $element; else { ZLog::Write(LOGLEVEL_WBXMLSTACK, sprintf("WBXMLDecoder->getElementStartTag(): unmatched WBXML tag: '%s' matching '%s' type '%s' flags '%s'", $tag, ((isset($element[EN_TAG]))?$element[EN_TAG]:""), ((isset($element[EN_TYPE]))?$element[EN_TYPE]:""), ((isset($element[EN_FLAGS]))?$element[EN_FLAGS]:""))); $this->ungetElement($element); } return false; } /** * Get the element of a EndTag * * @access public * @return element/boolean returns false if not available */ public function getElementEndTag() { $element = $this->getToken(); if($element[EN_TYPE] == EN_TYPE_ENDTAG) return $element; else { ZLog::Write(LOGLEVEL_WBXMLSTACK, sprintf("WBXMLDecoder->getElementEndTag(): unmatched WBXML tag: '%s' type '%s' flags '%s'", ((isset($element[EN_TAG]))?$element[EN_TAG]:""), ((isset($element[EN_TYPE]))?$element[EN_TYPE]:""), ((isset($element[EN_FLAGS]))?$element[EN_FLAGS]:""))); $bt = debug_backtrace(); ZLog::Write(LOGLEVEL_ERROR, sprintf("WBXMLDecoder->getElementEndTag(): could not read end tag in '%s'. Please enable the LOGLEVEL_WBXML and send the log to the Z-Push dev team.", $bt[0]["file"] . ":" . $bt[0]["line"])); // log the remaining wbxml content $this->ungetElement($element); while($el = $this->getElement()); } return false; } /** * Get the content of an element * * @access public * @return string/boolean returns false if not available */ public function getElementContent() { $element = $this->getToken(); if($element[EN_TYPE] == EN_TYPE_CONTENT) { return $element[EN_CONTENT]; } else { ZLog::Write(LOGLEVEL_WBXMLSTACK, sprintf("WBXMLDecoder->getElementContent(): unmatched WBXML content: '%s' type '%s' flags '%s'", ((isset($element[EN_TAG]))?$element[EN_TAG]:""), ((isset($element[EN_TYPE]))?$element[EN_TYPE]:""), ((isset($element[EN_FLAGS]))?$element[EN_FLAGS]:""))); $this->ungetElement($element); } return false; } /** * 'Ungets' an element writing it into a buffer to be 'get' again * * @param element $element the element to get ungetten * * @access public * @return */ public function ungetElement($element) { if($this->ungetbuffer) ZLog::Write(LOGLEVEL_ERROR,sprintf("WBXMLDecoder->ungetElement(): WBXML double unget on tag: '%s' type '%s' flags '%s'", ((isset($element[EN_TAG]))?$element[EN_TAG]:""), ((isset($element[EN_TYPE]))?$element[EN_TYPE]:""), ((isset($element[EN_FLAGS]))?$element[EN_FLAGS]:""))); $this->ungetbuffer = $element; } /** * Returns the plain input stream * * @access public * @return string */ public function GetPlainInputStream() { $plain = $this->inputBuffer; while($data = fread($this->in, 4096)) $plain .= $data; return $plain; } /** * Returns if the input is WBXML * * @access public * @return boolean */ public function IsWBXML() { return $this->isWBXML; } /**---------------------------------------------------------------------------------------------------------- * Private WBXMLDecoder stuff */ /** * Returns the next token * * @access private * @return token */ private function getToken() { // See if there's something in the ungetBuffer if($this->ungetbuffer) { $element = $this->ungetbuffer; $this->ungetbuffer = false; return $element; } $el = $this->_getToken(); $this->logToken($el); return $el; } /** * Log the a token to ZLog * * @param string $el token * * @access private * @return */ private function logToken($el) { if(!WBXML_DEBUG) return; $spaces = str_repeat(" ", count($this->logStack)); switch($el[EN_TYPE]) { case EN_TYPE_STARTTAG: if($el[EN_FLAGS] & EN_FLAGS_CONTENT) { ZLog::Write(LOGLEVEL_WBXML,"I " . $spaces . " <". $el[EN_TAG] . ">"); array_push($this->logStack, $el[EN_TAG]); } else ZLog::Write(LOGLEVEL_WBXML,"I " . $spaces . " <" . $el[EN_TAG] . "/>"); break; case EN_TYPE_ENDTAG: $tag = array_pop($this->logStack); ZLog::Write(LOGLEVEL_WBXML,"I " . $spaces . ""); break; case EN_TYPE_CONTENT: ZLog::Write(LOGLEVEL_WBXML,"I " . $spaces . " " . $el[EN_CONTENT]); break; } } /** * Returns either a start tag, content or end tag * * @access private * @return */ private function _getToken() { // Get the data from the input stream $element = array(); while(1) { $byte = $this->getByte(); if(!isset($byte)) break; switch($byte) { case WBXML_SWITCH_PAGE: $this->tagcp = $this->getByte(); continue; case WBXML_END: $element[EN_TYPE] = EN_TYPE_ENDTAG; return $element; case WBXML_ENTITY: $entity = $this->getMBUInt(); $element[EN_TYPE] = EN_TYPE_CONTENT; $element[EN_CONTENT] = $this->entityToCharset($entity); return $element; case WBXML_STR_I: $element[EN_TYPE] = EN_TYPE_CONTENT; $element[EN_CONTENT] = $this->getTermStr(); return $element; case WBXML_LITERAL: $element[EN_TYPE] = EN_TYPE_STARTTAG; $element[EN_TAG] = $this->getStringTableEntry($this->getMBUInt()); $element[EN_FLAGS] = 0; return $element; case WBXML_EXT_I_0: case WBXML_EXT_I_1: case WBXML_EXT_I_2: $this->getTermStr(); // Ignore extensions continue; case WBXML_PI: // Ignore PI $this->getAttributes(); continue; case WBXML_LITERAL_C: $element[EN_TYPE] = EN_TYPE_STARTTAG; $element[EN_TAG] = $this->getStringTableEntry($this->getMBUInt()); $element[EN_FLAGS] = EN_FLAGS_CONTENT; return $element; case WBXML_EXT_T_0: case WBXML_EXT_T_1: case WBXML_EXT_T_2: $this->getMBUInt(); // Ingore extensions; continue; case WBXML_STR_T: $element[EN_TYPE] = EN_TYPE_CONTENT; $element[EN_CONTENT] = $this->getStringTableEntry($this->getMBUInt()); return $element; case WBXML_LITERAL_A: $element[EN_TYPE] = EN_TYPE_STARTTAG; $element[EN_TAG] = $this->getStringTableEntry($this->getMBUInt()); $element[EN_ATTRIBUTES] = $this->getAttributes(); $element[EN_FLAGS] = EN_FLAGS_ATTRIBUTES; return $element; case WBXML_EXT_0: case WBXML_EXT_1: case WBXML_EXT_2: continue; case WBXML_OPAQUE: $length = $this->getMBUInt(); $element[EN_TYPE] = EN_TYPE_CONTENT; $element[EN_CONTENT] = $this->getOpaque($length); return $element; case WBXML_LITERAL_AC: $element[EN_TYPE] = EN_TYPE_STARTTAG; $element[EN_TAG] = $this->getStringTableEntry($this->getMBUInt()); $element[EN_ATTRIBUTES] = $this->getAttributes(); $element[EN_FLAGS] = EN_FLAGS_ATTRIBUTES | EN_FLAGS_CONTENT; return $element; default: $element[EN_TYPE] = EN_TYPE_STARTTAG; $element[EN_TAG] = $this->getMapping($this->tagcp, $byte & 0x3f); $element[EN_FLAGS] = ($byte & 0x80 ? EN_FLAGS_ATTRIBUTES : 0) | ($byte & 0x40 ? EN_FLAGS_CONTENT : 0); if($byte & 0x80) $element[EN_ATTRIBUTES] = $this->getAttributes(); return $element; } } } /** * Gets attributes * * @access private * @return */ private function getAttributes() { $attributes = array(); $attr = ""; while(1) { $byte = $this->getByte(); if(count($byte) == 0) break; switch($byte) { case WBXML_SWITCH_PAGE: $this->attrcp = $this->getByte(); break; case WBXML_END: if($attr != "") $attributes += $this->splitAttribute($attr); return $attributes; case WBXML_ENTITY: $entity = $this->getMBUInt(); $attr .= $this->entityToCharset($entity); return $attr; /* fmbiete's contribution r1534, ZP-324 */ case WBXML_STR_I: $attr .= $this->getTermStr(); return $attr; /* fmbiete's contribution r1534, ZP-324 */ case WBXML_LITERAL: if($attr != "") $attributes += $this->splitAttribute($attr); $attr = $this->getStringTableEntry($this->getMBUInt()); return $attr; /* fmbiete's contribution r1534, ZP-324 */ case WBXML_EXT_I_0: case WBXML_EXT_I_1: case WBXML_EXT_I_2: $this->getTermStr(); continue; case WBXML_PI: case WBXML_LITERAL_C: // Invalid return false; case WBXML_EXT_T_0: case WBXML_EXT_T_1: case WBXML_EXT_T_2: $this->getMBUInt(); continue; case WBXML_STR_T: $attr .= $this->getStringTableEntry($this->getMBUInt()); return $attr; /* fmbiete's contribution r1534, ZP-324 */ case WBXML_LITERAL_A: return false; case WBXML_EXT_0: case WBXML_EXT_1: case WBXML_EXT_2: continue; case WBXML_OPAQUE: $length = $this->getMBUInt(); $attr .= $this->getOpaque($length); return $attr; /* fmbiete's contribution r1534, ZP-324 */ case WBXML_LITERAL_AC: return false; default: if($byte < 128) { if($attr != "") { $attributes += $this->splitAttribute($attr); $attr = ""; } } $attr .= $this->getMapping($this->attrcp, $byte); break; } } } /** * Splits an attribute * * @param string $attr attribute to be splitted * * @access private * @return array */ private function splitAttribute($attr) { $attributes = array(); $pos = strpos($attr,chr(61)); // equals sign if($pos) $attributes[substr($attr, 0, $pos)] = substr($attr, $pos+1); else $attributes[$attr] = null; return $attributes; } /** * Reads from the stream until getting a string terminator * * @access private * @return string */ private function getTermStr() { $str = ""; while(1) { $in = $this->getByte(); if($in == 0) break; else $str .= chr($in); } return $str; } /** * Reads $len from the input stream * * @param int $len * * @access private * @return string */ private function getOpaque($len) { // TODO check if it's possible to do it other way // fread stops reading because the following condition is true (from php.net): // if the stream is read buffered and it does not represent a plain file, // at most one read of up to a number of bytes equal to the chunk size // (usually 8192) is made; depending on the previously buffered data, // the size of the returned data may be larger than the chunk size. // using only return fread it will return only a part of stream if chunk is smaller // than $len. Read from stream in a loop until the $len is reached. $d = ""; $l = 0; while (1) { $l = (($len - strlen($d)) > 8192) ? 8192 : ($len - strlen($d)); if ($l > 0) { $data = fread($this->in, $l); // Stream ends prematurely on instable connections and big mails if ($data === false || feof($this->in)) throw new HTTPReturnCodeException(sprintf("WBXMLDecoder->getOpaque() connection unavailable while trying to read %d bytes from stream. Aborting after %d bytes read.", $len, strlen($d)), HTTP_CODE_500, null, LOGLEVEL_WARN); else $d .= $data; } if (strlen($d) >= $len) break; } return $d; } /** * Reads one byte from the input stream * * @access private * @return int */ private function getByte() { $ch = fread($this->in, 1); if(strlen($ch) > 0) return ord($ch); else return; } /** * Reads string length from the input stream * * @access private * @return */ private function getMBUInt() { $uint = 0; while(1) { $byte = $this->getByte(); $uint |= $byte & 0x7f; if($byte & 0x80) $uint = $uint << 7; else break; } return $uint; } /** * Reads string table from the input stream * * @access private * @return int */ private function getStringTable() { $stringtable = ""; $length = $this->getMBUInt(); if($length > 0) $stringtable = fread($this->in, $length); return $stringtable; } /** * Returns the mapping for a specified codepage and id * * @param $cp codepage * @param $id * * @access public * @return string */ private function getMapping($cp, $id) { if(!isset($this->dtd["codes"][$cp]) || !isset($this->dtd["codes"][$cp][$id])) return false; else { if(isset($this->dtd["namespaces"][$cp])) { return $this->dtd["namespaces"][$cp] . ":" . $this->dtd["codes"][$cp][$id]; } else return $this->dtd["codes"][$cp][$id]; } } /** * Reads one byte from the input stream * * @access private * @return void */ private function readVersion() { $ch = $this->getByte(); if($ch != NULL) { $this->inputBuffer .= chr($ch); $this->version = $ch; } } } ?>