* RTF parser/formatter
* This code reads RTF files and formats the RTF data to HTML.
* PHP version 5
* @author Alexander van Oostenrijk
* @copyright 2014 Alexander van Oostenrijk
* @license GNU
* @version 1
* @link http://www.independent-software.com
* Example usage:
* $reader = new RtfReader();
* $rtf = file_get_contents("itc.rtf"); // or use a string
* $reader->Parse($rtf);
* //$reader->root->dump(); // to see what the reader read
* $formatter = new RtfHtml();
* echo $formatter->Format($reader->root);
/**
 * Common base class for the nodes of the parsed RTF document tree.
 */
class RtfElement
{
    /**
     * Echoes an indentation prefix for the given nesting level.
     *
     * Two padding characters are written per level; a level of zero or
     * less writes nothing.
     *
     * @param int $level Nesting depth to indent for.
     * @return void
     */
    protected function Indent($level)
    {
        for ($i = 0; $i < $level * 2; $i++) {
            echo " ";
        }
    }
}
/**
 * An RTF group: an ordered list of child elements plus a link to the
 * enclosing group.
 */
class RtfGroup extends RtfElement
{
    /** @var RtfGroup|null Enclosing group, or null when there is none. */
    public $parent;

    /** @var array Child elements in document order. */
    public $children;

    public function __construct()
    {
        $this->parent = null;
        $this->children = array();
    }

    /**
     * Returns the control word that opens this group.
     *
     * @return string|null The first child's word, or null when the group
     *                     is empty or does not start with a control word.
     */
    public function GetType()
    {
        if (count($this->children) == 0) {
            return null;
        }
        $first = $this->children[0];
        if (!($first instanceof RtfControlWord)) {
            return null;
        }
        return $first->word;
    }

    /**
     * Tells whether this group starts with the "*" control symbol.
     *
     * @return bool|null True/false when the first child is a control
     *                   symbol; null when the group is empty or starts
     *                   with something else.
     */
    public function IsDestination()
    {
        if (count($this->children) == 0) {
            return null;
        }
        $first = $this->children[0];
        if (!($first instanceof RtfControlSymbol)) {
            return null;
        }
        return $first->symbol == '*';
    }

    /**
     * Echoes this group and its children as an indented listing.
     *
     * Child groups of type fonttbl, colortbl, stylesheet, info or pict*,
     * and destination groups, are left out of the listing.
     *
     * NOTE(review): the wrapper markup was missing from the source under
     * review (the string literals were truncated); one <div> per line is
     * assumed here - confirm against the intended debug output.
     *
     * @param int $level Nesting depth used for indentation.
     * @return void
     */
    public function dump($level = 0)
    {
        echo "<div>";
        $this->Indent($level);
        echo "{";
        echo "</div>";

        foreach ($this->children as $child) {
            // Only groups are filtered; every other element is always dumped.
            if ($child instanceof RtfGroup && $child->IsSkippedInDump()) {
                continue;
            }
            $child->dump($level + 2);
        }

        echo "<div>";
        $this->Indent($level);
        echo "}";
        echo "</div>";
    }

    /**
     * Decides whether dump() should leave this group out.
     *
     * @return bool
     */
    protected function IsSkippedInDump()
    {
        // GetType() may return null; normalise so the string checks below
        // never receive a non-string.
        $type = (string) $this->GetType();

        if (in_array($type, array("fonttbl", "colortbl", "stylesheet", "info"), true)) {
            return true;
        }
        // Skip any pictures:
        if (strncmp($type, "pict", 4) === 0) {
            return true;
        }
        return (bool) $this->IsDestination();
    }
}
/**
 * An RTF control word together with its numeric parameter.
 */
class RtfControlWord extends RtfElement
{
    /** @var string Name of the control word. */
    public $word;

    /** @var int Numeric parameter attached to the word. */
    public $parameter;

    /**
     * Echoes a one-line description of this control word.
     *
     * NOTE(review): the wrapper markup was missing from the source under
     * review; a plain <div> is assumed - confirm.
     *
     * @param int $level Nesting depth used for indentation.
     * @return void
     */
    public function dump($level = 0)
    {
        echo "<div>";
        $this->Indent($level);
        echo "WORD {$this->word} ({$this->parameter})";
        echo "</div>";
    }
}
/**
 * An RTF control symbol together with its numeric parameter.
 */
class RtfControlSymbol extends RtfElement
{
    /** @var string The symbol character. */
    public $symbol;

    /** @var int Numeric parameter; 0 unless one was assigned. */
    public $parameter = 0;

    /**
     * Echoes a one-line description of this control symbol.
     *
     * NOTE(review): the wrapper markup was missing from the source under
     * review; a plain <div> is assumed - confirm.
     *
     * @param int $level Nesting depth used for indentation.
     * @return void
     */
    public function dump($level = 0)
    {
        // The symbol can be any character, so escape it for the
        // surrounding markup.
        $symbol = htmlspecialchars((string) $this->symbol, ENT_QUOTES, 'ISO-8859-1');

        echo "<div>";
        $this->Indent($level);
        echo "SYMBOL {$symbol} ({$this->parameter})";
        echo "</div>";
    }
}
/**
 * A run of plain text inside an RTF group.
 */
class RtfText extends RtfElement
{
    /** @var string The text of this run. */
    public $text;

    /**
     * Echoes a one-line description of this text run.
     *
     * NOTE(review): the wrapper markup was missing from the source under
     * review; a plain <div> is assumed - confirm.
     *
     * @param int $level Nesting depth used for indentation.
     * @return void
     */
    public function dump($level = 0)
    {
        // Escape the text so characters such as "<" show up literally
        // inside the surrounding markup.
        $text = htmlspecialchars((string) $this->text, ENT_QUOTES, 'ISO-8859-1');

        echo "<div>";
        $this->Indent($level);
        echo "TEXT {$text}";
        echo "</div>";
    }
}
/**
 * Parses an RTF string into a tree of RtfGroup, RtfControlWord,
 * RtfControlSymbol and RtfText elements.
 *
 * After Parse() the tree is available through $root.
 */
class RtfReader
{
    /** @var RtfGroup|null Root group of the last parsed document. */
    public $root = null;

    // Parser working state. These used to be created on the fly (and were
    // therefore publicly visible); they are declared here with the same
    // visibility so no property is created dynamically.

    /** @var string Input being parsed. */
    public $rtf = "";

    /** @var int Index of the next character to read. */
    public $pos = 0;

    /** @var int Length of $rtf in bytes. */
    public $len = 0;

    /** @var string|null Character read last; null once the input is exhausted. */
    public $char = null;

    /** @var RtfGroup|null Group currently being filled. */
    public $group = null;

    /**
     * Reads the next character into $this->char and advances $this->pos.
     *
     * At the end of the input $this->char becomes null. The position is
     * still advanced so that callers which "put back" a character with
     * $this->pos-- behave the same at the end of the input as anywhere else.
     *
     * @return void
     */
    protected function GetChar()
    {
        $this->char = ($this->pos < $this->len) ? $this->rtf[$this->pos] : null;
        $this->pos++;
    }

    /**
     * Appends an element to the current group.
     *
     * Content that appears outside of any group has no place in the tree
     * and is dropped.
     *
     * @param RtfElement $element Element to append.
     * @return void
     */
    protected function AddChild($element)
    {
        if ($this->group !== null) {
            $this->group->children[] = $element;
        }
    }

    /**
     * Handles "{": opens a new group and makes it the current one.
     *
     * The first group opened becomes the root. A group opened after the
     * root has been closed is parsed but not attached to the tree.
     *
     * @return void
     */
    protected function ParseStartGroup()
    {
        $group = new RtfGroup();
        $group->parent = $this->group;

        if ($this->root === null) {
            $this->root = $group;
        } elseif ($this->group !== null) {
            $this->group->children[] = $group;
        }
        $this->group = $group;
    }

    /**
     * Tells whether the current character is an ASCII letter.
     *
     * @return bool
     */
    protected function is_letter()
    {
        if ($this->char === null) {
            return FALSE;
        }
        $code = ord($this->char);
        if ($code >= 65 && $code <= 90) return TRUE;   // A-Z
        if ($code >= 97 && $code <= 122) return TRUE;  // a-z
        return FALSE;
    }

    /**
     * Tells whether the current character is an ASCII digit.
     *
     * @return bool
     */
    protected function is_digit()
    {
        if ($this->char === null) {
            return FALSE;
        }
        $code = ord($this->char);
        return ($code >= 48 && $code <= 57) ? TRUE : FALSE;
    }

    /**
     * Handles "}": makes the parent of the current group current again.
     *
     * An unmatched "}" is ignored.
     *
     * @return void
     */
    protected function ParseEndGroup()
    {
        if ($this->group !== null) {
            $this->group = $this->group->parent;
        }
    }

    /**
     * Parses a control word (letters, then an optional signed number).
     *
     * A word without a number gets the parameter 1.
     *
     * @return void
     */
    protected function ParseControlWord()
    {
        $this->GetChar();
        $word = "";
        while ($this->is_letter()) {
            $word .= $this->char;
            $this->GetChar();
        }

        // Read parameter (if any) consisting of digits.
        // Parameter may be negative.
        $parameter = null;
        $negative = false;
        if ($this->char === '-') {
            $this->GetChar();
            $negative = true;
        }
        while ($this->is_digit()) {
            $parameter = ($parameter === null ? 0 : $parameter) * 10 + (int) $this->char;
            $this->GetChar();
        }
        if ($parameter === null) $parameter = 1;
        if ($negative) $parameter = -$parameter;

        if ($word === "u") {
            // \uN is followed by a fallback character for readers that
            // cannot show the Unicode character; it must not become text.
            $this->SkipUnicodeFallback();
        } elseif ($this->char !== ' ') {
            // A space is the delimiter of the control word and is consumed.
            // Anything else belongs to the next item, so put it back.
            $this->pos--;
        }

        $rtfword = new RtfControlWord();
        $rtfword->word = $word;
        $rtfword->parameter = $parameter;
        $this->AddChild($rtfword);
    }

    /**
     * Skips the fallback character that follows a \uN control word.
     *
     * On entry $this->char is the character right after the parameter.
     * The fallback is either one plain character or a \'hh escape; braces
     * and other control sequences are left for the main loop.
     *
     * @return void
     */
    protected function SkipUnicodeFallback()
    {
        if ($this->char === ' ') {
            // The space only delimits the control word.
            $this->GetChar();
        }

        if ($this->char === '\\') {
            if (substr($this->rtf, $this->pos, 1) === "'") {
                $this->pos += 3;   // quote plus two hex digits
            } else {
                $this->pos--;      // some other control sequence
            }
        } elseif ($this->char === '{' || $this->char === '}' || $this->char === null) {
            $this->pos--;
        }
        // Any other single character was the fallback and is dropped.
    }

    /**
     * Parses a control symbol (a backslash followed by one non-letter).
     *
     * Symbols ordinarily have no parameter. \' is the exception: it is
     * followed by a two-digit hex code, which becomes the parameter.
     *
     * @return void
     */
    protected function ParseControlSymbol()
    {
        // Read symbol (one character only).
        $this->GetChar();
        $symbol = $this->char;
        if ($symbol === null) {
            return;   // backslash was the last character of the input
        }

        $parameter = 0;
        if ($symbol === '\'') {
            $this->GetChar();
            $hex = (string) $this->char;
            $this->GetChar();
            $hex .= (string) $this->char;
            // hexdec() silently skips invalid characters; reject them instead.
            $parameter = preg_match('/^[0-9a-fA-F]{2}$/', $hex) ? hexdec($hex) : 0;
        }

        $rtfsymbol = new RtfControlSymbol();
        $rtfsymbol->symbol = $symbol;
        $rtfsymbol->parameter = $parameter;
        $this->AddChild($rtfsymbol);
    }

    /**
     * Handles "\": dispatches to control word or control symbol parsing.
     *
     * @return void
     */
    protected function ParseControl()
    {
        // Look ahead by one character to see if it starts with
        // a letter (control word) or another symbol (control symbol):
        $this->GetChar();
        $this->pos--;
        if ($this->is_letter()) {
            $this->ParseControlWord();
        } else {
            $this->ParseControlSymbol();
        }
    }

    /**
     * Parses plain text up to the next brace or control sequence.
     *
     * The escapes \\, \{ and \} inside a text run are stored as the single
     * character they stand for. On entry $this->char holds the first
     * character of the run.
     *
     * @return void
     */
    protected function ParseText()
    {
        $text = "";

        // Rescan from the character that was already read.
        $this->pos--;
        while ($this->pos < $this->len) {
            $current = $this->rtf[$this->pos];

            if ($current === '{' || $current === '}') {
                break;
            }
            if ($current === '\\') {
                $next = ($this->pos + 1 < $this->len) ? $this->rtf[$this->pos + 1] : null;
                if ($next === '\\' || $next === '{' || $next === '}') {
                    $text .= $next;
                    $this->pos += 2;
                    continue;
                }
                // Not an escape: a control word or symbol starts here.
                break;
            }

            $text .= $current;
            $this->pos++;
        }

        $rtftext = new RtfText();
        $rtftext->text = $text;
        $this->AddChild($rtftext);
    }

    /**
     * Parses an RTF document; the result is stored in $this->root.
     *
     * $this->root stays null when the input contains no group.
     *
     * @param string $rtf RTF source.
     * @return void
     */
    public function Parse($rtf)
    {
        $this->rtf = (string) $rtf;
        $this->pos = 0;
        $this->len = strlen($this->rtf);
        $this->group = null;
        $this->root = null;

        while ($this->pos < $this->len) {
            // Read next character:
            $this->GetChar();

            // Ignore \r and \n
            if ($this->char === "\n" || $this->char === "\r") continue;

            // What type of character is this?
            switch ($this->char) {
                case '{':
                    $this->ParseStartGroup();
                    break;
                case '}':
                    $this->ParseEndGroup();
                    break;
                case '\\':
                    $this->ParseControl();
                    break;
                default:
                    $this->ParseText();
                    break;
            }
        }
    }
}
/**
 * Character formatting in effect at one point of the document.
 */
class RtfState
{
    public $bold;
    public $italic;
    public $underline;
    public $end_underline;
    public $strike;
    public $hidden;
    public $fontsize;

    /**
     * Creates a state with every attribute switched off.
     */
    public function __construct()
    {
        // Without this a fresh state would have no initialised attributes
        // until Reset() happened to be called.
        $this->Reset();
    }

    /**
     * Switches every formatting attribute off and clears the font size.
     *
     * @return void
     */
    public function Reset()
    {
        $this->bold = false;
        $this->italic = false;
        $this->underline = false;
        $this->end_underline = false;
        $this->strike = false;
        $this->hidden = false;
        $this->fontsize = 0;
    }
}
/**
 * Turns a parsed RTF tree into an HTML string.
 */
class RtfHtml
{
    // These used to be created on the fly (and were therefore publicly
    // visible); they are declared with the same visibility.

    /** @var string HTML produced so far. */
    public $output = "";

    /** @var array Stack of formatting states, one per open group. */
    public $states = array();

    /** @var object|null Formatting state currently in effect. */
    public $state = null;

    /** @var array Control words that stand for a single character. */
    protected $characters = array(
        "lquote"    => "‘",
        "rquote"    => "’",
        "ldblquote" => "“",
        "rdblquote" => "”",
        "emdash"    => "—",
        "endash"    => "–",
        "bullet"    => "•",
    );

    /**
     * Formats a parsed document.
     *
     * @param RtfGroup|null $root Root group, e.g. RtfReader::$root.
     * @return string The HTML; empty when $root is null.
     */
    public function Format($root)
    {
        $this->output = "";
        // Create a stack of states:
        $this->states = array();
        // Put an initial standard state onto the stack:
        $this->state = new RtfState();
        array_push($this->states, $this->state);

        if ($root !== null) {
            $this->FormatGroup($root);
        }
        return $this->output;
    }

    /**
     * Formats one group and everything inside it.
     *
     * Formatting changes made inside the group end with the group.
     * Groups of type fonttbl, colortbl, stylesheet, info or pict*, and
     * destination groups, produce no output.
     *
     * @param RtfGroup $group Group to format.
     * @return void
     */
    protected function FormatGroup($group)
    {
        // Can we ignore this group?
        // GetType() may return null, hence the cast.
        $type = (string) $group->GetType();
        if (in_array($type, array("fonttbl", "colortbl", "stylesheet", "info"), true)) return;
        // Skip any pictures:
        if (strncmp($type, "pict", 4) === 0) return;
        if ($group->IsDestination()) return;

        // Push a new state onto the stack:
        $this->state = clone $this->state;
        array_push($this->states, $this->state);

        foreach ($group->children as $child) {
            if ($child instanceof RtfGroup) {
                $this->FormatGroup($child);
            } elseif ($child instanceof RtfControlWord) {
                $this->FormatControlWord($child);
            } elseif ($child instanceof RtfControlSymbol) {
                $this->FormatControlSymbol($child);
            } elseif ($child instanceof RtfText) {
                $this->FormatText($child);
            }
        }

        // Pop state from stack. The entry has to be removed before the top
        // is read again, otherwise the group's own state stays in effect.
        array_pop($this->states);
        $this->state = $this->states[count($this->states) - 1];
    }

    /**
     * Applies one control word to the current state or to the output.
     *
     * Words that are not handled here are ignored.
     *
     * @param RtfControlWord $word Control word to apply.
     * @return void
     */
    protected function FormatControlWord($word)
    {
        switch ($word->word) {
            case "plain":
                $this->state->Reset();
                break;
            case "b":
                $this->state->bold = $word->parameter;
                break;
            case "i":
                $this->state->italic = $word->parameter;
                break;
            case "ul":
                $this->state->underline = $word->parameter;
                // A new underline request cancels an earlier \ulnone.
                $this->state->end_underline = false;
                break;
            case "ulnone":
                $this->state->underline = false;
                $this->state->end_underline = $word->parameter;
                break;
            case "strike":
                $this->state->strike = $word->parameter;
                break;
            case "v":
                $this->state->hidden = $word->parameter;
                break;
            case "fs":
                // Scales the parameter so that 24 maps to 16px.
                $this->state->fontsize = ceil(($word->parameter / 24) * 16);
                break;
            case "par":
                // NOTE(review): the markup was missing from the source
                // under review (an empty string was appended); <p> is
                // assumed - confirm.
                $this->output .= "<p>";
                break;
            case "u":
                $this->FormatUnicode($word->parameter);
                break;
            default:
                // Characters:
                if (isset($this->characters[$word->word])) {
                    $this->output .= $this->characters[$word->word];
                }
                break;
        }
    }

    /**
     * Emits the character of a \uN control word as a numeric reference.
     *
     * @param int $code Parameter of \u; RTF stores values above 32767 as
     *                  negative numbers.
     * @return void
     */
    protected function FormatUnicode($code)
    {
        if ($code < 0) {
            $code += 65536;
        }
        if ($code >= 0xD800 && $code <= 0xDFFF) {
            // Half of a surrogate pair; it cannot be written as a character
            // reference on its own, so fall back to a placeholder.
            $this->output .= "◊";
            return;
        }
        $this->output .= "&#{$code};";
    }

    /**
     * Opens a span carrying the inline style of the current state.
     *
     * @return void
     */
    protected function BeginState()
    {
        $span = "";
        if ($this->state->bold) $span .= "font-weight:bold;";
        if ($this->state->italic) $span .= "font-style:italic;";

        // Underline and strike share one CSS property, so they have to be
        // written as a single declaration or the later one would win.
        $decorations = array();
        if ($this->state->underline) $decorations[] = "underline";
        if ($this->state->strike) $decorations[] = "line-through";
        if (count($decorations) > 0) {
            $span .= "text-decoration:" . implode(" ", $decorations) . ";";
        } elseif ($this->state->end_underline) {
            $span .= "text-decoration:none;";
        }

        if ($this->state->hidden) $span .= "display:none;";
        if ($this->state->fontsize != 0) $span .= "font-size: {$this->state->fontsize}px;";

        $this->output .= "<span style=\"{$span}\">";
    }

    /**
     * Closes the span opened by BeginState().
     *
     * @return void
     */
    protected function EndState()
    {
        $this->output .= "</span>";
    }

    /**
     * Writes the character a control symbol stands for.
     *
     * Symbols that are not handled here are ignored.
     *
     * @param RtfControlSymbol $symbol Control symbol to format.
     * @return void
     */
    protected function FormatControlSymbol($symbol)
    {
        switch ($symbol->symbol) {
            case '\'':
                // cp1252 matches ISO-8859-1 for every printable character
                // and additionally maps 0x80-0x9F (curly quotes, dashes,
                // euro sign) to real characters.
                $this->output .= htmlentities(chr($symbol->parameter), ENT_QUOTES, 'cp1252');
                break;
            case '~':
                $this->output .= "&nbsp;";
                break;
            case '\\':
            case '{':
            case '}':
                // Escaped literal; none of these is special in HTML.
                $this->output .= $symbol->symbol;
                break;
        }
    }

    /**
     * Writes a text run inside a span that carries the current formatting.
     *
     * @param RtfText $text Text run to format.
     * @return void
     */
    protected function FormatText($text)
    {
        $this->BeginState();
        // The text comes straight from the document, so it is escaped. A
        // single-byte charset is named so that no byte sequence can be
        // rejected as invalid.
        $this->output .= htmlspecialchars((string) $text->text, ENT_QUOTES, 'ISO-8859-1');
        $this->EndState();
    }
}