unit UniRtf2HtmlUnit1;
{------------------------------------------------------------------------------
This RTF to HTML conversion program supports the following RTF statements:
- Bold, Italic, Underline
- Alignment
- Fonts + font size + font color
- Bullet
It doesn't support itemnumbers, instead it writes bullets.
RTF code generated by Microsoft Word may not work because it contains
an incredible amount of crap which may confuse the conversion routines.
RTF code generated by TRichEdit and WordPad seem to work perfectly.
Updated: 10-jun-2002
Comments may be sent to: ruthjes@chello.nl
-------------------------------------------------------------------------------}
interface
uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms,
StdCtrls, Dialogs;
type
TSwitch = (Activate, Deactivate, Neutral);
TAlignment = (Left, Right, Center);
TFontTable = record
List: TStringList;
Status: TSwitch;
GroupCount: integer;
end;
TColorTable = record
List: TStringList;
GroupCount: integer;
end;
TTextFormat = record
FontTable: TFontTable;
ColorTable: TColorTable;
Bold: TSwitch;
Italic: TSwitch;
Underline: TSwitch;
NewFont: integer;
CurrentFont: integer;
NewSize: integer;
CurrentSize: integer;
NewColor: integer;
CurrentColor: integer;
IsOpen: boolean;
IsUpdated: boolean;
SizeFactor: integer;
RftDefaultSize: integer;
end;
TItemize = record
IsItemized: boolean;
IndentStatus: TSwitch;
BulletStatus: TSwitch;
BulletIsOpen: boolean;
//NumberedBullet: boolean; ItemNumbers is not supported because RTF is a piece of crap
//NumberingChars: string;
IndentCount: integer;
IndentDist: integer;
TextToBulletDist: integer;
end;
TParagraphFormat = record
CurrentAlign: TAlignment;
NewAlign: TAlignment;
NewLine: boolean;
IsOpen: boolean;
Itemize: TItemize;
end;
TUniRtf2Html = class(TObject)
function ConvertRtfToHtml(strRTFText: string): string;
private
{ private declaration }
TextFormat: TTextFormat;
ParagraphFormat: TParagraphFormat;
function QueueControlWords(var strRTFText: string;
var iPos, iGroupCount, iLength: integer): boolean;
function GetControlWord(var strRtfText, strControlWord, strParam : string;
var iPos, iGroupCount, iLength: integer): boolean;
function ConvertSpecialChars(c: char): string;
function WriteHeaderFooter(strHtmlText: string): string;
function WriteHtmlControlChar: string;
public
{ public declaration }
end;
var
UniRtf2Html: TUniRtf2Html;
implementation
function TUniRtf2Html.ConvertRtfToHtml(strRTFText: string): string;
var
strHtmlText: string;
iPos: integer;
iLength: integer;
iGroupCount: integer;
begin
iLength := Length(strRTFText);
iPos := 0;
iGroupCount := 0;
TextFormat.FontTable.Status := Neutral;
TextFormat.FontTable.List := TStringList.Create;
TextFormat.ColorTable.List := TStringList.Create;
TextFormat.Bold := Neutral;
TextFormat.Italic := Neutral;
TextFormat.Underline := Neutral;
TextFormat.CurrentFont := -1;
TextFormat.NewFont := -1;
TextFormat.CurrentSize := 20;
TextFormat.CurrentColor := 0;
TextFormat.NewColor := 0;
TextFormat.NewSize := -1;
TextFormat.IsOpen := False;
TextFormat.IsUpdated := False;
ParagraphFormat.Itemize.IsItemized := False;
ParagraphFormat.Itemize.IndentStatus := Neutral;
ParagraphFormat.Itemize.BulletStatus := Neutral;
ParagraphFormat.Itemize.BulletIsOpen := False;
//ParagraphFormat.Itemize.NumberedBullet := False;
ParagraphFormat.Itemize.IndentCount := 0;
ParagraphFormat.Itemize.IndentDist := 0;
ParagraphFormat.Itemize.TextToBulletDist := 0;
ParagraphFormat.CurrentAlign := Left;
ParagraphFormat.NewAlign := Left;
ParagraphFormat.NewLine := False;
ParagraphFormat.IsOpen := False;
strHtmlText := '';
while iPos <= iLength do
begin
case strRtfText[iPos] of
'{': inc(iGroupCount);
'}': dec(iGroupCount);
'\':
begin
if QueueControlWords(strRtfText, iPos, iGroupCount, iLength) then
strHtmlText := strHtmlText + WriteHtmlControlChar;
end;
else
begin
if iGroupCount > 0 then
strHtmlText := strHtmlText + WriteHtmlControlChar +
ConvertSpecialChars(strRtfText[iPos]);
end;
end;
inc(iPos);
end;
Result := WriteHeaderFooter(strHtmlText);
TextFormat.FontTable.List.Free;
TextFormat.ColorTable.List.Free;
end;
function TUniRtf2Html.GetControlWord(var strRtfText, strControlWord,
strParam : string; var iPos, iGroupCount, iLength: integer): boolean;
var
iTempGroupCount: integer;
begin
Result := False;
case strRtfText[iPos + 1] of
'*': // = Unknown controlword
begin
iTempGroupCount := iGroupCount;
// {/*...} ignore entire group when it contains: \*:
while (iGroupCount > iTempGroupCount - 1) and (iPos < iLength) do
begin
inc(iPos);
case strRtfText[iPos] of
'{': inc(iGroupCount);
'}': dec(iGroupCount);
end;
end;
end;
'\', '{', '}': inc(iPos); // these char must just be added the HTML string
else
begin
Result := True;
// Capture the controlword:
strControlWord := '';
repeat
strControlWord := strControlWord + strRtfText[iPos];
inc(iPos);
until (strRtfText[iPos] in ['{', '}', '\', ' ', ';', '-', '0'..'9'])
or(iPos > iLength);
// Capture parameter of controlword:
strParam := '';
while (strRtfText[iPos] in ['a'..'z', '-', '0'..'9']) and (iPos <= iLength) do
begin
strParam := strParam + strRtfText[iPos];
inc(iPos);
end;
if strRtfText[iPos] = ' ' then inc(iPos);
dec(iPos);
end;
end;
end;
function TUniRtf2Html.QueueControlWords(var strRtfText: string;
var iPos, iGroupCount, iLength: integer): boolean;
var
strControlWord: string;
strParam: string;
strFont, strColor, strRGB: string;
iRGB: integer;
begin
// strRtfText[i] = '\': start of a controlword
Result := False; // True = Forced propagation to HTML string
if GetControlWord(strRtfText, strControlWord, strParam,
iPos, iGroupCount, iLength) then
begin
if strControlWord = '\b' then
begin
if (strParam = '') or (StrToInt(strParam) > 0) then
TextFormat.Bold := Activate
else
TextFormat.Bold := Deactivate;
Exit;
end;
if strControlWord = '\i' then
begin
if (strParam = '') or (StrToInt(strParam) > 0) then
TextFormat.Italic := Activate
else
TextFormat.Italic := Deactivate;
Exit;
end;
if strControlWord = '\ul' then
begin
TextFormat.Underline := Activate;
Exit;
end;
if strControlWord = '\ulnone' then
begin
TextFormat.Underline := Deactivate;
Exit;
end;
if strControlWord = '\qc' then
begin
ParagraphFormat.NewAlign := Center;
Exit;
end;
if strControlWord = '\qr' then
begin
ParagraphFormat.NewAlign := Right;
Exit;
end;
if strControlWord = '\fi' then
begin
// \fi: starts an indent which is closed by: \pard
ParagraphFormat.Itemize.IndentStatus := Activate;
ParagraphFormat.Itemize.TextToBulletDist := Abs(StrToInt(strParam));
Exit;
end;
if strControlWord = '\li' then
begin
// \li: display a bullet which is closed by: \par
ParagraphFormat.Itemize.BulletStatus := Activate;
ParagraphFormat.Itemize.IndentDist := strToInt(strParam);
// Assuming that an indent of 400 in RTF is 1 indent in HTML
ParagraphFormat.Itemize.IndentCount :=
Round((ParagraphFormat.Itemize.IndentDist -
ParagraphFormat.Itemize.TextToBulletDist) 400);
Exit;
end;
{ ---------------------------------------------------------------
Itemnumbers is not supported because RTF is piece of crap.
TRichEdit doesn't support it either so I don't need it anyway.
A bullet will be written instead and the itemnumber appears
as part of the text after the bullet.
---------------------------------------------------------------
if strControlWord = '\tx' then
begin
// \tx: display an itemnumber instead of a bullet
ParagraphFormat.Itemize.NumberedBullet := True;
// A space marks the beginning the number:
while (strRtfText[iPos] <> ' ') and (iPos <= iLength) do
begin
inc(iPos);
end;
inc(iPos); // Skip the space char.
// A backslash marks the end of a fontname:
ParagraphFormat.Itemize.NumberingChars := '';
while (strRtfText[iPos] <> '\') and (iPos <= iLength) do
begin
ParagraphFormat.Itemize.NumberingChars :=
ParagraphFormat.Itemize.NumberingChars + strRtfText[iPos];
inc(iPos);
end;
dec(iPos); // 1 step back so the \ isn't skipped in the next run.
Exit;
end;
--------
This code works for the first item but the following don't because
in rft the following itemnumbers are written without a \tx or any
other controlword.
--------
}
if strControlWord = '\pard' then
begin
Result := True; // Force propagation of to HTML string
// Restore paragraph defaults:
// Re-align to the left:
ParagraphFormat.NewAlign := Left;
// Close itemized section:
if ParagraphFormat.Itemize.IsItemized then
ParagraphFormat.Itemize.IndentStatus := Deactivate;
Exit;
end;
if strControlWord = '\par' then
begin
// Close bullet:
if ParagraphFormat.Itemize.BulletIsOpen then
ParagraphFormat.Itemize.BulletStatus := Deactivate;
// New line <BR>, paragraphs <P> are not used because of the line distance:
ParagraphFormat.NewLine := True;
Exit;
end;
if strControlWord = '\line' then
begin
// New line:
ParagraphFormat.NewLine := True;
Exit;
end;
if strControlWord = '\fonttbl' then
begin
// Create a fonttable.
TextFormat.FontTable.Status := Activate;
// The fonttable section ends when the current group ends.
TextFormat.FontTable.GroupCount := iGroupCount;
Exit;
end;
if (TextFormat.FontTable.Status = Activate) and
(TextFormat.FontTable.GroupCount > iGroupCount) then
TextFormat.FontTable.Status := Deactivate; // Fonttable group has ended, so: no more fonttable.
if strControlWord = '\f' then
begin
case TextFormat.FontTable.Status of
Activate:
begin
// Add a font to the list
strFont := '';
// A space marks the beginning a fontname:
while (strRtfText[iPos] <> ' ') and (iPos <= iLength) do
begin
inc(iPos);
end;
inc(iPos); // Skip the space char.
// A semicolon marks the end of a fontname:
while (strRtfText[iPos] <> ';') and (iPos <= iLength) do
begin
strFont := strFont + strRtfText[iPos];
inc(iPos);
end;
// Add font to the list
TextFormat.FontTable.List.Add(strFont);
end;
Deactivate:
begin
TextFormat.NewFont := StrToInt(strParam);
TextFormat.IsUpdated := True;
end;
end;
Exit;
end;
if strControlWord = '\fs' then
begin
TextFormat.NewSize := StrToInt(strParam);
TextFormat.IsUpdated := True;
end;
if strControlWord = '\colortbl' then
begin
// Create a colortable.
// The colortable section ends when the current group ends.
TextFormat.ColorTable.GroupCount := iGroupCount;
// Extract RGB values
// A semicolon marks the end of a color:
strColor := '000000'; // black
while TextFormat.ColorTable.GroupCount <= iGroupCount do
begin
inc(iPos);
case strRtfText[iPos] of
'{': inc(iGroupCount);
'}': dec(iGroupCount);
'\':
begin
if GetControlWord(strRtfText, strControlWord, strParam,
iPos, iGroupCount, iLength) then
begin
if strControlWord = '\red' then
begin
iRGB := StrToInt(strParam);
strRGB := IntToHex(iRGB, 2);
Insert(strRGB, strColor, 1);
Delete(strColor, 3, 2);
end;
if strControlWord = '\green' then
begin
iRGB := StrToInt(strParam);
strRGB := IntToHex(iRGB, 2);
Insert(strRGB, strColor, 3);
Delete(strColor, 5, 2);
end;
if strControlWord = '\blue' then
begin
iRGB := StrToInt(strParam);
strRGB := IntToHex(iRGB, 2);
Insert(strRGB, strColor, 5);
Delete(strColor, 7, 2);
end;
end;
end;
end;
if strRtfText[iPos] = ';' then
TextFormat.ColorTable.List.Add(strColor);
end;
Exit;
end;
if strControlWord = '\cf' then
begin
TextFormat.NewColor := StrToInt(strParam);
TextFormat.IsUpdated := True;
Exit;
end;
end;
end;
function TUniRtf2Html.WriteHtmlControlChar: string;
var
j: integer;
begin
Result := '';
with ParagraphFormat do
begin
if NewLine = True then
begin
Result := Result + '<BR>';
NewLine := False;
end;
if Itemize.IndentStatus = Activate then
begin
Itemize.IsItemized := True;
j := 0;
while Itemize.IndentCount - j > 0 do
begin
inc(j);
{if Itemize.NumberedBullet then
Result := Result + '<OL>'
else}
Result := Result + '<UL>';
end;
Itemize.IndentStatus := Neutral;
end;
if Itemize.BulletStatus = Activate then
begin
Result := Result + '<LI>';
Itemize.BulletStatus := Neutral;
Itemize.BulletIsOpen := True;
end;
if Itemize.BulletStatus = Deactivate then
begin
Result := Result + '</LI>';
Itemize.BulletIsOpen := False;
if (Itemize.IndentStatus = Neutral) and Itemize.IsItemized then
begin
Result := Result + '<LI>';
Itemize.BulletIsOpen := True;
end;
Itemize.BulletStatus := Neutral;
end;
if Itemize.IndentStatus = Deactivate then
begin
j := 0;
while Itemize.IndentCount - j > 0 do
begin
inc(j);
{if Itemize.NumberedBullet then
Result := Result + '</OL>'
else}
Result := Result + '</UL>';
end;
Itemize.IndentStatus := Neutral;
Itemize.IsItemized := False;
//Itemize.NumberedBullet := False;
end;
if CurrentAlign <> NewAlign then
begin
if IsOpen then
Result := Result + '</P>';
case NewAlign of
Left: Result := Result + '<P Align=LEFT>';
Center: Result := Result + '<P Align=CENTER>';
Right: Result := Result + '<P Align=RIGHT>';
end;
IsOpen := True;
CurrentAlign := NewAlign;
end;
end;
with TextFormat do
begin
if IsUpdated then
begin
if IsOpen then
begin
Result := Result + '</FONT>';
TextFormat.CurrentFont := -1;
TextFormat.CurrentSize := 20;
TextFormat.CurrentColor := 0;
IsOpen := False;
end;
if CurrentFont <> NewFont then
begin
// Symbol font may be activated due to a bullet but will be ignored:
if FontTable.List.Strings[NewFont] <> 'Symbol' then
begin
Result := Result + '<FONT FACE="' +
FontTable.List.Strings[NewFont] + '"';
CurrentFont := NewFont;
IsOpen := True;
end;
end;
SizeFactor := 6; // Whatever value looks good.
RftDefaultSize := 20; // equivalent to screenfont height 10.
if Trunc(RftDefaultSize SizeFactor) <> Trunc(NewSize SizeFactor) then
begin
if Trunc((NewSize SizeFactor) - (RftDefaultSize SizeFactor)) > 0 then
begin
if not IsOpen then
Result := Result + '<FONT';
Result := Result + ' SIZE="+' +
IntToStr(Trunc((NewSize SizeFactor) - (RftDefaultSize SizeFactor))) + '"';
CurrentSize := NewSize;
IsOpen := True;
end;
if Trunc((NewSize SizeFactor) - (RftDefaultSize SizeFactor)) < 0 then
begin
if not IsOpen then
Result := Result + '<FONT';
Result := Result + ' SIZE="' +
IntToStr(Trunc((NewSize SizeFactor) - (RftDefaultSize SizeFactor))) + '"';
CurrentSize := NewSize;
IsOpen := True;
end;
end;
if CurrentColor <> NewColor then
begin
if not IsOpen then
Result := Result + '<FONT';
Result := Result + ' COLOR="#' +
ColorTable.List.Strings[NewColor] + '"';
CurrentColor := NewColor;
IsOpen := True;
end;
if IsOpen then
begin
Result := Result + '>';
end;
IsUpdated := False;
end;
if Bold = Activate then
begin
Result := Result + '<B>';
Bold := Neutral;
end;
if Bold = Deactivate then
begin
Result := Result + '</B>';
Bold := Neutral;
end;
if Italic = Activate then
begin
Result := Result + '<I>';
Italic := Neutral;
end;
if Italic = Deactivate then
begin
Result := Result + '</I>';
Italic := Neutral;
end;
if Underline = Activate then
begin
Result := Result + '<U>';
Underline := Neutral;
end;
if Underline = Deactivate then
begin
Result := Result + '</U>';
Underline := Neutral;
end;
end;
end;
function TUniRtf2Html.ConvertSpecialChars(c: char): string;
begin
Result := '';
case c of
#0 :
Result := Result; // Writes pending codes only
#9 :
Result:= Result + #9; // Writes tab char
'>' :
Result:= Result + '>'; // Writes "greater than"
'<' :
Result:= Result + '<'; // Writes "less than"
else
Result:= Result + c; // Writes a character
end;
end;
function TUniRtf2Html.WriteHeaderFooter(strHtmlText: string): string;
begin
Result := '<HTML><HEAD><TITLE></TITLE></HEAD><BODY>' +
strHtmlText;
if ParagraphFormat.IsOpen then
Result := Result + '</P>';
Result := Result + '</BODY></HTML>';
end;
end.