unit UniRtf2HtmlUnit1;
{------------------------------------------------------------------------------
  This RTF to HTML conversion program supports the following RTF statements:
  - Bold, Italic, Underline
  - Alignment
  - Fonts + font size + font color
  - Bullet
  It doesn't support itemnumbers, instead it writes bullets.

  RTF code generated by Microsoft Word may not work because it contains
  an incredible amount of crap which may confuse the conversion routines.
  RTF code generated by TRichEdit and WordPad seem to work perfectly.

  Updated: 10-jun-2002
  Comments may be sent to: ruthjes@chello.nl
-------------------------------------------------------------------------------}
interface

uses
  Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms,
  StdCtrls, Dialogs;

type
  TSwitch = (Activate, Deactivate, Neutral);
  TAlignment = (Left, Right, Center);

  TFontTable = record
    List: TStringList;
    Status: TSwitch;
    GroupCount: integer;
  end;
  TColorTable = record
    List: TStringList;
    GroupCount: integer;
  end;
  TTextFormat = record
    FontTable: TFontTable;
    ColorTable: TColorTable;
    Bold: TSwitch;
    Italic: TSwitch;
    Underline: TSwitch;
    NewFont: integer;
    CurrentFont: integer;
    NewSize: integer;
    CurrentSize: integer;
    NewColor: integer;
    CurrentColor: integer;
    IsOpen: boolean;
    IsUpdated: boolean;
    SizeFactor: integer;
    RftDefaultSize: integer;
  end;
  TItemize = record
    IsItemized: boolean;
    IndentStatus: TSwitch;
    BulletStatus: TSwitch;
    BulletIsOpen: boolean;
    //NumberedBullet: boolean; ItemNumbers is not supported because RTF is a piece of crap
    //NumberingChars: string;
    IndentCount: integer;
    IndentDist: integer;
    TextToBulletDist: integer;
  end;
  TParagraphFormat = record
    CurrentAlign: TAlignment;
    NewAlign: TAlignment;
    NewLine: boolean;
    IsOpen: boolean;
    Itemize: TItemize;
  end;

  TUniRtf2Html = class(TObject)
    function ConvertRtfToHtml(strRTFText: string): string;
  private
    { private declaration }
    TextFormat: TTextFormat;
    ParagraphFormat: TParagraphFormat;

    function QueueControlWords(var strRTFText: string;
      var iPos, iGroupCount, iLength: integer): boolean;
    function GetControlWord(var strRtfText, strControlWord, strParam : string;
      var iPos, iGroupCount, iLength: integer): boolean;
    function ConvertSpecialChars(c: char): string;
    function WriteHeaderFooter(strHtmlText: string): string;
    function WriteHtmlControlChar: string;
  public
    { public declaration }
  end;

var
  UniRtf2Html: TUniRtf2Html;

implementation

function TUniRtf2Html.ConvertRtfToHtml(strRTFText: string): string;
var
  strHtmlText: string;
  iPos: integer;
  iLength: integer;
  iGroupCount: integer;
begin
  iLength := Length(strRTFText);
  iPos := 0;
  iGroupCount := 0;

  TextFormat.FontTable.Status := Neutral;
  TextFormat.FontTable.List := TStringList.Create;
  TextFormat.ColorTable.List := TStringList.Create;
  TextFormat.Bold := Neutral;
  TextFormat.Italic := Neutral;
  TextFormat.Underline := Neutral;
  TextFormat.CurrentFont := -1;
  TextFormat.NewFont := -1;
  TextFormat.CurrentSize := 20;
  TextFormat.CurrentColor := 0;
  TextFormat.NewColor := 0;
  TextFormat.NewSize := -1;
  TextFormat.IsOpen := False;
  TextFormat.IsUpdated := False;

  ParagraphFormat.Itemize.IsItemized := False;
  ParagraphFormat.Itemize.IndentStatus := Neutral;
  ParagraphFormat.Itemize.BulletStatus := Neutral;
  ParagraphFormat.Itemize.BulletIsOpen := False;
  //ParagraphFormat.Itemize.NumberedBullet := False;
  ParagraphFormat.Itemize.IndentCount := 0;
  ParagraphFormat.Itemize.IndentDist := 0;
  ParagraphFormat.Itemize.TextToBulletDist := 0;
  ParagraphFormat.CurrentAlign := Left;
  ParagraphFormat.NewAlign := Left;
  ParagraphFormat.NewLine := False;
  ParagraphFormat.IsOpen := False;

  strHtmlText := '';
  while iPos <= iLength do
  begin
    case strRtfText[iPos] of
      '{': inc(iGroupCount);
      '}': dec(iGroupCount);
      '\':
      begin
        if QueueControlWords(strRtfText, iPos, iGroupCount, iLength) then
          strHtmlText := strHtmlText + WriteHtmlControlChar;
      end;
      else
      begin
        if iGroupCount > 0 then
          strHtmlText := strHtmlText + WriteHtmlControlChar +
            ConvertSpecialChars(strRtfText[iPos]);
      end;
    end;
    inc(iPos);
  end;
  Result := WriteHeaderFooter(strHtmlText);

  TextFormat.FontTable.List.Free;
  TextFormat.ColorTable.List.Free;
end;

function TUniRtf2Html.GetControlWord(var strRtfText, strControlWord,
           strParam : string; var iPos, iGroupCount, iLength: integer): boolean;
var
  iTempGroupCount: integer;

begin
  Result := False;
  case strRtfText[iPos + 1] of
    '*': // = Unknown controlword
      begin
        iTempGroupCount := iGroupCount;
        // {/*...} ignore entire group when it contains: \*:
        while (iGroupCount > iTempGroupCount - 1) and (iPos < iLength) do
        begin
          inc(iPos);
          case strRtfText[iPos] of
            '{': inc(iGroupCount);
            '}': dec(iGroupCount);
          end;
        end;
      end;
    '\', '{', '}': inc(iPos); // these char must just be added the HTML string
    else
      begin
        Result := True;
        // Capture the controlword:
        strControlWord := '';
        repeat
          strControlWord := strControlWord + strRtfText[iPos];
          inc(iPos);
        until (strRtfText[iPos] in ['{', '}', '\', ' ', ';', '-', '0'..'9'])
          or(iPos > iLength);

        // Capture parameter of controlword:
        strParam := '';
        while (strRtfText[iPos] in ['a'..'z', '-', '0'..'9']) and (iPos <= iLength) do
        begin
          strParam := strParam + strRtfText[iPos];
          inc(iPos);
        end;
        if strRtfText[iPos] = ' ' then inc(iPos);
        dec(iPos);
      end;
  end;
end;

function TUniRtf2Html.QueueControlWords(var strRtfText: string;
           var iPos, iGroupCount, iLength: integer): boolean;
var
  strControlWord: string;
  strParam: string;
  strFont, strColor, strRGB: string;
  iRGB: integer;
begin
  // strRtfText[i] = '\': start of a controlword
  Result := False; // True = Forced propagation to HTML string

  if GetControlWord(strRtfText, strControlWord, strParam,
    iPos, iGroupCount, iLength) then
  begin
    if strControlWord = '\b' then
    begin
      if (strParam = '') or (StrToInt(strParam) > 0) then
        TextFormat.Bold := Activate
      else
        TextFormat.Bold := Deactivate;
      Exit;
    end;

    if strControlWord = '\i' then
    begin
      if (strParam = '') or (StrToInt(strParam) > 0) then
        TextFormat.Italic := Activate
      else
        TextFormat.Italic := Deactivate;
      Exit;
    end;

    if strControlWord = '\ul' then
    begin
      TextFormat.Underline := Activate;
      Exit;
    end;

    if strControlWord = '\ulnone' then
    begin
      TextFormat.Underline := Deactivate;
      Exit;
    end;

    if strControlWord = '\qc' then
    begin
      ParagraphFormat.NewAlign := Center;
      Exit;
    end;

    if strControlWord = '\qr' then
    begin
      ParagraphFormat.NewAlign := Right;
      Exit;
    end;

    if strControlWord = '\fi' then
    begin
      // \fi: starts an indent which is closed by: \pard
      ParagraphFormat.Itemize.IndentStatus := Activate;
      ParagraphFormat.Itemize.TextToBulletDist := Abs(StrToInt(strParam));
      Exit;
    end;

    if strControlWord = '\li' then
    begin
      // \li: display a bullet which is closed by: \par
      ParagraphFormat.Itemize.BulletStatus := Activate;
      ParagraphFormat.Itemize.IndentDist := strToInt(strParam);
      // Assuming that an indent of 400 in RTF is 1 indent in HTML
      ParagraphFormat.Itemize.IndentCount :=
        Round((ParagraphFormat.Itemize.IndentDist -
          ParagraphFormat.Itemize.TextToBulletDist)  400);
      Exit;
    end;
    { ---------------------------------------------------------------
      Itemnumbers is not supported because RTF is piece of crap.
      TRichEdit doesn't support it either so I don't need it anyway.
      A bullet will be written instead and the itemnumber appears
      as part of the text after the bullet.
      ---------------------------------------------------------------
    if strControlWord = '\tx' then
    begin
      // \tx: display an itemnumber instead of a bullet
      ParagraphFormat.Itemize.NumberedBullet := True;
      // A space marks the beginning the number:
      while (strRtfText[iPos] <> ' ') and (iPos <= iLength) do
      begin
        inc(iPos);
      end;
      inc(iPos); // Skip the space char.
      // A backslash marks the end of a fontname:
      ParagraphFormat.Itemize.NumberingChars := '';
      while (strRtfText[iPos] <> '\') and (iPos <= iLength) do
      begin
        ParagraphFormat.Itemize.NumberingChars :=
          ParagraphFormat.Itemize.NumberingChars + strRtfText[iPos];
        inc(iPos);
      end;
      dec(iPos); // 1 step back so the \ isn't skipped in the next run.
      Exit;
    end;
    --------
    This code works for the first item but the following don't because
    in rft the following itemnumbers are written without a \tx or any
    other controlword.
    --------
    }

    if strControlWord = '\pard' then
    begin
      Result := True; // Force propagation of to HTML string
      // Restore paragraph defaults:
      // Re-align to the left:
      ParagraphFormat.NewAlign := Left;
      // Close itemized section:
      if ParagraphFormat.Itemize.IsItemized then
        ParagraphFormat.Itemize.IndentStatus := Deactivate;
      Exit;
    end;

    if strControlWord = '\par' then
    begin
      // Close bullet:
      if ParagraphFormat.Itemize.BulletIsOpen then
        ParagraphFormat.Itemize.BulletStatus := Deactivate;
      // New line <BR>, paragraphs <P> are not used because of the line distance:
      ParagraphFormat.NewLine := True;
      Exit;
    end;

    if strControlWord = '\line' then
    begin
      // New line:
      ParagraphFormat.NewLine := True;
      Exit;
    end;

    if strControlWord = '\fonttbl' then
    begin
      // Create a fonttable.
      TextFormat.FontTable.Status := Activate;
      // The fonttable section ends when the current group ends.
      TextFormat.FontTable.GroupCount := iGroupCount;
      Exit;
    end;

    if (TextFormat.FontTable.Status = Activate) and
       (TextFormat.FontTable.GroupCount > iGroupCount) then
      TextFormat.FontTable.Status := Deactivate; // Fonttable group has ended, so: no more fonttable.

    if strControlWord = '\f' then
    begin
      case TextFormat.FontTable.Status of
        Activate:
          begin
            // Add a font to the list
            strFont := '';
            // A space marks the beginning a fontname:
            while (strRtfText[iPos] <> ' ') and (iPos <= iLength) do
            begin
              inc(iPos);
            end;
            inc(iPos); // Skip the space char.
            // A semicolon marks the end of a fontname:
            while (strRtfText[iPos] <> ';') and (iPos <= iLength) do
            begin
              strFont := strFont + strRtfText[iPos];
              inc(iPos);
            end;
            // Add font to the list
            TextFormat.FontTable.List.Add(strFont);
          end;
        Deactivate:
          begin
            TextFormat.NewFont := StrToInt(strParam);
            TextFormat.IsUpdated := True;
          end;
      end;
      Exit;
    end;
    if strControlWord = '\fs' then
    begin
      TextFormat.NewSize := StrToInt(strParam);
      TextFormat.IsUpdated := True;
    end;

    if strControlWord = '\colortbl' then
    begin
      // Create a colortable.
      // The colortable section ends when the current group ends.
      TextFormat.ColorTable.GroupCount := iGroupCount;
      // Extract RGB values
      // A semicolon marks the end of a color:
      strColor := '000000'; // black
      while TextFormat.ColorTable.GroupCount <= iGroupCount do
      begin
        inc(iPos);
        case strRtfText[iPos] of
          '{': inc(iGroupCount);
          '}': dec(iGroupCount);
          '\':
            begin
              if GetControlWord(strRtfText, strControlWord, strParam,
                iPos, iGroupCount, iLength) then
              begin
                if strControlWord = '\red' then
                begin
                  iRGB := StrToInt(strParam);
                  strRGB := IntToHex(iRGB, 2);
                  Insert(strRGB, strColor, 1);
                  Delete(strColor, 3, 2);
                end;
                if strControlWord = '\green' then
                begin
                  iRGB := StrToInt(strParam);
                  strRGB := IntToHex(iRGB, 2);
                  Insert(strRGB, strColor, 3);
                  Delete(strColor, 5, 2);
                end;
                if strControlWord = '\blue' then
                begin
                  iRGB := StrToInt(strParam);
                  strRGB := IntToHex(iRGB, 2);
                  Insert(strRGB, strColor, 5);
                  Delete(strColor, 7, 2);
                end;
              end;
            end;
        end;
        if strRtfText[iPos] = ';' then
          TextFormat.ColorTable.List.Add(strColor);
      end;
      Exit;
    end;

    if strControlWord = '\cf' then
    begin
      TextFormat.NewColor := StrToInt(strParam);
      TextFormat.IsUpdated := True;
      Exit;
    end;
 end;
end;

function TUniRtf2Html.WriteHtmlControlChar: string;
var
  j: integer;
begin
  Result := '';

  with ParagraphFormat do
  begin
    if NewLine = True then
    begin
      Result := Result + '<BR>';
      NewLine := False;
    end;

    if Itemize.IndentStatus = Activate then
    begin
      Itemize.IsItemized := True;
      j := 0;
      while Itemize.IndentCount - j > 0 do
      begin
        inc(j);
        {if Itemize.NumberedBullet then
          Result := Result + '<OL>'
        else}
        Result := Result + '<UL>';
      end;
      Itemize.IndentStatus := Neutral;
    end;

    if Itemize.BulletStatus = Activate then
    begin
      Result := Result + '<LI>';
      Itemize.BulletStatus := Neutral;
      Itemize.BulletIsOpen := True;
    end;

    if Itemize.BulletStatus = Deactivate then
    begin
      Result := Result + '</LI>';
      Itemize.BulletIsOpen := False;
      if (Itemize.IndentStatus = Neutral) and Itemize.IsItemized then
      begin
        Result := Result + '<LI>';
        Itemize.BulletIsOpen := True;
      end;
      Itemize.BulletStatus := Neutral;
    end;

    if Itemize.IndentStatus = Deactivate then
    begin
      j := 0;
      while Itemize.IndentCount - j > 0 do
      begin
        inc(j);
        {if Itemize.NumberedBullet then
          Result := Result + '</OL>'
        else}
        Result := Result + '</UL>';
      end;
      Itemize.IndentStatus := Neutral;
      Itemize.IsItemized := False;
      //Itemize.NumberedBullet := False;
    end;

    if CurrentAlign <> NewAlign then
    begin
      if IsOpen then
        Result := Result + '</P>';
      case NewAlign of
        Left: Result := Result + '<P Align=LEFT>';
        Center: Result := Result + '<P Align=CENTER>';
        Right: Result := Result + '<P Align=RIGHT>';
      end;
      IsOpen := True;
      CurrentAlign := NewAlign;
    end;
  end;

  with TextFormat do
  begin
    if IsUpdated then
    begin
      if IsOpen then
      begin
        Result := Result + '</FONT>';
        TextFormat.CurrentFont := -1;
        TextFormat.CurrentSize := 20;
        TextFormat.CurrentColor := 0;
        IsOpen := False;
      end;

      if CurrentFont <> NewFont then
      begin
        // Symbol font may be activated due to a bullet but will be ignored:
        if FontTable.List.Strings[NewFont] <> 'Symbol' then
        begin
          Result := Result + '<FONT FACE="' +
            FontTable.List.Strings[NewFont] + '"';
          CurrentFont := NewFont;
          IsOpen := True;
        end;
      end;
      SizeFactor := 6; // Whatever value looks good.
      RftDefaultSize := 20; // equivalent to screenfont height 10.
      if Trunc(RftDefaultSize  SizeFactor) <> Trunc(NewSize  SizeFactor) then
      begin
        if Trunc((NewSize  SizeFactor) - (RftDefaultSize  SizeFactor)) > 0 then
        begin
          if not IsOpen then
            Result := Result + '<FONT';
          Result := Result + ' SIZE="+' +
            IntToStr(Trunc((NewSize  SizeFactor) - (RftDefaultSize  SizeFactor))) + '"';
          CurrentSize := NewSize;
          IsOpen := True;
        end;
        if Trunc((NewSize  SizeFactor) - (RftDefaultSize  SizeFactor)) < 0 then
        begin
          if not IsOpen then
            Result := Result + '<FONT';
          Result := Result + ' SIZE="' +
            IntToStr(Trunc((NewSize  SizeFactor) - (RftDefaultSize  SizeFactor))) + '"';
          CurrentSize := NewSize;
          IsOpen := True;
        end;
      end;
      if CurrentColor <> NewColor then
      begin
        if not IsOpen then
          Result := Result + '<FONT';
        Result := Result + ' COLOR="#' +
          ColorTable.List.Strings[NewColor] + '"';
        CurrentColor := NewColor;
        IsOpen := True;
      end;
      if IsOpen then
      begin
        Result := Result + '>';
      end;
      IsUpdated := False;
    end;

    if Bold = Activate then
    begin
      Result := Result + '<B>';
      Bold := Neutral;
    end;
    if Bold = Deactivate then
    begin
      Result := Result + '</B>';
      Bold := Neutral;
    end;

    if Italic = Activate then
    begin
      Result := Result + '<I>';
      Italic := Neutral;
    end;
    if Italic = Deactivate then
    begin
      Result := Result + '</I>';
      Italic := Neutral;
    end;

    if Underline = Activate then
    begin
      Result := Result + '<U>';
      Underline := Neutral;
    end;
    if Underline = Deactivate then
    begin
      Result := Result + '</U>';
      Underline := Neutral;
    end;
  end;
end;

function TUniRtf2Html.ConvertSpecialChars(c: char): string;
begin
  Result := '';
  case c of
    #0  :
      Result := Result;          // Writes pending codes only
    #9  :
      Result:= Result + #9;       // Writes tab char
    '>' :
      Result:= Result + '&gt';    // Writes "greater than"
    '<' :
      Result:= Result + '&lt';    // Writes "less than"
    else
      Result:= Result + c;        // Writes a character
  end;
end;

function TUniRtf2Html.WriteHeaderFooter(strHtmlText: string): string;
begin
  Result := '<HTML><HEAD><TITLE></TITLE></HEAD><BODY>' +
    strHtmlText;
  if ParagraphFormat.IsOpen then
    Result := Result + '</P>';
  Result := Result + '</BODY></HTML>';
end;

end.