[fpc-devel] utf8 reading

Florian Klaempfl florian at freepascal.org
Wed Mar 9 10:12:39 CET 2005


C Western wrote:

> Can I offer the attached patch for reading utf8 strings from resource 
> files? It simply converts the utf string to a normal string by 
> discarding the high bits, 

What would be the correct solution?

> but I had some files from Delphi or Kylix that 
> were otherwise unreadable.
> Colin
> 
> 
> ------------------------------------------------------------------------
> 
> diff -uNr fpc/rtl/objpas/classes/classesh.inc fpc.w/rtl/objpas/classes/classesh.inc
> --- fpc/rtl/objpas/classes/classesh.inc	2005-03-07 20:43:48.487255015 +0000
> +++ fpc.w/rtl/objpas/classes/classesh.inc	2005-03-07 20:53:11.494604156 +0000
> @@ -777,7 +777,7 @@
>  
>    TValueType = (vaNull, vaList, vaInt8, vaInt16, vaInt32, vaExtended,
>      vaString, vaIdent, vaFalse, vaTrue, vaBinary, vaSet, vaLString,
> -    vaNil, vaCollection, vaSingle, vaCurrency, vaDate, vaWString, vaInt64);
> +    vaNil, vaCollection, vaSingle, vaCurrency, vaDate, vaWString, vaInt64, vaUTF8String);
>  
>    TFilerFlag = (ffInherited, ffChildPos, ffInline);
>    TFilerFlags = set of TFilerFlag;
> diff -uNr fpc/rtl/objpas/classes/classes.inc fpc.w/rtl/objpas/classes/classes.inc
> --- fpc/rtl/objpas/classes/classes.inc	2005-03-07 20:43:48.462262468 +0000
> +++ fpc.w/rtl/objpas/classes/classes.inc	2005-03-07 20:53:11.496603567 +0000
> @@ -877,6 +877,41 @@
>  {$endif HASWIDESTRING}
>    end;
>  
> +  function ReadUTF8Str: String;
> +  var
> +    len, f, t: Integer;
> +  begin
> +    len := Input.ReadDWord;
> +    SetLength(Result, len);
> +    if len > 0 then begin
> +      Input.Read(Result[1], len);
> +      { For now simply take bottom 8 bits of Unicode character }
> +      t := 1;
> +      f := 1;
> +      while f <= len do begin
> +        if (Ord(Result[f]) and $80) <> 0 then begin
> +          if (Ord(Result[f]) and %11100000) = %11000000 then
> +            Inc(f)
> +          else if (Ord(Result[f]) and %11110000) = %11100000 then
> +            Inc(f,2)
> +          else if (Ord(Result[f]) and %11111000) = %11110000 then
> +            Inc(f,3)
> +          else if (Ord(Result[f]) and %11111100) = %11111000 then
> +            Inc(f,4)
> +          else if (Ord(Result[f]) and %11111110) = %11111100 then
> +            Inc(f,5)
> +          else
> +            WriteLn('Bad UTF8 Sequence');
> +          Result[t] := Char((Ord(Result[f]) and %111111) or ((Ord(Result[f-1]) and %11) shl 6));
> +        end else
> +          Result[t] := Result[f];
> +        Inc(f);
> +        Inc(t);
> +      end;
> +      SetLength(Result, t-1);
> +    end;
> +  end;
> +  
>    procedure ReadPropList(indent: String);
>  
>      procedure ProcessValue(ValueType: TValueType; Indent: String);
> @@ -988,6 +1023,10 @@
>          {vaSingle: begin OutLn('!!Single!!'); exit end;
>          vaCurrency: begin OutLn('!!Currency!!'); exit end;
>          vaDate: begin OutLn('!!Date!!'); exit end;}
> +        vaUTF8String: begin
> +            OutString(ReadUTF8Str);
> +            OutLn('');
> +          end;
>          else
>            Stop(IntToStr(Ord(ValueType)));
>        end;
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> fpc-devel maillist  -  fpc-devel at lists.freepascal.org
> http://lists.freepascal.org/mailman/listinfo/fpc-devel





More information about the fpc-devel mailing list