[fpc-devel] Differences in Regular Expression handling between FPC 3.3.1 and 3.2.0

Russ russeld.lists at gmail.com
Wed Jun 29 17:48:36 CEST 2022


Hi,

It looks like regular expressions in FPC trunk now use Unicode 
properties and return Unicode results, because the regexpr unit is now 
built with {$DEFINE Unicode} defined.

The problem is I can't get it to return the same results as with FPC 
3.2.0, even with type casting. The following program tests combinations 
of literal and string/unicodestring expressions and matches and shows 
the differences in results produced by FPC 3.2.2 and 3.3.1, shown below 
the code.

With a literal expression and a string search parameter, the trunk 
version doesn't find a match.

<code>
    unit Unit1;
    {$mode objfpc}{$H+}{$Macro on}
    interface

    uses
      Classes, SysUtils, Forms, Controls, Graphics, Dialogs, StdCtrls, 
RegExpr;
    type
      { Main form of the test program. All test output is written to Memo1
        by FormCreate. }
      TForm1 = class(TForm)
        // Memo that receives the result lines of each regex scenario.
        // Presumably instantiated from the form resource ({$R *.lfm}) - confirm.
        Memo1: TMemo;
        // Runs the four regular-expression scenarios and reports the matches.
        // Presumably wired to the form's OnCreate event in the .lfm - confirm.
        procedure FormCreate(Sender: TObject);
      private
      public
      end;

    var
      // Unit-level form variable.
      Form1: TForm1;
      // Unit-level regex variable. Nothing in this unit assigns it, and
      // TForm1.FormCreate declares its own local R that shadows this one.
      R: TRegExpr;

    implementation
    {$R *.lfm}
     // Macro used instead of const, to ensure literal will be used
     {$define LIT_EXPR := '(?i)(\+\/\-|±)\d{1,3}(\.\d+)?(°|\s+deg)'}
     {$define LIT_SRCH := 'The range is ±90.0° '}

    { Runs four TRegExpr scenarios against the same pattern and subject text
      and writes the outcome of each to Memo1:
        1. expression and subject both passed as literals (macro expansions),
        2. expression left from scenario 1, subject passed in a string,
        3. expression and subject both passed in string variables,
        4. expression and subject both passed in unicodestring variables.
      The scenarios are intentionally kept as separate inline calls: the way
      each argument is typed (literal / string / unicodestring) is the thing
      under test, so they must not be routed through a shared helper.

      Sender is not used. }
    procedure TForm1.FormCreate(Sender: TObject);
    const
      FOUND = '  Matches found:';
      NOT_FOUND = '  No match found';
    var
      // Local instance; shadows the unit-level variable of the same name.
      R: TRegExpr;
      expr, srch, match: string;
      u_expr, u_srch, u_match: unicodestring;
      i: Integer;
    begin
      R := TRegExpr.Create;
      try
        // Literal expression, literal search.
        // LIT_EXPR / LIT_SRCH are macros, so literals are passed directly.
        Memo1.Text := 'Literal expression and literal search:';
        R.Expression := LIT_EXPR;
        if R.Exec(LIT_SRCH) then begin
          Memo1.Lines.Add(FOUND);
          // Probe groups 0..9 and report only the non-empty ones.
          for i := 0 to 9 do begin
            match := string(R.Match[i]);
            if match <> '' then
              Memo1.Lines.Add('  '+i.ToString + ': ' + match);
          end;
        end
        else
          Memo1.Lines.Add(NOT_FOUND);

        // Literal expression, search in string.
        // R.Expression is still the literal assigned in the first scenario.
        Memo1.Lines.Add(LineEnding + 'Literal expression, search in string:');
        srch := 'The range is ±90.0° ';
        if R.Exec(srch) then begin
          Memo1.Lines.Add(FOUND);
          for i := 0 to 9 do begin
            match := R.Match[i];
            if match <> '' then
              Memo1.Lines.Add('  '+i.ToString + ': ' + match);
          end;
        end
        else
          Memo1.Lines.Add(NOT_FOUND);

        // Expression, search in string.
        Memo1.Lines.Add(LineEnding + 'Expression and search both in strings:');
        expr := LIT_EXPR;
        srch := LIT_SRCH;
        R.Expression := expr;
        if R.Exec(srch) then begin
          Memo1.Lines.Add(FOUND);
          for i := 0 to 9 do begin
            match := R.Match[i];
            if match <> '' then
              Memo1.Lines.Add('  '+i.ToString + ': ' + match);
          end;
        end
        else
          Memo1.Lines.Add(NOT_FOUND);

        // Using unicode strings.
        Memo1.Lines.Add(LineEnding + 'Expression and search both in Unicode strings:');
        u_expr := LIT_EXPR;
        u_srch := LIT_SRCH;
        R.Expression := u_expr;
        if R.Exec(u_srch) then begin
          Memo1.Lines.Add(FOUND);
          for i := 0 to 9 do begin
            u_match := R.Match[i];
            if u_match <> '' then
              Memo1.Lines.Add('  '+i.ToString + ': ' + u_match);
          end;
        end
        else
          Memo1.Lines.Add(NOT_FOUND);
      finally
        // Release the regex object even if a scenario raises.
        R.Free;
      end;
    end;

    end.
</code>

--- Lazarus 2.2.2 FPC 3.2.2 ---
Literal expression and literal search:
   Matches found:
   0: ±90.0°
   1: ±
   2: .0
   3: °

Literal expression, search in string:
   Matches found:
   0: ±90.0°
   1: ±
   2: .0
   3: °

Expression and search both in strings:
   Matches found:
   0: ±90.0°
   1: ±
   2: .0
   3: °

Expression and search both in Unicode strings:
   Matches found:
   0: ±90.0°
   1: ±
   2: .0
   3: °

--- Lazarus 2.3.0 (rev main-2_3-1940-g01d6b3230d) FPC 3.3.1 i386-win32-win32/win64 ---
Literal expression and literal search:
   Matches found:
   0: ±90.0°
   1: ±
   2: .0
   3: °

Literal expression, search in string:
   No match found

Expression and search both in strings:
   Matches found:
   0: ±90.0°
   1: ±
   2: .0
   3: °

Expression and search both in Unicode strings:
   Matches found:
   0: ±90.0°
   1: ±
   2: .0
   3: °

-------------

Regards
Russ


More information about the fpc-devel mailing list