[fpc-pascal] CSV via PCRE
S. Fisher
expandafter at yahoo.com
Sat Nov 10 04:25:25 CET 2007
Fields are separated by commas, but if a field is surrounded by
double quotes it can contain commas---in fact, can contain any
byte whatsoever; double quotes (") within the field must be
doubled, just as single quotes within a Pascal string are doubled.
All we need in order to parse a csv record is a single regex and
a few trimmings:
'\G\s*"([^"]*(?:""[^"]*)*)"\s*,|\G([^,"]*),'
The part to the left of the pipe is for fields that are enclosed
in quotes; the part to the right is for ordinary fields.
{$mode objfpc}
uses pcre, strutils, sysutils;
{
PCRE regex options
Some options can be switched on inline by "(?c)" at the start of the
regex, where c is a lowercase option letter. In particular, "(?i)" forces
case-blind matching, "(?s)" forces dot to match newline, "(?x)" forces
whitespace to be ignored, and "(?m)" forces ^ and $ to match around
newlines, not just at the beginning/end of the string. (\A and \Z
match the beginning and end of string respectively independent of this
flag, so you can get the effect of PCRE_ANCHORED by wrapping your
pattern with them.)
}
type
  // Dynamic array of strings; parse_csv stores one CSV field per element.
  string_list = array of ansiString;
{ Splits one CSV record into its fields.

  s   : the record text. Fields are separated by commas. A field may be
        wrapped in double quotes, in which case it may contain commas and
        line breaks; a literal double quote inside it is written twice.
  ary : receives the fields in order; its length is set to the number of
        fields found. Doubled quotes inside quoted fields are collapsed
        to a single quote.

  Raises Exception when the text at the current position matches neither
  the quoted-field nor the plain-field form (for example an unterminated
  quoted field, or a quote in the middle of an unquoted field). In that
  case ary holds the fields parsed before the error. }
procedure parse_csv( s: ansiString; var ary: string_list );
const
  // Writable typed constant: the compiled regex is created on the first
  // call and reused by later calls.
  field_pat : iRegex = nil;
var
  captures: iMatch;
  str, field: ansistring;
  where, next_pos, truelength: longint;
begin
  // Has the regular expression engine been created?
  if field_pat = nil then
    field_pat := regexCreate(
      '\G\s*"([^"]*(?:""[^"]*)*)"\s*,|\G([^,"]*),' );

  // Both alternatives of the pattern end with a comma, so append one to
  // make the last field look like all the others.
  str := s + ',';
  setlength( ary, 32 );
  truelength := 0;
  where := 0;
  while where < length( str ) do
  begin
    captures := field_pat.match( str, where );

    // A successful match is anchored at `where` by \G and consumes at
    // least the trailing comma, so it always ends beyond `where`.
    // Anything else means the match failed; without this check the loop
    // would never advance and would append fields forever.
    // NOTE(review): assumes a failed match reports a group 0 that does
    // not extend past `where` -- confirm against the pcre unit.
    next_pos := captures.groups[0].index + captures.groups[0].length;
    if next_pos <= where then
    begin
      setlength( ary, truelength );
      raise Exception.CreateFmt(
        'parse_csv: malformed CSV record at offset %d', [where] );
    end;

    if captures.groups[1].index > -1 then
      // Field was surrounded by quotes.
      field := AnsiReplaceStr( captures.groups[1].value, '""', '"' )
    else
      field := captures.groups[2].value;

    // Grow the result array in chunks rather than once per field.
    if truelength >= length( ary ) then
      setlength( ary, truelength + 32 );
    ary[ truelength ] := field;
    inc( truelength );

    where := next_pos;
  end;
  // Trim the unused tail of the last chunk.
  setlength( ary, truelength );
end;
// Demo: parse one record containing a quoted field with an embedded line
// break, a plain field, and a quoted field with doubled quotes and a comma,
// then print each field under a numbered heading.
var
  fields: string_list;
  // Signed counter: high() of an empty dynamic array is -1, which an
  // unsigned counter cannot represent.
  i: longint;
begin
  parse_csv(
    ' "Make one'#13#10'or two lines" ,brick,"""Look at that,"" he said"',
    fields );
  for i := 0 to high(fields) do
  begin
    writeln( ' Field ', i+1, ' ---------------------' );
    writeln( fields[i] );
  end;
end.
__________________________________________________
Do You Yahoo!?
Tired of spam? Yahoo! Mail has the best spam protection around
http://mail.yahoo.com
More information about the fpc-pascal
mailing list