[fpc-pascal] Re: Firebird: bulk insert performance: suggestions?

Reinier Olislagers reinierolislagers at gmail.com
Fri Sep 7 13:42:30 CEST 2012


On 7-9-2012 13:22, Ludo Brands wrote:
>> For my Dutch postcode program https://bitbucket.org/reiniero/postcode
>> with an embedded Firebird 2.5 database, I allow users to read 
>> in a CSV file with new or updated postcode data. I use sqldb, 
>> FPC x86. I'd like to get your suggestions on speed improvements.
>>
> 
> Turn of indices when inserting and turn them on again when the inserting is
> done.
No indices/constraints on that table; the stored procedure that
processes the records from that table will go through them line by line
anyway:
CREATE TABLE BULKINSERTDATA
(
  PROVINCENAME VARCHAR(255),
  CITYNAME VARCHAR(64),
  POSTCODE VARCHAR(6),
  STREETNAME VARCHAR(255),
  LOW INTEGER,
  HIGH INTEGER,
  EVEN BIT DEFAULT NULL, --basically a SMALLINT
  LATITUDE DECIMAL(10,8),
  LONGITUDE DECIMAL(10,8)
);

> Since you are the only user and concurrent access is not that important (I
> guess), I believe isc_tpb_concurrency is not the best choice. IIRC
> isc_tpb_read_committed + isc_tpb_no_rec_version has the less overhead. 
Mmmm, I remember having figured this out earlier. At least I
investigated enough to write
http://wiki.lazarus.freepascal.org/Firebird_in_action#Advanced_transactions
... but didn't document enough so that I can justify my choice ;)

I'll do some more digging and get back on this.

Thanks,
Reinier

Oh, if anybody has suggestions about improving the SP, I'd be grateful...
It's meant to either add new data or replace existing matching data.
City 1:N CityName
Province and Country are not used ATM (the Pascal code passess NULL values)
Realstreet has postcode details (e.g. the letters AB in 1012AB)
Postcode.FourPP has the postcode digits (e.g. 1012 in 1012AB)

SET TERM ^ ;
CREATE PROCEDURE BULKUPDATE
AS
DECLARE VARIABLE localPROVINCENAME VARCHAR(255);
DECLARE VARIABLE localCITYNAME VARCHAR(64);
DECLARE VARIABLE localPOSTCODE VARCHAR(6);
DECLARE VARIABLE localSTREETNAME VARCHAR(255);
DECLARE VARIABLE localLOW INTEGER;
DECLARE VARIABLE localHIGH INTEGER;
DECLARE VARIABLE localEVEN BIT;
DECLARE VARIABLE localLAT DECIMAL(10,8);
DECLARE VARIABLE localLNG DECIMAL(10,8);
DECLARE VARIABLE localCOUNTRYID INTEGER;
DECLARE VARIABLE localPROVINCEID INTEGER;
DECLARE VARIABLE localCITYNAMEID INTEGER;
DECLARE VARIABLE localCITYID INTEGER;
DECLARE VARIABLE localPOSTCODEID INTEGER;
DECLARE VARIABLE localSTREETNAMEID INTEGER;
DECLARE VARIABLE localFOURPP INTEGER;
DECLARE VARIABLE localPOSTCODECHARS POSTCODECHARS;
BEGIN

FOR SELECT
  PROVINCENAME,
  CITYNAME,
  POSTCODE,
  STREETNAME,
  LOW,
  HIGH,
  EVEN,
  LATITUDE,
  LONGITUDE
  FROM BULKINSERTDATA
  INTO
  :localPROVINCENAME,
  :localCITYNAME,
  :localPOSTCODE,
  :localSTREETNAME,
  :localLOW,
  :localHIGH,
  :localEVEN,
  :localLAT,
  :localLNG
DO
BEGIN
    /* 1. Test for required input */
    IF (:localCITYNAME IS NULL) THEN
    BEGIN
        IN AUTONOMOUS TRANSACTION
        DO
        BEGIN
          INSERT INTO LOGS (LOGMESSAGE) VALUES ('CITYNAME is null.
Exception will be called: DATAMAYNOTBENULL');
        END
        EXCEPTION DATAMAYNOTBENULL;
    END


    IF (:localPOSTCODE IS NULL) THEN
    BEGIN
        IN AUTONOMOUS TRANSACTION
        DO
        BEGIN
          INSERT INTO LOGS (LOGMESSAGE) VALUES ('POSTCODE is null.
Exception will be called: DATAMAYNOTBENULL');
        END
        EXCEPTION DATAMAYNOTBENULL;
    END


    IF (:localSTREETNAME IS NULL) THEN
    BEGIN
        IN AUTONOMOUS TRANSACTION
        DO
        BEGIN
          INSERT INTO LOGS (LOGMESSAGE) VALUES ('STREETNAME is null.
Exception will be called: DATAMAYNOTBENULL');
        END
        EXCEPTION DATAMAYNOTBENULL;
    END

    /* 2. Test for valid input, initialize variables        */
    localCOUNTRYID=NULL;
    localPROVINCEID=NULL;
    localCITYNAMEID=NULL;
    localCITYID=NULL;
    localPOSTCODEID=NULL;
    localSTREETNAMEID=NULL;
    localFOURPP=LEFT(localPOSTCODE, 4);
    localPOSTCODECHARS=RIGHT(localPOSTCODE, 2);

    /* Fill database */
    -- We use update or insert instead of merge because we can use the
returning clause.
    UPDATE OR INSERT INTO COUNTRY(COUNTRYNAME) VALUES ('Nederland')
    MATCHING (COUNTRYNAME)
    RETURNING ID INTO :localCOUNTRYID;

    IF (:localPROVINCENAME IS NULL) THEN
    BEGIN
        localPROVINCEID=NULL;
    END
    ELSE
    BEGIN
        UPDATE OR INSERT INTO PROVINCE(PROVINCENAME, COUNTRY_ID)
VALUES(:localPROVINCENAME, :localCOUNTRYID)
        MATCHING (PROVINCENAME)
        RETURNING ID INTO :localPROVINCEID;
    END


    -- City is special, only add something if we don't have a valid CITYNAME
    -- Also, we assume the city name given is the official cityname.
    SELECT ID FROM CITYNAME WHERE NAME=:localCITYNAME INTO :localCITYNAMEID;
    IF (:localCITYNAMEID IS NULL) THEN
    BEGIN
        -- Add a city record first, then a CITYNAME
        INSERT INTO CITY (PROVINCE_ID) VALUES (:localPROVINCEID)
RETURNING ID INTO :localCITYID;
        INSERT INTO CITYNAME(NAME, CITY_ID, OFFICIAL) VALUES
(:localCITYNAME, :localCITYID, 1) RETURNING ID INTO :localCITYNAMEID;
    END -- (:localCITYNAMEID IS NULL)
    ELSE
    BEGIN
    --:localCITYNAMEID IS NOT NULL
    -- Note we use the CITYNAME table to get the CITY ID.
        SELECT CITY_ID FROM CITYNAME WHERE NAME=:localCITYNAME INTO
:localCITYID;
    END

    -- For Postcode, unique columns are the 4 character postcode
    UPDATE OR INSERT INTO POSTCODE(FOURPP, CITY_ID) VALUES
(:localFOURPP, :localCITYID)
    MATCHING (FOURPP)
    RETURNING ID INTO :localPOSTCODEID;

    UPDATE OR INSERT INTO STREETNAMES(STREETNAME) VALUES (:localSTREETNAME)
    MATCHING (STREETNAME)
    RETURNING ID INTO :localSTREETNAMEID;

    -- For Realstreet, unique columns are postcode, street name and even/odd
    UPDATE OR INSERT INTO
REALSTREET(CHARS,EVEN,LOW,HIGH,LAT,LNG,POSTCODE_ID,STREETNAME_ID)
    VALUES
(:localPOSTCODECHARS,:localEVEN,:localLOW,:localHIGH,:localLAT,:localLNG,:localPOSTCODEID,:localSTREETNAMEID)
    MATCHING(CHARS,POSTCODE_ID,STREETNAME_ID,EVEN);
END /* Insert loop */
END^
SET TERM ; ^



More information about the fpc-pascal mailing list