[fpc-pascal] THashedStringList vs TFPHashTable

Graeme Geldenhuys graemeg.lists at gmail.com
Wed Sep 27 19:42:34 CEST 2006


Thanks to all that replied.  The string that is going to be stored in
the List (with associated object) is a GUID string, so is very random
by nature.  Would this have any negative affects in the hash table?

I will try it out anyway, I can always swap the internal list out if
needed.  What I am creating is a Flyweight (design pattern) to store
all common objecs I use.  I will be fetching the objects based on the
OID (GUID) string.

Regards,
  - Graeme -


On 27/09/06, Dean Zobec <dezobec at tin.it> wrote:
> Graeme Geldenhuys ha scritto:
> > Hi,
> >
> > Is TFPHashTable the same as Delphi's THashedStringList?
> >
> > I am looking for a List class that can hold large amounts of objects
> > with a ID string associated for quick lookups.
> >
> > Regards,
> >  - Graeme -
>
> Yes, similar to a THashedStringList, but with a special implementation
> The TFPHashTable was highly optimized with a lot of profiling, while
> trying to achieve the ease of use through object orientation and ease of
> maintainance. It's a hash table with a customizable hash function (to
> achieve constant performance in searches), chaining is used as a
> collision resolution scheme. Some statistics are also provided to be
> able to choose the appropriate hash function and the appropriate hash
> table size.
> The difference in performance with respect to a simple ordered
> TStringList is evident when more then 100.000 elements are added to the
> container, the number of elements the container can hold is huge
> (longword, and obviously ram size, is the limit :). I have another idea
> to further improve the performance of searches and I'm planning to
> further profile it in the next weeks to see if there are other speed gains.
> Be aware that the version in 2.0.4 and before contains a bug that was
> solved by Marco in 2.1.1. and merged in 2.0.5 (an AV if the insertion is
> made after a clear) due to the use of longwords in the for cycles.
> I'm attaching the fpcunit tests for you to see how to use it, and I'll
> give you all the assistance that you need. I'll be glad to receive some
> feedback as usual.
> Regards,
> Dean
>
>
> unit testfphashtable;
>
> {$mode objfpc}{$H+}
>
> interface
>
> uses
>   Classes, SysUtils, fpcunit, testutils, testregistry, contnrs;
>
> type
>
>   { TTestHtNode }
>
>   TTestHtNode = class(TTestCase)
>   published
>     procedure TestNodeCreation;
>     procedure TestKeyComparison;
>   end;
>
>
>
>
>   //inherited to be able to get access to protected methods
>   TMyHashTable = class(TFPHashTable)
>   end;
>
>   { TTestFPHashTable }
>
>   TTestFPHashTable= class(TTestCase)
>   private
>     ht: TMyHashTable;
>     FSum: integer;
>   protected
>     procedure SetUp; override;
>     procedure TearDown; override;
>     procedure SumTest(Item: Pointer; const Key: string;
>       var Continue: Boolean);
>     procedure SumTestUntilFound100(Item: Pointer; const Key: string;
>       var Continue: Boolean);
>   published
>     procedure TestCreate;
>     procedure TestCreateWith;
>     procedure TestIsEmpty;
>     procedure TestAdd;
>     procedure TestAddSimpleSyntax;
>     procedure TestGetData;
>     procedure TestChainLength;
>     procedure TestDelete;
>     procedure TestClear;
>     procedure TestForEachCall;
>     procedure TestForEachCallBreak;
>     procedure TestHashTableGrow;
>     procedure TestVoidSlots;
>     //test for bug 0007292 fixed by marco guard all for loops with unsigned
>     //loopcounter against overflow (rev.4507)
>     procedure TestAddAfterClear;
>   end;
>
>
>
> implementation
>
> procedure TTestFPHashTable.SetUp;
> begin
>   ht := TMyHashTable.CreateWith(9973, @RSHash);
>   AssertEquals(12289, ht.HashTableSize);
> end;
>
> procedure TTestFPHashTable.TearDown;
> begin
>   ht.Free;
> end;
>
> procedure TTestFPHashTable.TestAdd;
> begin
>   ht.Add('1', pointer(1));
>   ht.Add('2', pointer(2));
>   ht.Add('nil', nil);
>   AssertEquals('wrong number of items', 3, ht.Count);
> end;
>
> procedure TTestFPHashTable.TestAddSimpleSyntax;
> begin
>   ht['1'] := pointer(1);
>   ht['2'] := pointer(2);
>   ht['nil'] := nil;
>   AssertEquals('wrong number of items', 3, ht.Count);
> end;
>
> procedure TTestFPHashTable.TestGetData;
> var
>   i: integer;
> begin
>   for i := 0 to 9999 do
>     ht.Add(intToStr(i), pointer(i));
>   AssertEquals(10000, ht.Count);
>   for i := 0 to 9999 do
>     AssertEquals(i, integer(ht[PChar(IntToStr(i))]));
>   for i := 9999 downto 0 do
>     AssertEquals(i, integer(ht[PChar(IntToStr(i))]));
> end;
>
> procedure TTestFPHashTable.TestChainLength;
> var
>   i: integer;
>   sum: int64;
> begin
>   sum := 0;
>   for i := 0 to 9999 do
>     ht.Add(intToStr(i), pointer(i));
>   AssertEquals(10000, ht.Count);
>   for i := 0 to ht.HashTableSize-1 do
>     if Assigned(ht.HashTable[i]) then
>       Sum := Sum + ht.ChainLength(i);
>   AssertEquals(10000, sum);
> end;
>
> procedure TTestFPHashTable.TestDelete;
> var
>   i: DWord;
> begin
>   for i := 0 to 9999 do
>     ht.Add(intToStr(i), pointer(i));
>   ht.Delete('994');
>   AssertEquals('Wrong number of items after delete', 9999, ht.Count);
>   AssertNull('Item not deleted', ht.Find('994'));
> end;
>
> procedure TTestFPHashTable.TestClear;
> var
>   i: integer;
> begin
>   for i := 0 to 9999 do
>     ht.Add(intToStr(i), pointer(i));
>   ht.Clear;
>   AssertTrue('container not empty', ht.IsEmpty);
> end;
>
> procedure TTestFPHashTable.TestHashTableGrow;
> var
>   i: integer;
> begin
>   for i := 0 to 9999 do
>     ht.Add(intToStr(i), pointer(i));
>   ht.HashTableSize := ht.HashTableSize + 1;
>   AssertEquals(24593, ht.HashTableSize);
>   AssertEquals(10000, ht.Count);
>   for i := 0 to 9999 do
>     AssertEquals(i, integer(ht[PChar(IntToStr(i))]));
> end;
>
> procedure TTestFPHashTable.TestVoidSlots;
> begin
>   AssertEquals(12289, ht.VoidSlots);
>   ht.Add('a', nil);
>   AssertEquals(12288, ht.VoidSlots);
> end;
>
> procedure TTestFPHashTable.TestAddAfterClear;
> var
>   i: integer;
> begin
>   for i := 0 to 9999 do
>     ht.Add(intToStr(i), pointer(i));
>   ht.Clear;
>   AssertTrue('container not empty', ht.IsEmpty);
>   for i := 0 to 9999 do
>     ht.Add(intToStr(i), pointer(i));
>   AssertEquals(10000, ht.Count);
>   for i := 0 to 9999 do
>     AssertEquals(i, integer(ht[PChar(IntToStr(i))]));
>   for i := 9999 downto 0 do
>     AssertEquals(i, integer(ht[PChar(IntToStr(i))]));
> end;
>
> procedure TTestFPHashTable.TestForEachCall;
> var
>   i: integer;
>   p: THTNode;
> begin
>   FSum := 0;
>   for i := 1 to 10000 do
>     ht.Add(intToStr(i), pointer(i));
>   p := ht.ForEachCall(@SumTest);
>   AssertEquals(10000*10001/2, FSum);
>   AssertNull(p);
> end;
>
> procedure TTestFPHashTable.TestForEachCallBreak;
> var
>   i: integer;
>   p: THTNode;
> begin
>   FSum := 0;
>   for i := 1 to 10000 do
>     ht.Add(intToStr(i), pointer(i));
>   p := ht.ForEachCall(@SumTestUntilFound100);
>   AssertEquals(100, integer(p.Data));
> end;
>
> procedure TTestFPHashTable.SumTest(Item: Pointer; const Key: string;
>   var Continue: Boolean);
> begin
>   FSum := FSum + Integer(Item);
> end;
>
> procedure TTestFPHashTable.SumTestUntilFound100(Item: Pointer; const Key: string;
>   var Continue: Boolean);
> begin
>   FSum := FSum + Integer(Item);
>   if Integer(Item) = 100 then
>     Continue := false;
> end;
>
> procedure TTestFPHashTable.TestCreate;
> var
>   t: TFPHashTable;
> begin
>   t := TFPHashTable.Create;
>   try
>     AssertEquals(196613, t.HashTableSize);
>   finally
>     t.Free;
>   end;
> end;
>
> procedure TTestFPHashTable.TestCreateWith;
> var
>   h: TMyHashTable;
> begin
>   h := TMyHashTable.CreateWith(7, @RSHash);
>   try
>     AssertEquals('wrong table size', 53, h.HashTableSize);
>     AssertSame('wrong hash function', @RSHash, h.HashFunction);
>   finally
>     h.Free;
>   end;
> end;
>
> procedure TTestFPHashTable.TestIsEmpty;
> begin
>   AssertTrue(ht.IsEmpty);
> end;
>
> { TTestHtNode }
>
>
> procedure TTestHtNode.TestNodeCreation;
> var
>   node: THTNode;
> begin
>   try
>     node := THTNode.CreateWith('Dean');
>     AssertEquals(4, Length(node.Key));
>     AssertEquals('D', Node.Key[1]);
>     AssertEquals('e', Node.Key[2]);
>     AssertEquals('a', Node.Key[3]);
>     AssertEquals('n', Node.Key[4]);
>     AssertEquals(#0, Node.Key[5]);
>   finally
>     node.Free;
>   end;
> end;
>
> procedure TTestHtNode.TestKeyComparison;
> var
>   node: THTNode;
> begin
>   try
>     node := THTNode.CreateWith('Dean');
>     AssertTrue('key not found', node.HasKey('Dean'));
>     AssertFalse('wrong key found', node.HasKey('Dea'));
>     AssertFalse('wrong key found', node.HasKey('Deanz'));
>   finally
>     node.Free;
>   end;
> end;
>
>
> initialization
>
>   RegisterTests( [TTestHTNode, TTestFPHashTable]);
> end.
>
>
>
> _______________________________________________
> fpc-pascal maillist  -  fpc-pascal at lists.freepascal.org
> http://lists.freepascal.org/mailman/listinfo/fpc-pascal
>
>


-- 
There's no place like 127.0.0.1



More information about the fpc-pascal mailing list