[fpc-devel] Kit's ambitions!

Wolf wv99999 at gmail.com
Thu May 17 00:56:26 CEST 2018



On 14/05/2018 04:30, David Pethes wrote:
> Hi,
> I would welcome inlining of (simple) asm routines.
>
I do not know what you consider to be the existing obstacles to inlining
assembler routines. What I do know is that in the attached program,
inlining does work. The program summarises my (current) understanding of
how to measure time with nanosecond reliability. (Asking for the time via
the Linux call "if clock_gettime(CLOCK_MONOTONIC, @ts)=0 then" does
indeed return nanoseconds, but it takes some 270 ns (about 1000 clock
ticks) to execute and thus does not give nanosecond reliability.)
However, repeated measurements do not produce the same output, and
therefore my little program does not yet have the reliability I want.
Statistical processing improves the situation somewhat, but not as much
as I want.

What I can say about inlining assembler routines is this: if the
variables into which registers are to be saved are on the stack, the
routine can be inlined. Never mind the hints in Lazarus' message pane.
Take this function as an example:
function GetProcessorUsed: longint;    inline;
var
  ProcUsed: longint;
begin
  asm
    CPUID
    .byte 0x0F, 0x01, 0xF9   // rdtscp (as op-code format): read the
                             // Time-Stamp Counter
    movl %ecx, ProcUsed      // This is the processor on which
                             // measurements take place. Measurements on
                             // other processors are discarded.
  end  ['eax','ebx','ecx','edx'];
  GetProcessorUsed:=ProcUsed;
end;
Because /ProcUsed/ is on the stack, I can move %ecx into it. But I 
cannot get %ecx directly into /GetProcessorUsed/. That requires a 
separate line of code.

wolf

Here is the full code, as promised. If anybody has a suggestion on how 
to improve it, please let me know, in a separate thread.

/program Speed_Test;
{$ASMMODE att}

uses sysutils, Linux, math;
type
   // One non-empty histogram bin, as filled in by
   // AccumulateValidMeasurements.
   TtscCount = record
       Group: longint;     // index into Range (Stop-Start tick difference)
       Count: longint;     // number of measurements counted in that bin
       CumFreq: Int64;     // running sum of the counts up to this bin
       end;
type
   // One point of the normalised curve built by EvaluateMeasurements.
   TCumFreq = record
       Group: longint;     // copied from TtscCount.Group
       CumFreq: real;      // TtscCount.CumFreq divided by ValidMeasurements
       end;
   TCumFrequency= array of TCumFreq;
   // Record handed to clock_gettime in MeasureCode.
   TTimeSpec = record
     tv_sec: int64;  // seconds (time_t)
     tv_nsec: int64; // nanoseconds (clong)
   end;
var
   TscCount: array of TtscCount;        // non-empty bins of Range
   Measured: TCumFrequency;             // TscCount scaled by ValidMeasurements
   MeasurementsToDo: int64=1000000;     // iterations of the loop in MeasureCode
   ProcessorUsed: LongInt;              // set from GetProcessorUsed in MeasureCode
   Range: array[0..9999] of longint;    // histogram indexed by Stop-Start
   ValidMeasurements: Int64;            // set by AccumulateValidMeasurements

function Get_ClockFreq(CPU: Char): real;
{ Returns the "cpu MHz" value that /proc/cpuinfo lists for the processor
  whose number is the single character CPU (so only processors 0..9 can
  be selected). Returns 0 if the file has no matching entry.
  Halts the program with a message if the file is missing, cannot be
  opened, or the frequency field cannot be converted to a number.
  The clock frequency is read from /proc/cpuinfo because the author
  found no way to obtain the actual frequency directly. }
const
   SourceFile = '/proc/cpuinfo';
var
   FileHandle: LongInt;
   NumRead: LongInt;
   Buffer: packed array[0..4095] of char;
   Chunk, Data, Line, Key, Value: ansistring;
   LineStart, LineEnd, ColonPos: LongInt;
   InWantedCPU: boolean;
   Fmt: TFormatSettings;
begin
   Get_ClockFreq:=0;
   if not FileExists(SourceFile) then
   begin
     writeln('Error: Input file "',SourceFile,'" has not been found');
     halt;
   end;
   FileHandle:=FileOpen(SourceFile,fmOpenRead);
   if FileHandle<0 then
   begin
     writeln('Error: Input file "',SourceFile,'" cannot be opened');
     halt;
   end;

   // The file can be longer than one buffer, so read until nothing
   // more is returned instead of looking at the first 4096 bytes only.
   Data:='';
   repeat
     NumRead:=FileRead(FileHandle,Buffer,SizeOf(Buffer));
     if NumRead>0 then
     begin
       SetString(Chunk,PChar(@Buffer[0]),NumRead);
       Data:=Data+Chunk;
     end;
   until NumRead<=0;
   FileClose(FileHandle);

   // The file always uses '.' as decimal separator, whatever the locale.
   Fmt:=DefaultFormatSettings;
   Fmt.DecimalSeparator:='.';

   // Walk the text line by line; each line of interest is "key : value".
   InWantedCPU:=false;
   LineStart:=1;
   while LineStart<=Length(Data) do
   begin
     LineEnd:=LineStart;
     while (LineEnd<=Length(Data)) and (Data[LineEnd]<>#10) do
       inc(LineEnd);
     Line:=Copy(Data,LineStart,LineEnd-LineStart);
     LineStart:=LineEnd+1;

     ColonPos:=Pos(':',Line);
     if ColonPos>0 then
     begin
       Key:=Trim(Copy(Line,1,ColonPos-1));
       Value:=Trim(Copy(Line,ColonPos+1,Length(Line)));
       if CompareText(Key,'processor')=0 then
         // a new processor section starts here
         InWantedCPU:=(Value=CPU)
       else if InWantedCPU and (CompareText(Key,'cpu MHz')=0) then
       begin
         try
           Get_ClockFreq:=StrToFloat(Value,Fmt);
         except
           on E : exception do
           begin
             writeln('Data read error: cannot convert ',Value,
                     ' into number');
             writeln('Program aborted');
             halt;
           end;
         end;
         break;
       end;
     end;
   end;
end;

procedure ReadProcessorFrequencyInformationLeaf;  inline;
{ Executes CPUID leaf 0x16 (Processor Frequency Information) and copies
  the low 16 bits of EAX, EBX, ECX and EDX into local variables.
  Nothing is returned; MeasureCode calls this procedure only as a
  warm-up before the timed loop. }
var
   CPUID_16H_AX: Word;      // Processor Base Frequency (in MHz)
   CPUID_16H_BX: Word;      // Maximum Frequency (in MHz)
   CPUID_16H_CX: Word;      // Bus (Reference) frequency (in MHz)
   CPUID_16H_DX: Word;      // Reserved = 0
begin
   CPUID_16H_AX:=0;
   CPUID_16H_BX:=0;
   CPUID_16H_CX:=0;
   CPUID_16H_DX:=0;
   asm
     mov $0x16, %eax               // select leaf 0x16
     cpuid                         // access it
     mov %ax, CPUID_16H_AX         // Processor Base Frequency (in MHz)
     mov %bx, CPUID_16H_BX         // Maximum Frequency (in MHz)
     mov %cx, CPUID_16H_CX         // Bus (Reference) frequency (in MHz)
     mov %dx, CPUID_16H_DX         // Reserved = 0
   // CPUID overwrites the full 32-bit registers, so list those
   end  ['eax','ebx','ecx','edx'];
end;

function GetProcessorUsed: longint;    inline;
{ Returns the value RDTSCP leaves in ECX. The program treats this value
  as the number of the processor the code is running on; measurements
  taken on other processors are discarded by MeasureCode.
  ECX is first stored in the local ProcUsed and copied to the function
  result in a separate Pascal statement. }
var
   ProcUsed: longint;
begin
   asm
     xorl %eax, %eax             // give CPUID a defined leaf (0)
     cpuid                       // serialize before reading
     .byte 0x0F, 0x01, 0xF9      // rdtscp (as op-code format)
     movl %ecx, ProcUsed         // processor identification
   end  ['eax','ebx','ecx','edx'];
   GetProcessorUsed:=ProcUsed;
end;

procedure MeasureCode;
{ Times the code between the two marker comments MeasurementsToDo times
  and builds a histogram of the elapsed Time-Stamp Counter ticks in the
  global array Range.
  - Range[d] counts the runs that took d ticks; every run that took
    High(Range) ticks or more is counted in Range[High(Range)].
  - The global ProcessorUsed is set from GetProcessorUsed before the
    loop; a run is only counted if RDTSCP reported that same value at
    both its start and its end.
  - A negative tick difference is ignored instead of being used as an
    array index. }
var
   ts: TTimeSpec;
   MilliSecondTime: extended;
   Start,Stop: int64;        // Time Stamp Counter before / after the test
   Ticks: int64;             // Stop-Start
   i,k: int64;
   ProcessorUsed_Start, ProcessorUsed_Stop: longint;   // ECX from rdtscp
begin
   for i:=0 to High(Range) do Range[i]:=0;
   Start:=0;   Stop:=0;
   // this loop is just for warm-up
   for k:=0 to 4 do ReadProcessorFrequencyInformationLeaf;
   ProcessorUsed:=GetProcessorUsed;
   for i:=1 to MeasurementsToDo do
   begin
     Start:=0;   Stop:=0;
     asm
       xorl %eax, %eax        // always the same CPUID leaf (0), not
                              // whatever happens to be left in EAX
       cpuid                  // force serialization
       .byte 0x0F, 0x01, 0xF9 // rdtscp (as op-code format)
       movl %eax, Start+0     // save least-significant longword
       movl %edx, Start+4     // save most-significant longword
       movl %ecx, ProcessorUsed_Start
     end  ['eax','ebx','ecx','edx'];
     // insert instruction to be tested below this line

     // return time in milliseconds, rounded to 1 nanosecond
     if clock_gettime(CLOCK_MONOTONIC, @ts)=0 then
       MilliSecondTime:=RoundTo(1e3*ts.tv_sec+1e-6*ts.tv_nsec,-6);

     // insert instruction to be tested above this line
     asm
       .byte 0x0F, 0x01, 0xF9 // rdtscp (as op-code format)
       movl %eax, Stop+0      // save least-significant longword
       movl %edx, Stop+4      // save most-significant longword
       movl %ecx, ProcessorUsed_Stop
       cpuid                  // serialize after the values are saved
     end  ['eax','ebx','ecx','edx'];

     // ignore measurements that were not done on ProcessorUsed
     if (ProcessorUsed_Start=ProcessorUsed) and
        (ProcessorUsed_Stop=ProcessorUsed) then
     begin
       Ticks:=Stop-Start;
       if Ticks>=0 then
       begin
         // build the histogram; the last bin collects the overflow
         if Ticks<High(Range) then
           inc(Range[Ticks])
         else
           inc(Range[High(Range)]);
       end;
     end;
   end;
end;

function AccumulateValidMeasurements: Int64;
{ Sums all bins of the histogram Range into the global ValidMeasurements
  and returns that sum. Also rebuilds the global TscCount:
  - TscCount[0] is an anchor entry for group 0 holding Range[0];
  - every non-empty bin 1..High(Range)-1 adds one entry with its index
    (Group), its count (Count) and the running total so far (CumFreq).
  The overflow bin Range[High(Range)] is part of the returned sum but
  gets no TscCount entry, so the last CumFreq can be smaller than the
  sum.
  Bin 0 is represented by the anchor entry only; it is not added a
  second time by the loop, and it is included in the returned sum. }
var
   i: int64;
   Next: int64;      // index of the next TscCount entry to fill
begin
   ValidMeasurements:=0;
   for i:=0 to High(Range) do
     ValidMeasurements:=ValidMeasurements+Range[i];
   AccumulateValidMeasurements:=ValidMeasurements;

   // Size the array once: the anchor plus one entry per non-empty bin.
   Next:=1;
   for i:=1 to High(Range)-1 do
     if Range[i]>0 then inc(Next);
   SetLength(TscCount,0);          // drop results of an earlier call
   SetLength(TscCount,Next);

   TscCount[0].Group:=0;
   TscCount[0].Count:=Range[0];
   TscCount[0].CumFreq:=Range[0];
   Next:=1;
   for i:=1 to High(Range)-1 do
   begin
     if Range[i]>0 then
     begin
       TscCount[Next].Group:=i;
       TscCount[Next].Count:=Range[i];
       TscCount[Next].CumFreq:=Range[i]+TscCount[Next-1].CumFreq;
       inc(Next);
     end;
   end;
end;

procedure ShowMeasurements;
{ Prints one line per TscCount entry, starting at index 1: Group, Count,
  cumulative count, and the matching Measured cumulative frequency as a
  percentage. Stops after the first line whose cumulative frequency
  exceeds 0.999, or when Measured has no entry for the index. }
var
   i: int64;
begin
   for i:=1 to High(TscCount) do
   begin
     // Measured is filled by EvaluateMeasurements; do not read past it
     if i>High(Measured) then exit;
     writeln(TscCount[i].Group,'   ',TscCount[i].Count,'   ',
             TscCount[i].CumFreq,'   ',100*Measured[i].CumFreq:6:3);
     if Measured[i].Cumfreq>0.999 then exit;
   end;
end;

procedure EvaluateMeasurements;
{ Resizes Measured to the length of TscCount and fills it: each entry
  gets the Group of the matching TscCount entry and that entry's
  cumulative count divided by ValidMeasurements. }
var
   Idx, Last: int64;
begin
   Last:=High(TscCount);
   SetLength(Measured,Last+1);
   Measured[0].Group:=0;
   Measured[0].CumFreq:=TscCount[0].CumFreq/ValidMeasurements;
   Idx:=0;
   while Idx<=Last do
   begin
     with Measured[Idx] do
     begin
       Group:=TscCount[Idx].Group;
       CumFreq:=TscCount[Idx].Cumfreq/ValidMeasurements;
     end;
     inc(Idx);
   end;
end;

function Limit(CF: TCumFrequency; Frequency: real):real;
{ Returns the Group value at which the cumulative frequency curve CF
  reaches Frequency, using linear interpolation between the two
  neighbouring points of the curve.
  Outside the curve the result is clamped:
  - an empty CF gives 0;
  - if even the first point lies above Frequency, its Group is returned;
  - if no point lies above Frequency, the Group of the last point is
    returned. }
var
   i: int64;
   Slope,Intercept: real;
begin
   if Length(CF)=0 then
   begin
     Limit:=0;
     exit;
   end;

   // find the first point whose cumulative frequency exceeds Frequency
   i:=0;
   while (i<=High(CF)) and (CF[i].CumFreq<=Frequency) do inc(i);

   if i=0 then
     Limit:=CF[0].Group
   else if i>High(CF) then
     Limit:=CF[High(CF)].Group
   else
   begin
     // CF[i].CumFreq > Frequency >= CF[i-1].CumFreq, so the divisor
     // below is never zero
     Intercept:=(CF[i-1].Group*CF[i].CumFreq-CF[i].Group*CF[i-1].CumFreq)
                /(CF[i].CumFreq-CF[i-1].CumFreq);
     Slope:=(CF[i].Group-CF[i-1].Group)/(CF[i].CumFreq-CF[i-1].CumFreq);
     Limit:=Frequency*Slope+Intercept;
   end;
end;

procedure Difference;
{ Prints the tick counts at which the measured cumulative frequency
  curve reaches 1%, 5%, 20%, 50%, 80%, 95% and 99%, each obtained from
  Limit(Measured, ...). }
const
   Captions: array[0..6] of string =
     ('1% Limit=','5% Limit=','20% Limit=','Median=',
      '80% Limit=','95% Limit=','99% Limit=');
   Fractions: array[0..6] of real =
     (0.01, 0.05, 0.20, 0.50, 0.80, 0.95, 0.99);
var
   n: integer;
begin
   writeln;
   writeln('Clock ticks used:  ');
   for n:=Low(Captions) to High(Captions) do
     writeln(Captions[n],Limit(Measured,Fractions[n]):6:2);
end;

begin
   // taskset -c 1;
   // taskset allows to change processor, if used from the command line
   MeasureCode;
   ValidMeasurements:=AccumulateValidMeasurements;
   // Get_ClockFreq takes a single character, so only the first digit of
   // the processor number is passed on.
   writeln('Tests done on processor ',ProcessorUsed,', running at ',
           Get_ClockFreq(IntToStr(ProcessorUsed)[1]):7:3,'MHz',' doing ',
           ValidMeasurements,' valid measurements');
   // EvaluateMeasurements divides by ValidMeasurements
   if ValidMeasurements=0 then
   begin
     writeln('No valid measurements; nothing to evaluate');
     halt;
   end;
   EvaluateMeasurements;
   ShowMeasurements;
   Difference;
end./

> _______________________________________________
> fpc-devel maillist  -  fpc-devel at lists.freepascal.org
> http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freepascal.org/pipermail/fpc-devel/attachments/20180517/f81e615f/attachment.html>


More information about the fpc-devel mailing list