[fpc-devel] Kit's ambitions!
Wolf
wv99999 at gmail.com
Thu May 17 00:56:26 CEST 2018
On 14/05/2018 04:30, David Pethes wrote:
> Hi,
> I would welcome inlining of (simple) asm routines.
>
I do not know what you consider to be the existing obstacles to inlining
assembler routines. What I do know is that in the attached program,
inlining does work. The program summarises my (current) understanding of
how to measure time with nanosecond reliability. (Asking for the time via
the Linux call "if clock_gettime(CLOCK_MONOTONIC, @ts)=0 then" does indeed
return nanoseconds, but it takes some 270 ns (or about 1000 clock ticks)
to execute and thus does not provide nanosecond reliability.) However,
repeated measurements do not produce the same output, and therefore
my little program does not have the reliability I want. Statistical
processing does something to improve the situation, but not quite what I
want.
What I can say about inlining assembler routines is this: if the
variables into which registers are to be saved are on the stack, the
routine can be inlined. Never mind the hints in Lazarus' message pane.
Take this function, for example:
function GetProcessorUsed: longint; inline;
var
  ProcUsed: longint;
begin
  asm
    CPUID
    .byte 0x0F, 0x01, 0xF9  // read the Time-Stamp Counter rdtscp (as op-code format),
    movl %ecx, ProcUsed     // This is the processor on which measurements take place. Measurements on other processors are discarded.
  end ['eax','ebx','ecx','edx'];
  GetProcessorUsed:=ProcUsed;
end;
Because /ProcUsed/ is on the stack, I can move %ecx into it. But I
cannot get %ecx directly into /GetProcessorUsed/. That requires a
separate line of code.
wolf
Here is the full code, as promised. If anybody has a suggestion on how
to improve it, please let me know, in a separate thread.
/program Speed_Test;
{$ASMMODE att}
uses sysutils, Linux, math;
type
// One histogram bucket that received at least one measurement.
TtscCount = record
Group: longint; // tick difference (index into Range) this entry stands for
Count: longint; // number of measurements recorded for this tick difference
CumFreq: Int64; // running total of Count up to and including this entry
end;
type
// One point of the normalised cumulative frequency curve.
TCumFreq = record
Group: longint; // tick difference, copied from the matching TscCount entry
CumFreq: real; // TscCount[].CumFreq divided by ValidMeasurements
end;
TCumFrequency= array of TCumFreq;
// Record whose address is handed to clock_gettime in MeasureCode.
// NOTE(review): both fields are declared int64 here; confirm this matches the
// platform's timespec layout (time_t / clong) on every target that is built.
TTimeSpec = record
tv_sec: int64; // seconds (time_t in the C declaration)
tv_nsec: int64; // nanoseconds (clong in the C declaration)
end;
var
TscCount: array of TtscCount; // filled by AccumulateValidMeasurements
Measured: TCumFrequency; // filled by EvaluateMeasurements from TscCount
MeasurementsToDo: int64=1000000; // iterations of the timing loop in MeasureCode
ProcessorUsed: LongInt; // value returned by GetProcessorUsed before the timing loop starts
Range: array[0..9999] of longint; // Range[d] counts measurements with Stop-Start=d; the last element collects all larger differences
ValidMeasurements: Int64; // total computed by AccumulateValidMeasurements
function Get_ClockFreq(CPU: Char): real;
{ Returns the "cpu MHz" value that /proc/cpuinfo lists for the logical
  processor whose number is the single character CPU ('0'..'9').
  Since there is no way I can find to extract the actual clock frequency
  directly, it is read from /proc/cpuinfo.

  Returns 0 when the file has no "processor" entry equal to CPU that is
  followed by a "cpu MHz" line.
  Prints a message and halts when the file is missing, cannot be opened,
  or the frequency text cannot be converted into a number. }
const
  SourceFile = '/proc/cpuinfo';
var
  FileHandle: THandle;
  Chunk: packed array[0..4095] of char;
  Piece, Data, Line, Key, Value: AnsiString;
  NumRead: int64;
  LineStart, LineEnd, ColonPos: longint;
  Code: integer;
  InWantedCPU, Found: boolean;
  rc: real;
begin
  if not FileExists(SourceFile) then
  begin
    writeln('Error: Input file "',SourceFile,'" has not been found');
    halt;
  end;
  FileHandle:=FileOpen(SourceFile,fmOpenRead);
  if FileHandle=THandle(-1) then
  begin
    writeln('Error: Input file "',SourceFile,'" cannot be opened');
    halt;
  end;

  // Read the whole file: with many cores it is larger than one 4096-byte
  // chunk, and a single read would silently hide the later processors.
  Data:='';
  repeat
    NumRead:=FileRead(FileHandle,Chunk,SizeOf(Chunk));
    if NumRead>0 then
    begin
      SetString(Piece,PChar(@Chunk[0]),NumRead);
      Data:=Data+Piece;
    end;
  until NumRead<=0;
  FileClose(FileHandle);

  // Walk the text line by line as "key : value" pairs. Matching on the key
  // avoids false hits on the word "Processor" inside a model name.
  rc:=0;
  Found:=False;
  InWantedCPU:=False;
  LineStart:=1;
  while (LineStart<=Length(Data)) and (not Found) do
  begin
    LineEnd:=LineStart;
    while LineEnd<=Length(Data) do
    begin
      if Data[LineEnd]=#10 then break;
      inc(LineEnd);
    end;
    Line:=Copy(Data,LineStart,LineEnd-LineStart);
    LineStart:=LineEnd+1;

    ColonPos:=Pos(':',Line);
    if ColonPos>0 then
    begin
      Key:=Trim(Copy(Line,1,ColonPos-1));
      Value:=Trim(Copy(Line,ColonPos+1,Length(Line)));
      if CompareText(Key,'processor')=0 then
        InWantedCPU:=(Value=CPU)
      else if InWantedCPU and (CompareText(Key,'cpu MHz')=0) then
      begin
        // Val always expects '.' as decimal separator, which is what
        // /proc/cpuinfo uses regardless of the user's locale.
        Val(Value,rc,Code);
        if Code<>0 then
        begin
          writeln('Data read error: cannot convert ',Value,' into number');
          writeln('Program aborted');
          halt;
        end;
        Found:=True;
      end;
    end;
  end;
  Get_ClockFreq:=rc;
end;
procedure ReadProcessorFrequencyInformationLeaf; inline;
{ Executes CPUID with EAX=0x16 (Processor Frequency Information Leaf) and
  stores the low 16 bits of EAX, EBX, ECX and EDX in local variables.
  Nothing is returned: the values are discarded when the procedure ends.
  MeasureCode calls it a few times purely as warm-up work. }
var
  CPUID_16H_AX: Word; // Processor Base Frequency (in MHz)
  CPUID_16H_BX: Word; // Maximum Frequency (in MHz)
  CPUID_16H_CX: Word; // Bus (Reference) frequency (in MHz)
  CPUID_16H_DX: Word; // Reserved = 0
begin
  CPUID_16H_AX:=0;
  CPUID_16H_BX:=0;
  CPUID_16H_CX:=0;
  CPUID_16H_DX:=0;
  asm
    mov $0x16, %eax        // select Processor Frequency Information Leaf 0x16
    cpuid                  // access it
    mov %ax, CPUID_16H_AX  // Processor Base Frequency (in MHz)
    mov %bx, CPUID_16H_BX  // Maximum Frequency (in MHz)
    mov %cx, CPUID_16H_CX  // Bus (Reference) frequency (in MHz)
    mov %dx, CPUID_16H_DX  // Reserved = 0
  // CPUID overwrites the full 32-bit registers, so declare those as modified,
  // in the same form the other asm blocks of this program use.
  end ['eax','ebx','ecx','edx'];
end;
function GetProcessorUsed: longint; inline;
{ Returns the value that RDTSCP leaves in ECX. The program treats this value
  as the identifier of the processor on which the measurements take place;
  measurements reporting a different value are discarded by MeasureCode.
  The register cannot be moved into the function result directly, so it is
  first stored in a local variable and assigned in a separate statement. }
var
  AuxValue: longint;
begin
  asm
    cpuid                   // executed immediately before the counter read
    .byte 0x0F, 0x01, 0xF9  // rdtscp, written as raw op-code bytes
    movl %ecx, AuxValue     // keep the processor identifier
  end ['eax','ebx','ecx','edx'];
  GetProcessorUsed:=AuxValue;
end;
procedure MeasureCode;
{ Runs the code under test MeasurementsToDo times and records, for every run,
  the number of time-stamp-counter ticks between the two RDTSCP reads.

  Results go into the global histogram Range: Range[d] is incremented for a
  difference of d ticks, and the last element collects every difference of
  High(Range) ticks or more. A run is ignored when either RDTSCP read reports
  a processor identifier other than the global ProcessorUsed, or when the
  tick difference is negative.

  Side effects: clears Range and sets ProcessorUsed. }
var
  ts: TTimeSpec;
  MilliSecondTime: extended;
  Start,Stop: int64;   // time-stamp counter values before / after the tested code
  Delta: int64;        // Stop-Start
  i,k: int64;
  ProcessorUsed_Start, ProcessorUsed_Stop: longint; // ECX delivered by each rdtscp
begin
  for i:=0 to High(Range) do Range[i]:=0;
  for k:=0 to 4 do ReadProcessorFrequencyInformationLeaf; // this loop is just for warm-up
  ProcessorUsed:=GetProcessorUsed;
  for i:=1 to MeasurementsToDo do
  begin
    Start:=0; Stop:=0;
    asm
      cpuid                   // force serialization
      .byte 0x0F, 0x01, 0xF9  // read the Time-Stamp Counter rdtscp (as op-code format),
      movl %eax, Start+0      // save least-significant longword
      movl %edx, Start+4      // save most-significant longword
      movl %ecx, ProcessorUsed_Start
    end ['eax','ebx','ecx','edx'];
    // insert instruction to be tested below this line
    if clock_gettime(CLOCK_MONOTONIC, @ts)=0 then // return time in milliseconds, rounded to 1 nanosecond
      MilliSecondTime:=RoundTo(1e3*ts.tv_sec+1e-6*ts.tv_nsec,-6);
    // insert instruction to be tested above this line
    asm
      .byte 0x0F, 0x01, 0xF9  // read the Time-Stamp Counter rdtscp (as op-code format),
      movl %eax, Stop+0       // save least-significant longword
      movl %edx, Stop+4       // save most-significant longword
      movl %ecx, ProcessorUsed_Stop
      cpuid
    end ['eax','ebx','ecx','edx'];
    // ignore measurements that were not done on ProcessorUsed
    if (ProcessorUsed_Start=ProcessorUsed) and (ProcessorUsed_Stop=ProcessorUsed) then
    begin
      Delta:=Stop-Start;
      // A negative difference would index Range below its lower bound,
      // so such a run is dropped instead of being counted.
      if Delta>=0 then
      begin
        // build the histogram of tick differences
        if Delta<High(Range) then
          inc(Range[Delta])
        else
          inc(Range[High(Range)]);
      end;
    end;
  end;
end;
function AccumulateValidMeasurements: Int64;
{ Condenses the histogram Range into the global table TscCount and returns
  the total number of recorded measurements (also stored in the global
  ValidMeasurements).

  TscCount[0] always represents a tick difference of 0. Every further entry
  represents one non-empty element of Range[1..High(Range)-1], in ascending
  order, and carries the running total in CumFreq.
  The last element of Range (the overflow bucket) is part of the returned
  total but gets no entry in TscCount. }
var
  i: int64;
  Entries: int64;  // number of TscCount entries needed
  Next: int64;     // index of the entry being filled
begin
  // Count every bucket, including Range[0], so that the total agrees with
  // the cumulative counts stored in TscCount, and size the table in one go.
  ValidMeasurements:=0;
  Entries:=1;
  for i:=0 to High(Range) do
  begin
    ValidMeasurements:=ValidMeasurements+Range[i];
    if (i>=1) and (i<High(Range)) and (Range[i]>0) then inc(Entries);
  end;
  AccumulateValidMeasurements:=ValidMeasurements;

  SetLength(TscCount,Entries);
  TscCount[0].Group:=0;
  TscCount[0].Count:=Range[0];
  TscCount[0].CumFreq:=Range[0];

  // Start at 1: Range[0] is already held by the first entry and must not be
  // added to the running total a second time.
  Next:=1;
  for i:=1 to High(Range)-1 do
  begin
    if Range[i]>0 then
    begin
      TscCount[Next].Group:=i;
      TscCount[Next].Count:=Range[i];
      TscCount[Next].CumFreq:=Range[i]+TscCount[Next-1].CumFreq;
      inc(Next);
    end;
  end;
end;
procedure ShowMeasurements;
{ Prints one line per TscCount entry, starting with the second one: tick
  difference, count, cumulative count and cumulative percentage. Printing
  stops after the first line whose cumulative fraction exceeds 0.999. }
var
  Row: int64;
  Finished: boolean;
begin
  Row:=1;
  Finished:=False;
  while (Row<=High(TscCount)) and (not Finished) do
  begin
    writeln(TscCount[Row].Group,' ',TscCount[Row].Count,' ',
            TscCount[Row].CumFreq,' ',100*Measured[Row].CumFreq:6:3);
    Finished:=Measured[Row].CumFreq>0.999;
    inc(Row);
  end;
end;
procedure EvaluateMeasurements;
{ Builds the global array Measured from TscCount: one entry per TscCount
  entry, with the same Group and with CumFreq divided by ValidMeasurements,
  i.e. the cumulative count expressed as a fraction of all measurements.
  When ValidMeasurements is 0 every fraction is set to 0 instead of
  dividing by zero. An empty TscCount yields an empty Measured. }
var
  i: int64;
begin
  SetLength(Measured,Length(TscCount));
  for i:=0 to High(TscCount) do
  begin
    Measured[i].Group:=TscCount[i].Group;
    if ValidMeasurements>0 then
      Measured[i].CumFreq:=TscCount[i].CumFreq/ValidMeasurements
    else
      Measured[i].CumFreq:=0;
  end;
end;
function Limit(CF: TCumFrequency; Frequency: real):real;
{ Returns the tick count at which the cumulative frequency curve CF reaches
  Frequency, using linear interpolation between the two neighbouring points
  of the curve.

  CF        - points of the curve, with CumFreq in ascending order
  Frequency - cumulative fraction to look up (e.g. 0.5 for the median)

  Boundary cases:
  - CF is empty: returns 0.
  - the first point already exceeds Frequency: returns the Group of the
    first point (there is no earlier point to interpolate from).
  - no point exceeds Frequency: returns the Group of the last point. }
var
  i,Last: int64;
  Slope,Intercept: real;
begin
  Last:=High(CF);
  if Last<0 then
  begin
    Limit:=0;
    exit;
  end;

  // Find the first point whose cumulative frequency exceeds Frequency,
  // without ever reading past the end of the array.
  i:=0;
  while i<=Last do
  begin
    if CF[i].CumFreq>Frequency then break;
    inc(i);
  end;

  if i=0 then
    Limit:=CF[0].Group
  else if i>Last then
    Limit:=CF[Last].Group
  else
  begin
    // Here CF[i-1].CumFreq <= Frequency < CF[i].CumFreq, so the
    // denominator below cannot be zero.
    Intercept:=(CF[i-1].Group*CF[i].CumFreq-CF[i].Group*CF[i-1].CumFreq)
               /(CF[i].CumFreq-CF[i-1].CumFreq);
    Slope:=(CF[i].Group-CF[i-1].Group)/(CF[i].CumFreq-CF[i-1].CumFreq);
    Limit:=Frequency*Slope+Intercept;
  end;
end;
procedure Difference;
{ Prints, after a blank line and a heading, the interpolated tick counts at
  the 1%, 5%, 20%, 50% (median), 80%, 95% and 99% points of the cumulative
  frequency curve held in Measured. }
const
  Captions: array[0..6] of string =
    ('1% Limit=','5% Limit=','20% Limit=','Median=',
     '80% Limit=','95% Limit=','99% Limit=');
  Fractions: array[0..6] of real =
    (0.01,0.05,0.20,0.50,0.80,0.95,0.99);
var
  n: longint;
begin
  writeln;
  writeln('Clock ticks used: ');
  for n:=Low(Captions) to High(Captions) do
    writeln(Captions[n],Limit(Measured,Fractions[n]):6:2);
end;
begin
  // taskset -c 1; // taskset allows to change processor, if used from the command line
  MeasureCode;
  ValidMeasurements:=AccumulateValidMeasurements;
  // The value delivered by rdtscp may carry more than the processor number:
  // NOTE(review): Linux is understood to store the CPU number in the low
  // 12 bits and the node number above them - confirm against the kernel used.
  // Get_ClockFreq takes a single character, so it can only look up
  // processors 0..9; for higher numbers the frequency is not reported
  // rather than reporting the frequency of a different processor.
  if (ProcessorUsed and $FFF)<=9 then
    writeln('Tests done on processor ',ProcessorUsed and $FFF,', running at ',
            Get_ClockFreq(IntToStr(ProcessorUsed and $FFF)[1]):7:3,'MHz',' doing ',
            ValidMeasurements,' valid measurements')
  else
    writeln('Tests done on processor ',ProcessorUsed and $FFF,
            ' (clock frequency lookup supports processors 0..9 only)',' doing ',
            ValidMeasurements,' valid measurements');
  EvaluateMeasurements;
  ShowMeasurements;
  Difference;
end.
> _______________________________________________
> fpc-devel maillist - fpc-devel at lists.freepascal.org
> http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freepascal.org/pipermail/fpc-devel/attachments/20180517/f81e615f/attachment.html>
More information about the fpc-devel
mailing list