forked from synopse/mORMot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSynDoubleToText.inc
950 lines (891 loc) · 26.6 KB
/
SynDoubleToText.inc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
/// efficient double to text conversion using the GRISU-1 algorithm
// - as a complement to SynCommons, which tended to increase too much
// - licensed under a MPL/GPL/LGPL tri-license; version 1.18
{
Implement 64-bit floating point (double) to ASCII conversion using the
GRISU-1 efficient algorithm.
Original Code in flt_core.inc flt_conv.inc flt_pack.inc from FPC RTL.
Copyright (C) 2013 by Max Nazhalov
Licenced with LGPL 2 with the linking exception.
If you don't agree with these License terms, disable this feature
by undefining DOUBLETOSHORT_USEGRISU in Synopse.inc
GRISU Original Algorithm
Copyright (c) 2009 Florian Loitsch
We extracted a double-to-ascii only cut-down version of those files,
and made a huge refactoring to reach the best performance, especially
tuning the Intel target with some dedicated asm and code rewrite.
With Delphi 10.3 on Win32: (no benefit)
100000 FloatToText in 38.11ms i.e. 2,623,570/s, aver. 0us, 47.5 MB/s
100000 str in 43.19ms i.e. 2,315,082/s, aver. 0us, 50.7 MB/s
100000 DoubleToShort in 45.50ms i.e. 2,197,367/s, aver. 0us, 43.8 MB/s
100000 DoubleToAscii in 42.44ms i.e. 2,356,045/s, aver. 0us, 47.8 MB/s
With Delphi 10.3 on Win64:
100000 FloatToText in 61.83ms i.e. 1,617,233/s, aver. 0us, 29.3 MB/s
100000 str in 53.20ms i.e. 1,879,663/s, aver. 0us, 41.2 MB/s
100000 DoubleToShort in 18.45ms i.e. 5,417,998/s, aver. 0us, 108 MB/s
100000 DoubleToAscii in 18.19ms i.e. 5,496,921/s, aver. 0us, 111.5 MB/s
With FPC on Win32:
100000 FloatToText in 115.62ms i.e. 864,842/s, aver. 1us, 15.6 MB/s
100000 str in 57.30ms i.e. 1,745,109/s, aver. 0us, 39.9 MB/s
100000 DoubleToShort in 23.88ms i.e. 4,187,078/s, aver. 0us, 83.5 MB/s
100000 DoubleToAscii in 23.34ms i.e. 4,284,490/s, aver. 0us, 86.9 MB/s
With FPC on Win64:
100000 FloatToText in 76.92ms i.e. 1,300,052/s, aver. 0us, 23.5 MB/s
100000 str in 27.70ms i.e. 3,609,456/s, aver. 0us, 82.6 MB/s
100000 DoubleToShort in 14.73ms i.e. 6,787,944/s, aver. 0us, 135.4 MB/s
100000 DoubleToAscii in 13.78ms i.e. 7,253,735/s, aver. 0us, 147.2 MB/s
With FPC on Linux x86_64:
100000 FloatToText in 81.48ms i.e. 1,227,249/s, aver. 0us, 22.2 MB/s
100000 str in 36.98ms i.e. 2,703,871/s, aver. 0us, 61.8 MB/s
100000 DoubleToShort in 13.11ms i.e. 7,626,601/s, aver. 0us, 152.1 MB/s
100000 DoubleToAscii in 12.59ms i.e. 7,942,180/s, aver. 0us, 161.2 MB/s
- Our rewrite is twice faster than original flt_conv.inc from FPC RTL (str)
- Delphi Win32 has trouble making 64-bit computation - no benefit since it
has good optimized i87 asm (but slower than our code with FPC/Win32)
- FPC is more efficient when compiling integer arithmetic; we avoided slow
division by calling our Div100(), but Delphi Win64 is still far behind
- Delphi Win64 has very slow FloatToText and str()
}
// Controls printing of NaN-sign.
// Undefine to print NaN sign during float->ASCII conversion.
// IEEE does not interpret the sign of a NaN, so leave it defined.
{$define GRISU1_F2A_NAN_SIGNLESS}
// Controls rounding of generated digits when formatting with narrowed
// width (either fixed or exponential notation).
// Traditionally, FPC and BP7/Delphi use "roundTiesToAway" mode.
// Undefine to use "roundTiesToEven" approach.
{$define GRISU1_F2A_HALF_ROUNDUP}
// This one is a hack against Grusu sub-optimality.
// It may be used only strictly together with GRISU1_F2A_HALF_ROUNDUP.
// It does not violate most general rules due to the fact that it is
// applicable only when formatting with narrowed width, where the fine
// view is more desirable, and the precision is already lost, so it can
// be used in general-purpose applications.
// Refer to its implementation.
{$define GRISU1_F2A_AGRESSIVE_ROUNDUP} // Defining this fixes several tests.
// Undefine to enable SNaN support.
// Note: IEEE [754-2008, page 31] requires (1) to recognize "SNaN" during
// ASCII->float, and (2) to generate the "invalid FP operation" exception
// either when SNaN is printed as "NaN", or "SNaN" is evaluated to QNaN,
// so it would be preferable to undefine these settings,
// but the FPC RTL is not ready for this right now..
{$define GRISU1_F2A_NO_SNAN}
/// If Value=0 would just store '0', whatever frac_digits is supplied.
{$define GRISU1_F2A_ZERONOFRACT}
{$ifndef FPC}
// those functions are intrinsics with FPC :)
function BSRdword(c: cardinal): cardinal;
asm
{$ifdef CPU64}
.noframe
mov eax, c
{$endif}
bsr eax, eax
end; // in our code below, we are sure that c<>0
function BSRqword(const q: qword): cardinal;
asm
{$ifdef CPU32}
bsr eax, [esp + 8]
jz @1
add eax, 32
ret
@1: bsr eax, [esp + 4]
@2: {$else}
.noframe
mov rax, q
bsr rax, rax
{$endif}
end; // in our code below, we are sure that q<>0
{$endif FPC}
const
// TFloatFormatProfile for double
nDig_mantissa = 17;
nDig_exp10 = 3;
type
// "Do-It-Yourself Floating Point" structures
TDIY_FP = record
f: qword;
e: integer;
end;
TDIY_FP_Power_of_10 = record
c: TDIY_FP;
e10: integer;
end;
PDIY_FP_Power_of_10 = ^TDIY_FP_Power_of_10;
const
ROUNDER = $80000000;
{$ifdef CPUINTEL} // our faster version using 128-bit x86_64 multiplication
procedure d2a_diy_fp_multiply(var x, y: TDIY_FP; normalize: boolean;
out result: TDIY_FP); {$ifdef HASINLINE} inline; {$endif}
var
p: THash128Rec;
begin
mul64x64(x.f, y.f, p); // fast x86_64 / i386 asm
if (p.c1 and ROUNDER) <> 0 then
inc(p.h);
result.f := p.h;
result.e := PtrInt(x.e) + PtrInt(y.e) + 64;
if normalize then
if (PQWordRec(@result.f)^.h and ROUNDER) = 0 then
begin
result.f := result.f * 2;
dec(result.e);
end;
end;
{$else} // regular Grisu method - optimized for 32-bit CPUs
procedure d2a_diy_fp_multiply(var x, y: TDIY_FP; normalize: boolean; out result: TDIY_FP);
var
_x: TQWordRec absolute x;
_y: TQWordRec absolute y;
r: TQWordRec absolute result;
ac, bc, ad, bd, t1: TQWordRec;
begin
ac.v := qword(_x.h) * _y.h;
bc.v := qword(_x.l) * _y.h;
ad.v := qword(_x.h) * _y.l;
bd.v := qword(_x.l) * _y.l;
t1.v := qword(ROUNDER) + bd.h + bc.l + ad.l;
result.f := ac.v + ad.h + bc.h + t1.h;
result.e := x.e + y.e + 64;
if normalize then
if (r.h and ROUNDER) = 0 then
begin
inc(result.f, result.f);
dec(result.e);
end;
end;
{$endif CPUINTEL}
const
// alpha =-61; gamma = 0
// full cache: 1E-450 .. 1E+432, step = 1E+18
// sparse = 1/10
C_PWR10_DELTA = 18;
C_PWR10_COUNT = 50;
type
TDIY_FP_Cached_Power10 = record
base: array [ 0 .. 9 ] of TDIY_FP_Power_of_10;
factor_plus: array [ 0 .. 1 ] of TDIY_FP_Power_of_10;
factor_minus: array [ 0 .. 1 ] of TDIY_FP_Power_of_10;
// extra mantissa correction [ulp; signed]
corrector: array [ 0 .. C_PWR10_COUNT - 1 ] of shortint;
end;
const
CACHED_POWER10: TDIY_FP_Cached_Power10 = (
base: (
( c: ( f: qword($825ECC24C8737830); e: -362 ); e10: -90 ),
( c: ( f: qword($E2280B6C20DD5232); e: -303 ); e10: -72 ),
( c: ( f: qword($C428D05AA4751E4D); e: -243 ); e10: -54 ),
( c: ( f: qword($AA242499697392D3); e: -183 ); e10: -36 ),
( c: ( f: qword($9392EE8E921D5D07); e: -123 ); e10: -18 ),
( c: ( f: qword($8000000000000000); e: -63 ); e10: 0 ),
( c: ( f: qword($DE0B6B3A76400000); e: -4 ); e10: 18 ),
( c: ( f: qword($C097CE7BC90715B3); e: 56 ); e10: 36 ),
( c: ( f: qword($A70C3C40A64E6C52); e: 116 ); e10: 54 ),
( c: ( f: qword($90E40FBEEA1D3A4B); e: 176 ); e10: 72 )
);
factor_plus: (
( c: ( f: qword($F6C69A72A3989F5C); e: 534 ); e10: 180 ),
( c: ( f: qword($EDE24AE798EC8284); e: 1132 ); e10: 360 )
);
factor_minus: (
( c: ( f: qword($84C8D4DFD2C63F3B); e: -661 ); e10: -180 ),
( c: ( f: qword($89BF722840327F82); e: -1259 ); e10: -360 )
);
corrector: (
0, 0, 0, 0, 1, 0, 0, 0, 1, -1,
0, 1, 1, 1, -1, 0, 0, 1, 0, -1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, 0, 0, -1, 0, 0, 0, 0, 0, -1,
0, 0, 0, 0, 1, 0, 0, 0, -1, 0
));
CACHED_POWER10_MIN10 = -90 -360;
// = ref.base[low(ref.base)].e10 + ref.factor_minus[high(ref.factor_minus)].e10
// return normalized correctly rounded approximation of the power of 10
// scaling factor, intended to shift a binary exponent of the original number
// into selected [ alpha .. gamma ] range
procedure d2a_diy_fp_cached_power10(exp10: integer; out factor: TDIY_FP_Power_of_10);
var
i, xmul: integer;
A, B: PDIY_FP_Power_of_10;
cx: PtrInt;
ref: ^TDIY_FP_Cached_Power10;
begin
ref := @CACHED_POWER10; // much better code generation on PIC/x86_64
// find non-sparse index
if exp10 <= CACHED_POWER10_MIN10 then
i := 0
else
begin
i := (exp10 - CACHED_POWER10_MIN10) div C_PWR10_DELTA;
if i * C_PWR10_DELTA + CACHED_POWER10_MIN10 <> exp10 then
inc(i); // round-up
if i > C_PWR10_COUNT - 1 then
i := C_PWR10_COUNT - 1;
end;
// generate result
xmul := i div length(ref.base);
A := @ref.base[i - (xmul * length(ref.base))]; // fast mod
dec(xmul, length(ref.factor_minus));
if xmul = 0 then
begin
// base
factor := A^;
exit;
end;
// surrogate
if xmul > 0 then
begin
dec(xmul);
B := @ref.factor_plus[xmul];
end
else
begin
xmul := -(xmul + 1);
B := @ref.factor_minus[xmul];
end;
factor.e10 := A.e10 + B.e10;
if A.e10 <> 0 then
begin
d2a_diy_fp_multiply(A.c, B.c, true, factor.c);
// adjust mantissa
cx := ref.corrector[i];
if cx <> 0 then
inc(int64(factor.c.f), int64(cx));
end
else
// exact
factor.c := B^.c;
end;
procedure d2a_unpack_float(const f: double; out minus: boolean; out result: TDIY_FP);
{$ifdef HASINLINE} inline;{$endif}
type
TSplitFloat = packed record
case byte of
0: (f: double);
1: (b: array[0..7] of byte);
2: (w: array[0..3] of word);
3: (d: array[0..1] of cardinal);
4: (l: qword);
end;
var
doublebits: TSplitFloat;
begin
{$ifdef FPC_DOUBLE_HILO_SWAPPED}
// high and low cardinal are swapped when using the arm fpa
doublebits.d[0] := TSplitFloat(f).d[1];
doublebits.d[1] := TSplitFloat(f).d[0];
{$else not FPC_DOUBLE_HILO_SWAPPED}
doublebits.f := f;
{$endif FPC_DOUBLE_HILO_SWAPPED}
{$ifdef endian_big}
minus := (doublebits.b[0] and $80 <> 0);
result.e := (doublebits.w[0] shr 4) and $7FF;
{$else endian_little}
minus := (doublebits.b[7] and $80 <> 0);
result.e := (doublebits.w[3] shr 4) and $7FF;
{$endif endian}
result.f := doublebits.l and $000FFFFFFFFFFFFF;
end;
const
C_FRAC2_BITS = 52;
C_EXP2_BIAS = 1023;
C_DIY_FP_Q = 64;
C_GRISU_ALPHA = -61;
C_GRISU_GAMMA = 0;
C_EXP2_SPECIAL = C_EXP2_BIAS * 2 + 1;
C_MANT2_INTEGER = qword(1) shl C_FRAC2_BITS;
type
TAsciiDigits = array[0..47] of byte;
PAsciiDigits = ^TAsciiDigits;
// convert unsigned integers into decimal digits
{$ifdef FPC_64} // leverage efficient FPC 64-bit division as mul reciprocal
function d2a_gen_digits_64(buf: PAsciiDigits; x: qword): PtrInt;
var
tab: PWordArray;
P: PAnsiChar;
c100: qword;
begin
tab := @TwoDigitByteLookupW; // 0..99 value -> two byte digits (0..9)
P := PAnsiChar(@buf[24]); // append backwards
repeat
if x >= 100 then
begin
dec(P, 2);
c100 := x div 100;
dec(x, c100 * 100);
PWord(P)^ := tab[x];
if c100 = 0 then
break;
x := c100;
continue;
end;
if x < 10 then
begin
dec(P);
P^ := AnsiChar(x);
break;
end;
dec(P, 2);
PWord(P)^ := tab[x];
break;
until false;
PQWordArray(buf)[0] := PQWordArray(P)[0]; // faster than MoveSmall(P,buf,result)
PQWordArray(buf)[1] := PQWordArray(P)[1];
PQWordArray(buf)[2] := PQWordArray(P)[2];
result := PAnsiChar(@buf[24]) - P;
end;
{$else not FPC_64} // use three 32-bit groups of digit
function d2a_gen_digits_32(buf: PAsciiDigits; x: dword; pad_9zero: boolean): PtrInt;
const
digits: array[0..9] of cardinal = (
0, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000);
var
n: PtrInt;
m: cardinal;
{$ifdef FPC}
z: cardinal;
{$else}
d100: TDiv100Rec;
{$endif FPC}
tab: PWordArray;
begin
// Calculate amount of digits
if x = 0 then
n := 0 // emit nothing if padding is not required
else
begin
n := integer((BSRdword(x) + 1) * 1233) shr 12;
if x >= digits[n] then
inc(n);
end;
if pad_9zero and (n < 9) then
n := 9;
result := n;
if n = 0 then
exit;
// Emit digits
dec(PByte(buf));
tab := @TwoDigitByteLookupW;
m := x;
while (n >= 2) and (m <> 0) do
begin
dec(n);
{$ifdef FPC} // FPC will use fast mul reciprocal
z := m div 100; // compute two 0..9 digits
PWord(@buf[n])^ := tab^[m - z * 100];
m := z;
{$else}
Div100(m, d100); // our asm is faster than Delphi div operation
PWord(@buf[n])^ := tab^[d100.M];
m := d100.D;
{$endif FPC}
dec(n);
end;
if n = 0 then
exit;
if m <> 0 then
begin
if m > 9 then
m := m mod 10; // compute last 0..9 digit
buf[n] := m;
dec(n);
if n = 0 then
exit;
end;
repeat
buf[n] := 0; // padding with 0
dec(n);
until n = 0;
end;
function d2a_gen_digits_64(buf: PAsciiDigits; const x: qword): PtrInt;
var
n_digits: PtrInt;
temp: qword;
splitl, splitm, splith: cardinal;
begin
// Split X into 3 unsigned 32-bit integers; lower two should be < 10 digits long
n_digits := 0;
if x < 1000000000 then
splitl := x
else
begin
temp := x div 1000000000;
splitl := x - temp * 1000000000;
if temp < 1000000000 then
splitm := temp
else
begin
splith := temp div 1000000000;
splitm := cardinal(temp) - splith * 1000000000;
n_digits := d2a_gen_digits_32(buf, splith, false); // Generate hi digits
end;
inc(n_digits, d2a_gen_digits_32(@buf[n_digits], splitm, n_digits <> 0));
end;
// Generate digits
inc(n_digits, d2a_gen_digits_32(@buf[n_digits], splitl, n_digits <> 0));
result := n_digits;
end;
{$endif FPC_64}
// Performs digit sequence rounding, returns decimal point correction
function d2a_round_digits(var buf: TAsciiDigits; var n_current: integer;
n_max: PtrInt; half_round_to_even: boolean = true): PtrInt;
var
n: PtrInt;
dig_round, dig_sticky: byte;
{$ifdef GRISU1_F2A_AGRESSIVE_ROUNDUP}
i: PtrInt;
{$endif}
begin
result := 0;
n := n_current;
n_current := n_max;
// Get round digit
dig_round := buf[n_max];
{$ifdef GRISU1_F2A_AGRESSIVE_ROUNDUP}
// Detect if rounding-up the second last digit turns the "dig_round"
// into "5"; also make sure we have at least 1 digit between "dig_round"
// and the second last.
if not half_round_to_even then
if (dig_round = 4) and (n_max < n - 3) then
if buf[n - 2] >= 8 then // somewhat arbitrary...
begin
// check for only "9" are in between
i := n - 2;
repeat
dec(i);
until (i = n_max) or (buf[i] <> 9);
if i = n_max then
// force round-up
dig_round := 9; // any value ">=5"
end;
{$endif GRISU1_F2A_AGRESSIVE_ROUNDUP}
if dig_round < 5 then
exit;
// Handle "round half to even" case
if (dig_round = 5) and half_round_to_even and
((n_max = 0) or (buf[n_max - 1] and 1 = 0)) then
begin
// even and a half: check if exactly the half
dig_sticky := 0;
while (n > n_max + 1) and (dig_sticky = 0) do
begin
dec(n);
dig_sticky := buf[n];
end;
if dig_sticky = 0 then
exit; // exactly a half -> no rounding is required
end;
// Round-up
while n_max > 0 do
begin
dec(n_max);
inc(buf[n_max]);
if buf[n_max] < 10 then
begin
// no more overflow: stop now
n_current := n_max + 1;
exit;
end;
// continue rounding
end;
// Overflow out of the 1st digit, all n_max digits became 0
buf[0] := 1;
n_current := 1;
result := 1;
end;
// format the number in the fixed-point representation
procedure d2a_return_fixed(str: PAnsiChar; minus: boolean; var digits: TAsciiDigits;
n_digits_have, fixed_dot_pos, frac_digits: integer);
var
p: PAnsiChar;
d: PByte;
cut_digits_at, n_before_dot, n_before_dot_pad0, n_after_dot_pad0,
n_after_dot, n_tail_pad0: integer;
begin
// Round digits if necessary
cut_digits_at := fixed_dot_pos + frac_digits;
if cut_digits_at < 0 then
// zero
n_digits_have := 0
else if cut_digits_at < n_digits_have then
// round digits
inc(fixed_dot_pos, d2a_round_digits(digits, n_digits_have, cut_digits_at
{$ifdef GRISU1_F2A_HALF_ROUNDUP}, false {$endif} ));
// Before dot: digits, pad0
if (fixed_dot_pos <= 0) or (n_digits_have = 0) then
begin
n_before_dot := 0;
n_before_dot_pad0 := 1;
end
else if fixed_dot_pos > n_digits_have then
begin
n_before_dot := n_digits_have;
n_before_dot_pad0 := fixed_dot_pos - n_digits_have;
end
else
begin
n_before_dot := fixed_dot_pos;
n_before_dot_pad0 := 0;
end;
// After dot: pad0, digits, pad0
if fixed_dot_pos < 0 then
n_after_dot_pad0 := -fixed_dot_pos
else
n_after_dot_pad0 := 0;
if n_after_dot_pad0 > frac_digits then
n_after_dot_pad0 := frac_digits;
n_after_dot := n_digits_have - n_before_dot;
n_tail_pad0 := frac_digits - n_after_dot - n_after_dot_pad0;
p := str + 1;
// Sign
if minus then
begin
p^ := '-';
inc(p);
end;
// Integer significant digits
d := @digits;
if n_before_dot > 0 then
repeat
p^ := AnsiChar(d^ + ord('0'));
inc(p);
inc(d);
dec(n_before_dot);
until n_before_dot = 0;
// Integer 0-padding
if n_before_dot_pad0 > 0 then
repeat
p^ := '0';
inc(p);
dec(n_before_dot_pad0);
until n_before_dot_pad0 = 0;
//
if frac_digits <> 0 then
begin
// Dot
p^ := '.';
inc(p);
// Pre-fraction 0-padding
if n_after_dot_pad0 > 0 then
repeat
p^ := '0';
inc(p);
dec(n_after_dot_pad0);
until n_after_dot_pad0 = 0;
// Fraction significant digits
if n_after_dot > 0 then
repeat
p^ := AnsiChar(d^ + ord('0'));
inc(p);
inc(d);
dec(n_after_dot);
until n_after_dot = 0;
// Tail 0-padding
if n_tail_pad0 > 0 then
repeat
p^ := '0';
inc(p);
dec(n_tail_pad0);
until n_tail_pad0 = 0;
end;
// Store length
str[0] := AnsiChar(p - str - 1);
end;
// formats the number as exponential representation
procedure d2a_return_exponential(str: PAnsiChar; minus: boolean;
digits: PByte; n_digits_have, n_digits_req, d_exp: PtrInt);
var
p, exp: PAnsiChar;
begin
p := str + 1;
// Sign
if minus then
begin
p^ := '-';
inc(p);
end;
// Integer part
if n_digits_have > 0 then
begin
p^ := AnsiChar(digits^ + ord('0'));
dec(n_digits_have);
end
else
p^ := '0';
inc(p);
// Dot
if n_digits_req > 1 then
begin
p^ := '.';
inc(p);
end;
// Fraction significant digits
if n_digits_req < n_digits_have then
n_digits_have := n_digits_req;
if n_digits_have > 0 then
begin
repeat
inc(digits);
p^ := AnsiChar(digits^ + ord('0'));
inc(p);
dec(n_digits_have);
until n_digits_have = 0;
while p[-1] = '0' do
dec(p); // trim #.###00000 -> #.###
if p[-1] = '.' then
dec(p); // #.0 -> #
end;
// Exponent designator
p^ := 'E';
inc(p);
// Exponent sign (+ is not stored, as in Delphi)
if d_exp < 0 then
begin
p^ := '-';
d_exp := -d_exp;
inc(p);
end;
// Exponent digits
exp := pointer(SmallUInt32UTF8[d_exp]); // 0..999 range is fine
PCardinal(p)^ := PCardinal(exp)^;
inc(p, PStrLen(exp - _STRLEN)^);
// Store length
str[0] := AnsiChar(p - str - 1);
end;
/// set one of special results with proper sign
procedure d2a_return_special(str: PAnsiChar; sign: integer; const spec: shortstring);
begin
// Compute length
str[0] := spec[0];
if sign <> 0 then
inc(str[0]);
inc(str);
// Sign
if sign <> 0 then
begin
if sign > 0 then
str^ := '+'
else
str^ := '-';
inc(str);
end;
// Special text (3 chars)
PCardinal(str)^ := PCardinal(@spec[1])^;
end;
// Calculates the exp10 of a factor required to bring the binary exponent
// of the original number into selected [ alpha .. gamma ] range:
// result := ceiling[ ( alpha - e ) * log10(2) ]
function d2a_k_comp(e, alpha{, gamma}: integer): integer;
var
dexp: double;
const
D_LOG10_2: double = 0.301029995663981195213738894724493027; // log10(2)
var
x, n: integer;
begin
x := alpha - e;
dexp := x * D_LOG10_2;
// ceil( dexp )
n := trunc(dexp);
if x > 0 then
if dexp <> n then
inc(n); // round-up
result := n;
end;
/// raw function to convert a 64-bit double into a shortstring, stored in str
// - implements Fabian Loitsch's Grisu algorithm dedicated to double values
// - currently, SynCommnons only set min_width=0 (for DoubleToShortNoExp to avoid
// any scientific notation ) or min_width=C_NO_MIN_WIDTH (for DoubleToShort to
// force the scientific notation when the double cannot be represented as
// a simple fractinal number)
procedure DoubleToAscii(min_width, frac_digits: integer; const v: double; str: PAnsiChar);
var
w, D: TDIY_FP;
c_mk: TDIY_FP_Power_of_10;
n, mk, dot_pos, n_digits_need, n_digits_have: integer;
n_digits_req, n_digits_sci: integer;
minus: boolean;
fl, one_maskl: qword;
one_e: integer;
{$ifdef CPU32}
one_mask, f: cardinal; // run a 2nd loop with 32-bit range
{$endif CPU32}
buf: TAsciiDigits;
begin
// Limit parameters
if frac_digits > 216 then
frac_digits := 216; // Delphi compatible
if min_width <= C_NO_MIN_WIDTH then
min_width := -1 // no minimal width
else if min_width < 0 then
min_width := 0; // minimal width is as short as possible
// Format profile: select "n_digits_need" (and "n_digits_exp")
n_digits_req := nDig_mantissa;
// number of digits to be calculated by Grisu
n_digits_need := nDig_mantissa;
if n_digits_req < n_digits_need then
n_digits_need := n_digits_req;
// number of mantissa digits to be printed in exponential notation
if min_width < 0 then
n_digits_sci := n_digits_req
else
begin
n_digits_sci := min_width -1 {sign} -1 {dot} -1 {E} -1 {E-sign} - nDig_exp10;
if n_digits_sci < 2 then
n_digits_sci := 2; // at least 2 digits
if n_digits_sci > n_digits_req then
n_digits_sci := n_digits_req; // at most requested by real_type
end;
// Float -> DIY_FP
d2a_unpack_float(v, minus, w);
// Handle Zero
if (w.e = 0) and (w.f = 0) then
begin
{$ifdef GRISU1_F2A_ZERONOFRACT}
PWord(str)^ := 1 + ord('0') shl 8; // just return '0'
{$else}
if frac_digits >= 0 then
d2a_return_fixed(str, minus, buf, 0, 1, frac_digits)
else
d2a_return_exponential(str, minus, @buf, 0, n_digits_sci, 0);
{$endif GRISU1_F2A_ZERONOFRACT}
exit;
end;
// Handle specials
if w.e = C_EXP2_SPECIAL then
begin
n := 1 - ord(minus) * 2; // default special sign [-1|+1]
if w.f = 0 then
d2a_return_special(str, n, C_STR_INF)
else
begin
// NaN [also pseudo-NaN, pseudo-Inf, non-normal for floatx80]
{$ifdef GRISU1_F2A_NAN_SIGNLESS}
n := 0;
{$endif}
{$ifndef GRISU1_F2A_NO_SNAN}
if (w.f and (C_MANT2_INTEGER shr 1)) = 0 then
return_special(str, n, C_STR_SNAN)
else
{$endif GRISU1_F2A_NO_SNAN}
d2a_return_special(str, n, C_STR_QNAN);
end;
exit;
end;
// Handle denormals
if w.e <> 0 then
begin
// normal
w.f := w.f or C_MANT2_INTEGER;
n := C_DIY_FP_Q - C_FRAC2_BITS - 1;
end
else
begin
// denormal (w.e=0)
n := 63 - BSRqword(w.f); // we are sure that w.f<>0 - see Handle Zero above
inc(w.e);
end;
// Final normalization
w.f := w.f shl n;
dec(w.e, C_EXP2_BIAS + n + C_FRAC2_BITS);
// 1. Find the normalized "c_mk = f_c * 2^e_c" such that
// "alpha <= e_c + e_w + q <= gamma"
// 2. Define "V = D * 10^k": multiply the input number by "c_mk", do not
// normalize to land into [ alpha .. gamma ]
// 3. Generate digits ( n_digits_need + "round" )
if (C_GRISU_ALPHA <= w.e) and (w.e <= C_GRISU_GAMMA) then
begin
// no scaling required
D := w;
c_mk.e10 := 0;
end
else
begin
mk := d2a_k_comp(w.e, C_GRISU_ALPHA{, C_GRISU_GAMMA} );
d2a_diy_fp_cached_power10(mk, c_mk);
// Let "D = f_D * 2^e_D := w (*) c_mk"
if c_mk.e10 = 0 then
D := w
else
d2a_diy_fp_multiply(w, c_mk.c, false, D);
end;
// Generate digits: integer part
n_digits_have := d2a_gen_digits_64(@buf, D.f shr (-D.e));
dot_pos := n_digits_have;
// Generate digits: fractional part
{$ifdef CPU32}
f := 0; // "sticky" digit
{$endif CPU32}
if D.e < 0 then
repeat
// MOD by ONE
one_e := D.e;
one_maskl := qword(1) shl (-D.e) - 1;
fl := D.f and one_maskl;
// 64-bit loop (very efficient on x86_64, slower on i386)
while {$ifdef CPU32} (one_e < -29) and {$endif}
(n_digits_have < n_digits_need + 1) and (fl <> 0) do
begin
// f := f * 5;
inc(fl, fl shl 2);
// one := one / 2
one_maskl := one_maskl shr 1;
inc(one_e);
// DIV by one
buf[n_digits_have] := fl shr (-one_e);
// MOD by one
fl := fl and one_maskl;
// next
inc(n_digits_have);
end;
{$ifdef CPU32}
if n_digits_have >= n_digits_need + 1 then
begin
// only "sticky" digit remains
f := ord(fl <> 0);
break;
end;
one_mask := cardinal(one_maskl);
f := cardinal(fl);
// 32-bit loop
while (n_digits_have < n_digits_need + 1) and (f <> 0) do
begin
// f := f * 5;
inc(f, f shl 2);
// one := one / 2
one_mask := one_mask shr 1;
inc(one_e);
// DIV by one
buf[n_digits_have] := f shr (-one_e);
// MOD by one
f := f and one_mask;
// next
inc(n_digits_have);
end;
{$endif CPU32}
until true;
{$ifdef CPU32}
// Append "sticky" digit if any
if (f <> 0) and (n_digits_have >= n_digits_need + 1) then
begin
// single "<>0" digit is enough
n_digits_have := n_digits_need + 2;
buf[n_digits_need + 1] := 1;
end;
{$endif CPU32}
// Round to n_digits_need using "roundTiesToEven"
if n_digits_have > n_digits_need then
inc(dot_pos, d2a_round_digits(buf, n_digits_have, n_digits_need));
// Generate output
if frac_digits >= 0 then
begin
d2a_return_fixed(str, minus, buf, n_digits_have, dot_pos - c_mk.e10,
frac_digits);
exit;
end;
if n_digits_have > n_digits_sci then
inc(dot_pos, d2a_round_digits(buf, n_digits_have, n_digits_sci
{$ifdef GRISU1_F2A_HALF_ROUNDUP}, false {$endif} ));
d2a_return_exponential(str, minus, @buf, n_digits_have, n_digits_sci,
dot_pos - c_mk.e10 - 1);
end;