-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathErrorEstimate.h
102 lines (75 loc) · 2.95 KB
/
ErrorEstimate.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
/*
RealLib, a library for efficient exact real computation
Copyright (C) 2006 Branimir Lambov
This library is licensed under the Apache License, Version 2.0 (the "License");
you may not use this library except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
ErrorEstimate.h
Error math.
Classes:
ErrorEstimate - a simple evaluation of error as a 32-bit
mantissa and a 32-bit exponent. Operations give results
that are always greater than or equal to the actual
result. Note that the exponent in an ErrorEstimate is in
bits, not in words as in a LongFloat, and the mantissa
always has 1 in its most significant bit.
*/
#ifndef FILE_ERROR_ESTIMATE_H
#define FILE_ERROR_ESTIMATE_H
#include <stdlib.h>
#include <limits.h>
#include <exception>
#include "defs.h"
#include "LongFloat.h"
namespace RealLib {
class ErrorEstimate {
public:
u32 m_Man; // >= 2^31
i32 m_Exp; // error = m_Man * 2 ^ (m_Exp - 32)
enum Inf { minusinf = -I32_MAX, plusinf = I32_MAX } ;
enum RoundingMode { Down = 0, Up = 1 };
static inline i32 exp_saturated(exp_type a)
{ a = a < plusinf ? a : plusinf - 1;
return i32(a > minusinf ? a : minusinf + 1); }
explicit ErrorEstimate(const u32 man = 0, const i32 exp = 0);
// conversions (rounding up)
ErrorEstimate(const double err);
ErrorEstimate(const LongFloat &src, RoundingMode rnd = Up);
ErrorEstimate RoundDownLongFloat(const LongFloat &src);
// destructor, operator = not needed
// operations
// round-up addition
ErrorEstimate operator + (const ErrorEstimate &rhs) const;
// round-down substraction
ErrorEstimate operator - (const ErrorEstimate &rhs) const;
// round-up multiplication
ErrorEstimate operator * (const ErrorEstimate &rhs) const;
// round-up reciprocal
ErrorEstimate recip() const;
// round-up division
ErrorEstimate operator / (const ErrorEstimate &rhs) const
{ return *this * rhs.recip(); }
// round-up <<
ErrorEstimate operator << (i32 howmuch) const; // shift left in bits
// comparisons
bool operator >= (const ErrorEstimate &rhs) const;
bool operator > (const ErrorEstimate &rhs) const;
ErrorEstimate& operator ++ (); // add the minimum
// conversions
double AsDouble() const;
LongFloat AsLongFloat() const;
};
// two needed operations
// Free-function max/min over ErrorEstimate values; both take their
// arguments by const reference and return an ErrorEstimate by value.
// NOTE(review): presumably they return the larger / smaller of a and b -
// the definitions are not in this header, confirm in the implementation.
ErrorEstimate max(const ErrorEstimate &a, const ErrorEstimate &b);
ErrorEstimate min(const ErrorEstimate &a, const ErrorEstimate &b);
// Produces an ErrorEstimate from a LongFloat and an i32 argument.
// NOTE(review): presumably the rounding error of lf given exponent re -
// the meaning of re is not established here, verify against the definition.
ErrorEstimate RoundingError(const LongFloat &lf, i32 re);
} // namespace
#endif