-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregular_numbers.cc
317 lines (269 loc) · 9.03 KB
/
regular_numbers.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
#include <vector>
#include <array>
#include <set>
#include <utility>
#include <cmath>
#include <iostream>
#include <numeric>
#include "regular_numbers.hh"
/*
This file contains several functions for calculating
the Nth regular number.
Regular numbers, also known as ugly numbers, Hamming
numbers, and 5-smooth numbers, are numbers whose only
prime divisors are 2, 3, and 5.
They are described by
2^i * 3^j * 5^k
for non-negative i,j,k
The first 15 regular numbers are
1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24
While that might make it look like regular numbers are
densely distributed, they are not. After around 60, the
ratio of regular numbers to non-regular (normal? irregular?)
numbers decreases precipitously, and they are very sparse
among larger integers.
Because they are dependent on the distribution of primes,
regular numbers are randomly distributed throughout
number space, and cannot be predicted analytically.
However, advanced geometric reasoning let's us compute
deep into the series while only calculating a small
fraction of the series.
INCLUDES:
get_regular_factor()
get_regular_set()
get_regular_log_set()
get_regular_compact()
get_regular_divide_conquer()
get_regular_fast_geometric()
*/
// GLOBALS
const double lb3{std::log2(3.0)};
const double lb5{std::log2(5.0)};
// HELPER FUNCTIONS
double coef2log2(const std::array<size_t, 3>& arr) {
return arr[0] + arr[1]*lb3 + arr[2]*lb5;
}
int factor(int val) {
while (val % 2 == 0) { val /= 2; }
while (val % 3 == 0) { val /= 3; }
while (val % 5 == 0) { val /= 5; }
return val;
}
// FUNCTIONS TO CALCULATE NTH REGULAR NUMBER
// simplest method to calculate Nth regular number
int get_regular_factor(const size_t n_series)
{
// stop
if (n_series > 1500) {
std::cout << "Run with n <= 1500 or computer will be too hot.\n";
return 0;
}
std::vector<int> series;
series.push_back(1);
int num{2};
while (series.size() < n_series) {
int n{num};
int remainder = factor(n);
if (remainder == 1) {
series.push_back(num);
}
++num;
}
// return last value placed into series
return series.back();
}
// this method uses std::set to sort the series and filter duplicates
// start at an initial value (1) and multiply it by 2,3,5
// to create new values at every step
unsigned long long get_regular_set(size_t n) {
// use std::set to filter duplicates and sort entries
std::set<unsigned long long> series;
series.insert(1);
while (n != 1) {
const auto smallest = *series.begin();
series.erase(series.begin());
series.insert(smallest*2);
series.insert(smallest*3);
series.insert(smallest*5);
--n;
}
return *series.begin();
}
// this is similar to the previous method, but uses log2 values
double get_regular_log_set(size_t n)
{
// custom compare function for FP values
const auto cmp = [] (const auto n1, const auto n2) {
return (n2 - n1) > (1.0e-15 * n1);
};
std::set<double, decltype(cmp)> log_series(cmp);
log_series.insert(0.0); // insert first value
while (n != 1) {
const auto smallest = *log_series.begin();
log_series.erase(log_series.begin());
log_series.insert(smallest + 1.0);
log_series.insert(smallest + lb3);
log_series.insert(smallest + lb5);
--n;
}
return *log_series.begin();
}
// this is an improved method that uses a compact algorithm
// compared to other methods with similar or better performance
// single loop over N that launches an iteration over 2, 3, 5
// when a candidate leaves the queue
double get_regular_compact(size_t n)
{
std::array<const double, 3> lg2{1.0, lb3, lb5};
std::array<double, 3> candidates{1.0, lb3, lb5};
std::array<int, 3> series_idx{0, 0, 0};
std::vector<double> series;
series.reserve(65536);
size_t vec_cnt = 0;
while (n != 1) {
// manage memory
if ((series.capacity()/2) < vec_cnt) {
const auto min_i = *std::min_element(series_idx.begin(), series_idx.end());
for (auto& el : series_idx) el -= min_i;
series.erase(series.begin(), series.begin() + min_i);
vec_cnt = 0;
}
// push minimum candidate into series and calculate new products of the pushed value
series.push_back(*std::min_element(candidates.begin(), candidates.end()));
const auto current = series.back();
for (int i = 0; i < 3; ++i) {
if (std::abs(candidates[i] - current) < current * 1.0e-15) {
// replace candidate if this value just went into series
candidates[i] = series[series_idx[i]++] + lg2[i];
}
}
vec_cnt++;
n--;
}
return series.back();
}
// this algorithm uses a binary divide and conquer
// approach to minimize the number of operations
// that must be performed to calculate series
std::pair<double, std::array<size_t, 3>>
get_regular_divide_conquer(size_t n)
{
// containers to store problem state and series
std::array<size_t, 3> coef_ijk{1,0,0};
std::array<size_t, 3> coef_jk{0,2,0};
std::array<size_t, 3> coef_k{0,0,1};
std::array<size_t, 3> coef_minjk{0,1,0};
std::vector<std::array<size_t, 3>> series;
std::vector<std::array<size_t, 3>> jk_series;
series.reserve(65536);
jk_series.reserve(65536);
// initial values of series in log2-space
double log_ijk = 1.0;
double log_jk = 2.0 * lb3;
double log_k = lb5;
double min_jk = lb3;
size_t ii = 0, jj = 0;
while (n != 1) {
if ((series.capacity()/2) < ii) {
series.erase(series.begin(), series.begin() + ii); ii = 0;
}
if (log_ijk < min_jk) {
series.push_back(coef_ijk);
coef_ijk = series[ii++];
coef_ijk[0]++;
log_ijk = coef2log2(coef_ijk);
} else {
series.push_back(coef_minjk);
if (log_jk < log_k) {
coef_minjk = coef_jk;
coef_jk = jk_series[jj++];
coef_jk[1]++;
log_jk = coef2log2(coef_jk);
} else {
coef_minjk = coef_k;
coef_k[2]++;
log_k = coef2log2(coef_k);
}
if ((jk_series.capacity()/2) < jj) {
jk_series.erase(jk_series.begin(), jk_series.begin() + jj); jj = 0;
}
jk_series.push_back(coef_minjk);
min_jk = coef2log2(coef_minjk);
}
--n;
} // end while
return std::make_pair(coef2log2(series.back()), series.back());
}
/*
this algorithm takes advantage of a formula that provides a finite limit
on how many N exist beneath a given threshold
Formula is:
(log2( N * 30^0.5))^3
n = -----------------
6 log2(3) log2(5)
where n indicates the nth position in the series
and N is the threshold the nth series member must exist beneath
we can solve this for N to obtain an estimate of the nth member's value
log2(N) = estimated value ~= (n * 6 * lg2(3) * log2(5))^1/3 - log2(30^0.5)
It has been reported that it is adequate to set the error bounds as:
estimated value +- (1 / estimated value)
*/
std::pair<double, std::array<size_t, 3>>
get_regular_fast_geometric(const size_t n)
{
// check for n too small
if (n < 1) {
std::cout << "Error: N must be an integer > 0\n";
return std::make_pair<double,std::array<size_t, 3>>(-1.0, {0,0,0});
}
if (n < 2) {
std::cout << "Special inclusion of 1 as first member of series\n";
return std::make_pair<double,std::array<size_t, 3>>(0.0, {0,0,0});
}
if (n < 3) {
return std::make_pair<double,std::array<size_t, 3>>(1.0, {1,0,0});
}
const double numerator = std::pow((6.0 * lb3 * lb5 * n), 1.0/3.0);
const double denominator = std::log2(std::sqrt(30.0));
const double estval = numerator - denominator;
const double high = estval + 1.0 / estval;
const double low = 2.0 * estval - high;
size_t count{0};
std::vector<double> series;
std::vector<std::array<size_t, 3>> coeffs;
const size_t kmax = static_cast<size_t>(high / lb5) + 1;
for (size_t k = 0; k < kmax; k++) {
const double fives = lb5 * k;
const size_t jmax = static_cast<size_t>((high - fives) / lb3) + 1;
for (size_t j = 0; j < jmax; j++) {
const double threes = fives + static_cast<double>(j) * lb3;
const double twos = high - threes;
const double candidate = threes + std::floor(twos);
count += static_cast<size_t>(twos) + 1;
if (candidate >= low) {
series.push_back(candidate);
coeffs.push_back({static_cast<size_t>(twos), j, k});
}
}
}
// ensure value came from within search band
if (n > count) {
std::cout << "Error: high estimate needs to be higher\n";
std::array<size_t, 3> out = {0, 0, 0};
return std::make_pair(0.0, out);
}
// target index of solution when sorted in descending order
const size_t target = static_cast<int>(count - n);
if (target >= series.size()) {
std::cout << "Error: low estimate needs to be lower\n";
std::array<size_t, 3> out = {0, 0, 0};
return std::make_pair(0.0, out);
}
// the data need to be sorted
// use sort_idx to get index of series
std::vector<size_t> sort_idx(series.size());
std::iota(sort_idx.begin(), sort_idx.end(), 0);
std::sort(sort_idx.begin(), sort_idx.end(),
[&series] (const auto i1, const auto i2) {return series[i1] > series[i2];});
return std::make_pair(series[sort_idx[target]], coeffs[sort_idx[target]]);
}