-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathIntegerHeap.py
218 lines (182 loc) · 7.73 KB
/
IntegerHeap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""IntegerHeap.py
Priority queues of integer keys based on van Emde Boas trees.
Only the keys are stored; caller is responsible for keeping
track of any data associated with the keys in a separate dictionary.
We use a version of vEB trees in which all accesses to subtrees
are performed indirectly through a hash table and the data structures
for the subtrees are only created when they are nonempty. As a
consequence, the data structure takes only linear space
(linear in the number of keys stored in the heap) while still preserving
the O(log log U) time per operation of vEB trees. For better performance,
we switch to bitvectors for sufficiently small integer sizes.
Usage:
Q = BitVectorHeap() # Bit-vector based heap for integers
Q = FlatHeap(i) # Flat heap for 2^i-bit integers
Q = LinearHeap() # Set-based heap with linear-time min operation
Q = IntegerHeap(i) # Choose between BVH and FH depending on i
Q.add(x) # Include x among the values in the heap
Q.remove(x) # Remove x from the values in the heap
Q.min() # Return the minimum value in the heap
if Q # True if Q is nonempty, false if empty
Because the min operation in LinearHeap is a Python primitive rather than
a sequence of interpreted Python instructions, it is actually quite fast;
testing indicates that, for 32-bit keys, FlatHeap(5) beats LinearHeap only
for heaps of 250 or more items. This breakeven point would likely be
different for different numbers of bits per word or when runtime optimizers
such as psyco are in use.
D. Eppstein, January 2010
"""
def IntegerHeap(i):
"""Return an integer heap for 2^i-bit integers.
We use a BitVectorHeap for small i and a FlatHeap for large i.
Timing tests indicate that the cutoff i <= 3 is slightly
faster than the also-plausible cutoff i <= 2, and that both
are much faster than the way-too-large cutoff i <= 4.
The resulting IntegerHeap objects will use 255-bit long integers,
still small compared to the overhead of a FlatHeap."""
if i <= 3:
return BitVectorHeap()
return FlatHeap(i)
Log2Table = {} # Table of powers of two, with their logs
def Log2(b):
"""Return log_2(b), where b must be a power of two."""
while b not in Log2Table:
i = len(Log2Table)
Log2Table[1<<i] = i
return Log2Table[b]
# ======================================================================
# BitVectorHeap
# ======================================================================
class BitVectorHeap:
"""Maintain the minimum of a set of integers using bitvector operations."""
def __init__(self):
"""Create a new BitVectorHeap."""
self._S = 0
def __nonzero__(self):
"""True if this heap is nonempty, false if empty."""
return self._S != 0
def __bool__(self):
"""True if this heap is nonempty, false if empty."""
return self._S != 0
def add(self,x):
"""Include x among the values in the heap."""
self._S |= 1<<x
def remove(self,x):
"""Remove x from the values in the heap."""
self._S &=~ 1<<x
def min(self):
"""Return the minimum value in the heap."""
if not self._S:
raise ValueError("BitVectorHeap is empty")
return Log2(self._S &~ (self._S - 1))
# ======================================================================
# FlatHeap
# ======================================================================
class FlatHeap:
"""Maintain the minimum of a set of 2^i-bit integer values."""
def __init__(self,i):
"""Create a new FlatHeap for 2^i-bit integers."""
self._min = None
self._order = i
self._shift = (1 << (i - 1))
self._max = (1 << (1 << i)) - 1
self._HQ = IntegerHeap(i-1) # Heap of high halfwords
self._LQ = {} # Map high half to heaps of low halfwords
def _rangecheck(self,x):
"""Make sure x is a number we can include in this FlatHeap."""
if x < 0 or x > self._max:
raise ValueError("FlatHeap: %s out of range" % repr(x))
def __nonzero__(self):
"""True if this heap is nonempty, false if empty."""
return not (self._min is None)
def __bool__(self):
"""True if this heap is nonempty, false if empty."""
return not (self._min is None)
def min(self):
"""Return the minimum value in the heap."""
if self._min is None:
raise ValueError("FlatHeap is empty")
return self._min
def add(self,x):
"""Include x among the values in the heap."""
self._rangecheck(x)
if self._min is None or self._min == x:
# adding to an empty heap is easy
self._min = x
return
if x < self._min:
# swap to make sure the value we're adding is non-minimal
x, self._min = self._min, x
H = x >> self._shift # split into high and low halfwords
L = x - (H << self._shift)
if H not in self._LQ:
self._HQ.add(H)
self._LQ[H] = IntegerHeap(self._order-1)
self._LQ[H].add(L)
def remove(self,x):
"""Remove x from the values in the heap."""
self._rangecheck(x)
if self._min == x:
# Removing minimum, move next value into place
# and prepare to remove that next value from secondary heaps
if not self._HQ:
self._min = None
return
H = self._HQ.min()
L = self._LQ[H].min()
x = self._min = (H << self._shift) + L
else:
H = x >> self._shift # split into high and low halfwords
L = x - (H << self._shift)
if H not in self._LQ:
return # ignore removal when not in heap
self._LQ[H].remove(L)
if not self._LQ[H]:
del self._LQ[H]
self._HQ.remove(H)
# ======================================================================
# LinearHeap
# ======================================================================
class LinearHeap:
"""Maintain the minimum of a set of integers using a set object."""
def __init__(self):
"""Create a new BitVectorHeap."""
self._S = set()
def __nonzero__(self):
"""True if this heap is nonempty, false if empty."""
return len(self._S) > 0
def __bool__(self):
"""True if this heap is nonempty, false if empty."""
return len(self._S) > 0
def add(self,x):
"""Include x among the values in the heap."""
self._S.add(x)
def remove(self,x):
"""Remove x from the values in the heap."""
self._S.remove(x)
def min(self):
"""Return the minimum value in the heap."""
return min(self._S)
# ======================================================================
# Unit tests
# ======================================================================
if __name__ == "__main__":
import unittest,random
random.seed(1234)
class IntegerHeapTest(unittest.TestCase):
def testHeaps(self):
o = 5 # do tests on 2^5-bit integers
N = LinearHeap()
I = IntegerHeap(o)
for iteration in range(20000):
self.assertEqual(bool(N),bool(I)) # both have same emptiness
if (not N) or random.randrange(2): # flip coin for add/remove
x = random.randrange(1<<(1<<o))
N.add(x)
I.add(x)
else:
x = N.min()
self.assertEqual(x,I.min())
N.remove(x)
I.remove(x)
unittest.main()