1 : #include <stdio.h>
2 : #include <stdlib.h>
3 : #include <string.h>
4 : #include "intset.h"
5 : #include "zmalloc.h"
6 : #include "endianconv.h"
7 :
8 : /* Note that these encodings are ordered, so:
9 : * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
10 : #define INTSET_ENC_INT16 (sizeof(int16_t))
11 : #define INTSET_ENC_INT32 (sizeof(int32_t))
12 : #define INTSET_ENC_INT64 (sizeof(int64_t))
13 :
14 : /* Return the required encoding for the provided value. */
15 : static uint8_t _intsetValueEncoding(int64_t v) {
16 51684 : if (v < INT32_MIN || v > INT32_MAX)
17 21751 : return INTSET_ENC_INT64;
18 29933 : else if (v < INT16_MIN || v > INT16_MAX)
19 14017 : return INTSET_ENC_INT32;
20 : else
21 15916 : return INTSET_ENC_INT16;
22 : }
23 :
24 : /* Return the value at pos, given an encoding. */
25 201573 : static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
26 : int64_t v64;
27 : int32_t v32;
28 : int16_t v16;
29 :
30 201573 : if (enc == INTSET_ENC_INT64) {
31 149529 : memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64));
32 : memrev64ifbe(&v64);
33 149529 : return v64;
34 52044 : } else if (enc == INTSET_ENC_INT32) {
35 15575 : memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32));
36 : memrev32ifbe(&v32);
37 15575 : return v32;
38 : } else {
39 36469 : memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16));
40 : memrev16ifbe(&v16);
41 36469 : return v16;
42 : }
43 : }
44 :
45 : /* Return the value at pos, using the configured encoding. */
46 : static int64_t _intsetGet(intset *is, int pos) {
47 200774 : return _intsetGetEncoded(is,pos,intrev32ifbe(is->encoding));
48 : }
49 :
50 : /* Set the value at pos, using the configured encoding. */
51 : static void _intsetSet(intset *is, int pos, int64_t value) {
52 30397 : uint32_t encoding = intrev32ifbe(is->encoding);
53 :
54 30397 : if (encoding == INTSET_ENC_INT64) {
55 16548 : ((int64_t*)is->contents)[pos] = value;
56 : memrev64ifbe(((int64_t*)is->contents)+pos);
57 13849 : } else if (encoding == INTSET_ENC_INT32) {
58 4199 : ((int32_t*)is->contents)[pos] = value;
59 : memrev32ifbe(((int32_t*)is->contents)+pos);
60 : } else {
61 9650 : ((int16_t*)is->contents)[pos] = value;
62 : memrev16ifbe(((int16_t*)is->contents)+pos);
63 : }
64 : }
65 :
66 : /* Create an empty intset. */
67 17208 : intset *intsetNew(void) {
68 17208 : intset *is = zmalloc(sizeof(intset));
69 17208 : is->encoding = intrev32ifbe(INTSET_ENC_INT16);
70 17208 : is->length = 0;
71 17208 : return is;
72 : }
73 :
74 : /* Resize the intset */
75 : static intset *intsetResize(intset *is, uint32_t len) {
76 36612 : uint32_t size = len*intrev32ifbe(is->encoding);
77 36612 : is = zrealloc(is,sizeof(intset)+size);
78 36612 : return is;
79 : }
80 :
81 : /* Search for the position of "value". Return 1 when the value was found and
82 : * sets "pos" to the position of the value within the intset. Return 0 when
83 : * the value is not present in the intset and sets "pos" to the position
84 : * where "value" can be inserted. */
85 31293 : static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
86 31293 : int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
87 31293 : int64_t cur = -1;
88 :
89 : /* The value can never be found when the set is empty */
90 31293 : if (intrev32ifbe(is->length) == 0) {
91 4891 : if (pos) *pos = 0;
92 4891 : return 0;
93 : } else {
94 : /* Check for the case where we know we cannot find the value,
95 : * but do know the insert position. */
96 52804 : if (value > _intsetGet(is,intrev32ifbe(is->length)-1)) {
97 5959 : if (pos) *pos = intrev32ifbe(is->length);
98 5959 : return 0;
99 20443 : } else if (value < _intsetGet(is,0)) {
100 2193 : if (pos) *pos = 0;
101 2193 : return 0;
102 : }
103 : }
104 :
105 103253 : while(max >= min) {
106 94049 : mid = (min+max)/2;
107 94049 : cur = _intsetGet(is,mid);
108 94049 : if (value > cur) {
109 44118 : min = mid+1;
110 49931 : } else if (value < cur) {
111 40885 : max = mid-1;
112 : } else {
113 : break;
114 : }
115 : }
116 :
117 18250 : if (value == cur) {
118 9046 : if (pos) *pos = mid;
119 9046 : return 1;
120 : } else {
121 9204 : if (pos) *pos = min;
122 9204 : return 0;
123 : }
124 : }
125 :
126 : /* Upgrades the intset to a larger encoding and inserts the given integer. */
127 : static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
128 9780 : uint8_t curenc = intrev32ifbe(is->encoding);
129 9780 : uint8_t newenc = _intsetValueEncoding(value);
130 9780 : int length = intrev32ifbe(is->length);
131 9780 : int prepend = value < 0 ? 1 : 0;
132 :
133 : /* First set new encoding and resize */
134 9780 : is->encoding = intrev32ifbe(newenc);
135 19560 : is = intsetResize(is,intrev32ifbe(is->length)+1);
136 :
137 : /* Upgrade back-to-front so we don't overwrite values.
138 : * Note that the "prepend" variable is used to make sure we have an empty
139 : * space at either the beginning or the end of the intset. */
140 10579 : while(length--)
141 799 : _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
142 :
143 : /* Set the value at the beginning or the end. */
144 9780 : if (prepend)
145 : _intsetSet(is,0,value);
146 : else
147 9776 : _intsetSet(is,intrev32ifbe(is->length),value);
148 9780 : is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
149 9780 : return is;
150 : }
151 :
152 13503 : static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
153 : void *src, *dst;
154 13503 : uint32_t bytes = intrev32ifbe(is->length)-from;
155 13503 : uint32_t encoding = intrev32ifbe(is->encoding);
156 :
157 13503 : if (encoding == INTSET_ENC_INT64) {
158 12460 : src = (int64_t*)is->contents+from;
159 12460 : dst = (int64_t*)is->contents+to;
160 12460 : bytes *= sizeof(int64_t);
161 1043 : } else if (encoding == INTSET_ENC_INT32) {
162 206 : src = (int32_t*)is->contents+from;
163 206 : dst = (int32_t*)is->contents+to;
164 206 : bytes *= sizeof(int32_t);
165 : } else {
166 837 : src = (int16_t*)is->contents+from;
167 837 : dst = (int16_t*)is->contents+to;
168 837 : bytes *= sizeof(int16_t);
169 : }
170 13503 : memmove(dst,src,bytes);
171 13503 : }
172 :
173 : /* Insert an integer in the intset */
174 31598 : intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
175 31598 : uint8_t valenc = _intsetValueEncoding(value);
176 : uint32_t pos;
177 31598 : if (success) *success = 1;
178 :
179 : /* Upgrade encoding if necessary. If we need to upgrade, we know that
180 : * this value should be either appended (if > 0) or prepended (if < 0),
181 : * because it lies outside the range of existing values. */
182 31598 : if (valenc > intrev32ifbe(is->encoding)) {
183 : /* This always succeeds, so we don't need to curry *success. */
184 9780 : return intsetUpgradeAndAdd(is,value);
185 : } else {
186 : /* Abort if the value is already present in the set.
187 : * This call will populate "pos" with the right position to insert
188 : * the value when it cannot be found. */
189 21818 : if (intsetSearch(is,value,&pos)) {
190 2000 : if (success) *success = 0;
191 2000 : return is;
192 : }
193 :
194 39636 : is = intsetResize(is,intrev32ifbe(is->length)+1);
195 19818 : if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
196 : }
197 :
198 19818 : _intsetSet(is,pos,value);
199 19818 : is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
200 19818 : return is;
201 : }
202 :
203 : /* Delete integer from intset */
204 8618 : intset *intsetRemove(intset *is, int64_t value, int *success) {
205 8618 : uint8_t valenc = _intsetValueEncoding(value);
206 : uint32_t pos;
207 8618 : if (success) *success = 0;
208 :
209 8618 : if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {
210 7014 : uint32_t len = intrev32ifbe(is->length);
211 :
212 : /* We know we can delete */
213 7014 : if (success) *success = 1;
214 :
215 : /* Overwrite value with tail and update length */
216 7014 : if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);
217 14028 : is = intsetResize(is,len-1);
218 7014 : is->length = intrev32ifbe(len-1);
219 : }
220 8618 : return is;
221 : }
222 :
223 : /* Determine whether a value belongs to this set */
224 1688 : uint8_t intsetFind(intset *is, int64_t value) {
225 1688 : uint8_t valenc = _intsetValueEncoding(value);
226 1688 : return valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,NULL);
227 : }
228 :
229 : /* Return random member */
230 3044 : int64_t intsetRandom(intset *is) {
231 6088 : return _intsetGet(is,rand()%intrev32ifbe(is->length));
232 : }
233 :
234 : /* Sets the value to the value at the given position. When this position is
235 : * out of range the function returns 0, when in range it returns 1. */
236 92771 : uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
237 92771 : if (pos < intrev32ifbe(is->length)) {
238 113672 : *value = _intsetGet(is,pos);
239 56836 : return 1;
240 : }
241 35935 : return 0;
242 : }
243 :
244 : /* Return intset length */
245 48330 : uint32_t intsetLen(intset *is) {
246 48330 : return intrev32ifbe(is->length);
247 : }
248 :
249 : /* Return intset blob size in bytes. */
250 9834 : size_t intsetBlobLen(intset *is) {
251 9834 : return sizeof(intset)+intrev32ifbe(is->length)*intrev32ifbe(is->encoding);
252 : }
253 :
254 : #ifdef INTSET_TEST_MAIN
255 : #include <sys/time.h>
256 :
257 : void intsetRepr(intset *is) {
258 : int i;
259 : for (i = 0; i < intrev32ifbe(is->length); i++) {
260 : printf("%lld\n", (uint64_t)_intsetGet(is,i));
261 : }
262 : printf("\n");
263 : }
264 :
265 : void error(char *err) {
266 : printf("%s\n", err);
267 : exit(1);
268 : }
269 :
270 : void ok(void) {
271 : printf("OK\n");
272 : }
273 :
274 : long long usec(void) {
275 : struct timeval tv;
276 : gettimeofday(&tv,NULL);
277 : return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
278 : }
279 :
280 : #define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
281 : void _assert(char *estr, char *file, int line) {
282 : printf("\n\n=== ASSERTION FAILED ===\n");
283 : printf("==> %s:%d '%s' is not true\n",file,line,estr);
284 : }
285 :
286 : intset *createSet(int bits, int size) {
287 : uint64_t mask = (1<<bits)-1;
288 : uint64_t i, value;
289 : intset *is = intsetNew();
290 :
291 : for (i = 0; i < size; i++) {
292 : if (bits > 32) {
293 : value = (rand()*rand()) & mask;
294 : } else {
295 : value = rand() & mask;
296 : }
297 : is = intsetAdd(is,value,NULL);
298 : }
299 : return is;
300 : }
301 :
302 : void checkConsistency(intset *is) {
303 : int i;
304 :
305 : for (i = 0; i < (intrev32ifbe(is->length)-1); i++) {
306 : uint32_t encoding = intrev32ifbe(is->encoding);
307 :
308 : if (encoding == INTSET_ENC_INT16) {
309 : int16_t *i16 = (int16_t*)is->contents;
310 : assert(i16[i] < i16[i+1]);
311 : } else if (encoding == INTSET_ENC_INT32) {
312 : int32_t *i32 = (int32_t*)is->contents;
313 : assert(i32[i] < i32[i+1]);
314 : } else {
315 : int64_t *i64 = (int64_t*)is->contents;
316 : assert(i64[i] < i64[i+1]);
317 : }
318 : }
319 : }
320 :
321 : int main(int argc, char **argv) {
322 : uint8_t success;
323 : int i;
324 : intset *is;
325 : sranddev();
326 :
327 : printf("Value encodings: "); {
328 : assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
329 : assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
330 : assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
331 : assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
332 : assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
333 : assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
334 : assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
335 : assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
336 : assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
337 : assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
338 : ok();
339 : }
340 :
341 : printf("Basic adding: "); {
342 : is = intsetNew();
343 : is = intsetAdd(is,5,&success); assert(success);
344 : is = intsetAdd(is,6,&success); assert(success);
345 : is = intsetAdd(is,4,&success); assert(success);
346 : is = intsetAdd(is,4,&success); assert(!success);
347 : ok();
348 : }
349 :
350 : printf("Large number of random adds: "); {
351 : int inserts = 0;
352 : is = intsetNew();
353 : for (i = 0; i < 1024; i++) {
354 : is = intsetAdd(is,rand()%0x800,&success);
355 : if (success) inserts++;
356 : }
357 : assert(intrev32ifbe(is->length) == inserts);
358 : checkConsistency(is);
359 : ok();
360 : }
361 :
362 : printf("Upgrade from int16 to int32: "); {
363 : is = intsetNew();
364 : is = intsetAdd(is,32,NULL);
365 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
366 : is = intsetAdd(is,65535,NULL);
367 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
368 : assert(intsetFind(is,32));
369 : assert(intsetFind(is,65535));
370 : checkConsistency(is);
371 :
372 : is = intsetNew();
373 : is = intsetAdd(is,32,NULL);
374 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
375 : is = intsetAdd(is,-65535,NULL);
376 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
377 : assert(intsetFind(is,32));
378 : assert(intsetFind(is,-65535));
379 : checkConsistency(is);
380 : ok();
381 : }
382 :
383 : printf("Upgrade from int16 to int64: "); {
384 : is = intsetNew();
385 : is = intsetAdd(is,32,NULL);
386 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
387 : is = intsetAdd(is,4294967295,NULL);
388 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
389 : assert(intsetFind(is,32));
390 : assert(intsetFind(is,4294967295));
391 : checkConsistency(is);
392 :
393 : is = intsetNew();
394 : is = intsetAdd(is,32,NULL);
395 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
396 : is = intsetAdd(is,-4294967295,NULL);
397 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
398 : assert(intsetFind(is,32));
399 : assert(intsetFind(is,-4294967295));
400 : checkConsistency(is);
401 : ok();
402 : }
403 :
404 : printf("Upgrade from int32 to int64: "); {
405 : is = intsetNew();
406 : is = intsetAdd(is,65535,NULL);
407 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
408 : is = intsetAdd(is,4294967295,NULL);
409 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
410 : assert(intsetFind(is,65535));
411 : assert(intsetFind(is,4294967295));
412 : checkConsistency(is);
413 :
414 : is = intsetNew();
415 : is = intsetAdd(is,65535,NULL);
416 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
417 : is = intsetAdd(is,-4294967295,NULL);
418 : assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
419 : assert(intsetFind(is,65535));
420 : assert(intsetFind(is,-4294967295));
421 : checkConsistency(is);
422 : ok();
423 : }
424 :
425 : printf("Stress lookups: "); {
426 : long num = 100000, size = 10000;
427 : int i, bits = 20;
428 : long long start;
429 : is = createSet(bits,size);
430 : checkConsistency(is);
431 :
432 : start = usec();
433 : for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
434 : printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
435 : }
436 :
437 : printf("Stress add+delete: "); {
438 : int i, v1, v2;
439 : is = intsetNew();
440 : for (i = 0; i < 0xffff; i++) {
441 : v1 = rand() % 0xfff;
442 : is = intsetAdd(is,v1,NULL);
443 : assert(intsetFind(is,v1));
444 :
445 : v2 = rand() % 0xfff;
446 : is = intsetRemove(is,v2,NULL);
447 : assert(!intsetFind(is,v2));
448 : }
449 : checkConsistency(is);
450 : ok();
451 : }
452 : }
453 : #endif
|