/* ex: set ts=2 et: */ /* weilawei came into freenode ##c and asked if anyone knew a faster way to * convert an unsigned int into an ascii binary representation than his * decToBin() function, so here's my investigation. */ /* pizza@pizzabox:~/c$ cat /proc/cpuinfo | grep bogomips bogomips : 2005.91 pizza@pizzabox:~/c$ gcc -O3 -o int2bin int2bin.c pizza@pizzabox:~/c$ ./int2bin 20000000 iterations: decToBin 1.819 secs int2bin tiny 1.217 secs int2bin 0.334 secs int2bin_unroll 0.234 secs int2bin32 0.297 secs int2bin32_unroll 0.218 secs int2bin32_unroll_order 0.253 secs */ #include #include #include #include #include #include const char *decToBin(unsigned n, char *buf, size_t buflen) { unsigned i = 32; for (i = 0; i < 32; i++) { buf[i] = (n & 0x80000000u) ? '1' : '0'; n <<= 1; } buf[32] = '\0'; return buf; } /** * a function built for small size, not for speed; relies on no external * resources; this would be a candidate for an embedded system */ const char * int2bin_tiny(unsigned n, char *buf, size_t buflen) { unsigned i = 32; buf += 32; *buf-- = '\0'; do { *buf-- = '0' + (n & 1); n >>= 1; } while (--i); return buf + 1; } static const char Asc4[16][4] = { "0000", "0001", "0010", "0011", "0100", "0101", "0110", "0111", "1000", "1001", "1010", "1011", "1100", "1101", "1110", "1111" }; const char * int2bin(unsigned n, char *buf, size_t buflen) { unsigned i = 8; char *w = buf + 28; do { memcpy(w, Asc4[n & 0xF], 4); w -= 4; n >>= 4; } while (--i); buf[32] = '\0'; return buf; } const char * int2bin_unroll(unsigned n, char *buf, size_t buflen) { memcpy(buf + 28, Asc4[(n >> 0) & 0xF], 4); memcpy(buf + 24, Asc4[(n >> 4) & 0xF], 4); memcpy(buf + 20, Asc4[(n >> 8) & 0xF], 4); memcpy(buf + 16, Asc4[(n >> 12) & 0xF], 4); memcpy(buf + 12, Asc4[(n >> 16) & 0xF], 4); memcpy(buf + 8, Asc4[(n >> 20) & 0xF], 4); memcpy(buf + 4, Asc4[(n >> 24) & 0xF], 4); memcpy(buf + 0, Asc4[(n >> 28) & 0xF], 4); buf[32] = '\0'; return buf; } static const char Asc8[256][8] = { "00000000", "00000001", "00000010", "00000011", "00000100", "00000101", "00000110", "00000111", "00001000", "00001001", "00001010", "00001011", "00001100", "00001101", "00001110", "00001111", "00010000", "00010001", "00010010", "00010011", "00010100", "00010101", "00010110", "00010111", "00011000", "00011001", "00011010", "00011011", "00011100", "00011101", "00011110", "00011111", "00100000", "00100001", "00100010", "00100011", "00100100", "00100101", "00100110", "00100111", "00101000", "00101001", "00101010", "00101011", "00101100", "00101101", "00101110", "00101111", "00110000", "00110001", "00110010", "00110011", "00110100", "00110101", "00110110", "00110111", "00111000", "00111001", "00111010", "00111011", "00111100", "00111101", "00111110", "00111111", "01000000", "01000001", "01000010", "01000011", "01000100", "01000101", "01000110", "01000111", "01001000", "01001001", "01001010", "01001011", "01001100", "01001101", "01001110", "01001111", "01010000", "01010001", "01010010", "01010011", "01010100", "01010101", "01010110", "01010111", "01011000", "01011001", "01011010", "01011011", "01011100", "01011101", "01011110", "01011111", "01100000", "01100001", "01100010", "01100011", "01100100", "01100101", "01100110", "01100111", "01101000", "01101001", "01101010", "01101011", "01101100", "01101101", "01101110", "01101111", "01110000", "01110001", "01110010", "01110011", "01110100", "01110101", "01110110", "01110111", "01111000", "01111001", "01111010", "01111011", "01111100", "01111101", "01111110", "01111111", "10000000", "10000001", "10000010", "10000011", "10000100", "10000101", "10000110", "10000111", "10001000", "10001001", "10001010", "10001011", "10001100", "10001101", "10001110", "10001111", "10010000", "10010001", "10010010", "10010011", "10010100", "10010101", "10010110", "10010111", "10011000", "10011001", "10011010", "10011011", "10011100", "10011101", "10011110", "10011111", "10100000", "10100001", "10100010", "10100011", "10100100", "10100101", "10100110", "10100111", "10101000", "10101001", "10101010", "10101011", "10101100", "10101101", "10101110", "10101111", "10110000", "10110001", "10110010", "10110011", "10110100", "10110101", "10110110", "10110111", "10111000", "10111001", "10111010", "10111011", "10111100", "10111101", "10111110", "10111111", "11000000", "11000001", "11000010", "11000011", "11000100", "11000101", "11000110", "11000111", "11001000", "11001001", "11001010", "11001011", "11001100", "11001101", "11001110", "11001111", "11010000", "11010001", "11010010", "11010011", "11010100", "11010101", "11010110", "11010111", "11011000", "11011001", "11011010", "11011011", "11011100", "11011101", "11011110", "11011111", "11100000", "11100001", "11100010", "11100011", "11100100", "11100101", "11100110", "11100111", "11101000", "11101001", "11101010", "11101011", "11101100", "11101101", "11101110", "11101111", "11110000", "11110001", "11110010", "11110011", "11110100", "11110101", "11110110", "11110111", "11111000", "11111001", "11111010", "11111011", "11111100", "11111101", "11111110", "11111111", }; const char * int2bin32(unsigned n, char *buf, size_t buflen) { unsigned i = 4; char *w = buf + 24; do { memcpy(w, Asc8[n & 0xFF], 8); w -= 8; n >>= 8; } while (--i); buf[32] = '\0'; return buf; } /** * this is the fastest function i can make, it is very fast for the following reasons: * - branchless code * - table lookups, good balance between table size and number of lookups * - gcc optimizes memcpy() call away because of small, fixed-sized params, wow * - casting to (uint8_t) requires fewer operations than & 0xFF * questions: * - for some reason moving backwards through buf is faster than forwards; maybe cache-related? */ const char * int2bin32_unroll(unsigned n, char *buf, size_t buflen) { buf[32] = '\0'; memcpy(buf + 24, Asc8[(uint8_t)(n)], 8); memcpy(buf + 16, Asc8[(uint8_t)(n >> 8)], 8); memcpy(buf + 8, Asc8[(uint8_t)(n >> 16)], 8); memcpy(buf, Asc8[(uint8_t)(n >> 24)], 8); return buf; } /** * slightly slower; for some reason accessing in non-sequential order is slightly faster * than sequential... */ const char * int2bin32_unroll_order(unsigned n, char *buf, size_t buflen) { memcpy(buf, Asc8[(uint8_t)(n >> 24)], 8); memcpy(buf + 16, Asc8[(uint8_t)(n >> 8)], 8); memcpy(buf + 24, Asc8[(uint8_t)(n)], 8); memcpy(buf + 8, Asc8[(uint8_t)(n >> 16)], 8); buf[32] = '\0'; return buf; } /** * just some basic validation */ static void test(const char *name, const char *(*f)(unsigned, char *, size_t)) { char buf[36]; printf("%30s ", name); fflush(stdout); assert(32 == sizeof(int) * CHAR_BIT); assert(0 == strcmp("00000000000000000000000000000000", f(0x0, buf, sizeof buf))); assert(0 == strcmp("00000000000000000000000000000001", f(0x1, buf, sizeof buf))); assert(0 == strcmp("00000000000000000000000000000011", f(0x3, buf, sizeof buf))); assert(0 == strcmp("00000000000000000000000000010000", f(1 << 4, buf, sizeof buf))); assert(0 == strcmp("00000000000000010000000000000000", f(1 << 16, buf, sizeof buf))); assert(0 == strcmp("10000000000000000000000000000000", f(0x80000000, buf, sizeof buf))); assert(0 == strcmp("11111111111111111111111111111111", f(0xFFFFFFFF, buf, sizeof buf))); } #define N_TIMES 20000000 static void speed(const char *name, const char *(*f)(unsigned, char *, size_t)) { char buf[36]; struct timeval tv[2]; double d[2]; unsigned i; /* test for correctness ... */ test(name, f); /* test speed */ i = N_TIMES; gettimeofday(tv, NULL); do f(i, buf, sizeof buf); while (i--); gettimeofday(tv + 1, NULL); d[0] = (tv[0].tv_sec * 1000000) + tv[0].tv_usec; d[1] = (tv[1].tv_sec * 1000000) + tv[1].tv_usec; printf(" %.3f secs\n", (d[1] - d[0]) / 1000000); } int main(void) { printf("%u iterations:\n", N_TIMES); speed("decToBin", decToBin); speed("int2bin tiny", int2bin_tiny); speed("int2bin", int2bin); speed("int2bin_unroll", int2bin_unroll); speed("int2bin32", int2bin32); speed("int2bin32_unroll", int2bin32_unroll); speed("int2bin32_unroll_order", int2bin32_unroll_order); return 0; }