/* ex: set ts=2 et: */
/*
 * When implementing a circular buffer, is it faster to wrap the current
 * position back to the front of the buffer by using
 *
 *     pos % buflen
 *
 * or
 *
 *     if (pos >= buflen) pos -= buflen
 *
 * ?  I suspect the latter is faster because modulus uses division, which is
 * slow, but modulus has the benefit of being branchless... does it help?
 */
/*
 * Results:
 *
 * $ gcc -O3 -o modulus-vs-if-minus modulus-vs-if-minus.c
 * pizza@yeti:/vh/pe/www/pizza/c
 * $ ./modulus-vs-if-minus
 * 20000000 iterations:
 *   modulus 1.535 secs
 *   ifminus 2.212 secs
 *
 * I must say I'm surprised. It must be the deep pipelines on today's modern
 * CPUs?
 *
 * NOTE(review): buflen is a file-scope static that is never written, so an
 * optimizing compiler is free to treat it as the constant 1024 and may reduce
 * the '%' to a bit mask, i.e. no division at all. That would explain the
 * numbers above -- confirm by inspecting the generated assembly.
 * NOTE(review): each timed iteration also pays for a random() call, an
 * assert() and a call through a function pointer, so the figures are not the
 * cost of the wrap operation alone.
 */

/* Expose random(), srandom() and gettimeofday() under strict -std=cNN modes. */
#define _XOPEN_SOURCE 600

#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>

#define BUFLEN 1024

/* Length of the (imaginary) circular buffer that positions are wrapped to. */
static size_t buflen = BUFLEN;

/*
 * Advance pos by add and wrap the result into [0, buflen) with '%'.
 * Works for any pos/add whose sum does not overflow size_t.
 */
static size_t modulus(size_t pos, size_t add)
{
  pos += add;
  pos %= buflen;
  return pos;
}

/*
 * Advance pos by add and wrap the result with a single compare-and-subtract.
 * Only one buflen is ever subtracted, so the result is in [0, buflen) only
 * when pos + add < 2 * buflen on entry (true for the inputs used below).
 */
static size_t ifcheck(size_t pos, size_t add)
{
  pos += add;
  if (pos >= buflen) {
    pos -= buflen;
  }
  return pos;
}

#define N_TIMES 20000000u /* timed iterations per candidate; unsigned to match %u */
#define MAX_ADD 2         /* must be a power of two: (MAX_ADD - 1) is used as a mask */

/*
 * Time N_TIMES calls of the wrap function f and print one result line.
 *
 * name: label printed right-aligned in a 24-column field.
 * f:    wrap function taking (pos, add) and returning the new position.
 *
 * Prints the elapsed wall-clock time, as measured by gettimeofday(), in
 * seconds with millisecond precision.
 */
static void speed(const char *name, size_t (*f)(size_t, size_t))
{
  struct timeval start, end;
  double elapsed;
  size_t pos = 0;
  unsigned i;

#if 0
  /* test for correctness ... (disabled: no test() is defined in this file) */
  test(name, f);
#endif

  /* Print the label before timing so it is visible while the loop runs. */
  printf("%24s", name);
  fflush(stdout);

  /* test speed */
  gettimeofday(&start, NULL);
  for (i = 0; i < N_TIMES; i++) { /* exactly N_TIMES iterations */
    /* NOTE: doesn't need to be actually random */
    size_t add = (size_t)random() & (MAX_ADD - 1);

    pos = f(pos, add);
    assert(pos < buflen);
  }
  gettimeofday(&end, NULL);

  /*
   * Subtract the fields before scaling: multiplying tv_sec by 1000000 in
   * integer arithmetic overflows where time_t/long is 32 bits wide.
   */
  elapsed = (double)(end.tv_sec - start.tv_sec)
          + (double)(end.tv_usec - start.tv_usec) / 1000000.0;
  printf(" %.3f secs\n", elapsed);
}

/* Seed the PRNG, announce the iteration count and time both candidates. */
int main(void)
{
  srandom((unsigned)time(NULL));
  printf("%u iterations:\n", N_TIMES);
  speed("modulus", modulus);
  speed("ifminus", ifcheck);
  return 0;
}