/* ex: set ts=2 et: */ /* $Id$ */ /* * Copyright 2009 Ryan Flynn * * Implementation of a genetic algorithm finder and an x86 machine code generator. * * Why is this interesting? * * Typically software development involves humans writing a function to * generate output from input. * * Genetic algorithms take input and output data and try to write a function * to map between them; thus it is a sort of automated reverse engineering, * allowing us to discover the relationship between sets of data. * * This implementation expresses its candidate "creature" functions in raw * x86 machine code, which means evaluation of them is extremely fast, many * orders of magnitude faster than generating Lisp, C or some higher level * language. Genetic algorithm is highly CPU-intensive and any gain in * efficiency translates productivity-wise in a linear fashion. * * Works approximately like so: * * generate population of N random x86 machine code functions (creatures) * do * execute creature against target data * calculate the aggregate difference between creature's output vs. target output * keep best creatures * re-generate population of N random x86 machine code functions (creatures) * possibly mutate new creatures with "best" creatures from previous population * while we haven't found a succinct, perfect match * * Suggested use: * gcc -lm -O3 -o execbytes execbytes.c # compile * ./execbytes # run * sudo renice +19 $(pgrep execbytes) # in another terminal to be nice * * NOTE: the x86 ops used are all controlled and "safe", meaning they only * operate on eax, ebx, ecx, edx registers. this isn't going to blow anything * up. * */ /* * FIXME: still, very occasionally (once every billion genotypes or so) * produces code that crashes(!) */ /* * TODO: * implement a "simplify" function. this would identify effectless or redundant * operations, such as performing some operations more than once in a row, and * reduce them to canonical, equivalent operations * * fix mutation * * instead of generating random new genotypes and mutating in sections of * the best genotypes... * * copy a random "best" new genotype and randomly mutate a section of it * * this will make a huge difference in the speed! * * */ #define _XOPEN_SOURCE 500 #include #include #include #include #include #include #include #include #include #include #include #include #define CHROMO_MAX 20 /* maximum chromosomes in a genotype */ #define POP_SIZE 64 * 1024 /* total genotypes in a population generation */ #define MAX_CONST 0xFF /* maximum possible random integer value */ typedef uint8_t u8; typedef int8_t s8; typedef int32_t s32; typedef uint32_t u32; typedef uint64_t u64; /* * the function for which we try to match; * for now, for simplicity, we already know the answer to the question; * however, genetic algorithms are not limited to this trivial use; * we can also score against a set of output from an unknown algorithm. * that's a big TODO! */ static int magic(int a) { /* * we have succeeded in finding exact matches (score=0) for the * following functions: * * a < 64 ? a : a * 2; * a * 0xFFFFFF * (a * a * 3) + 5 * (a+1)*(a+2)*(a+3) * a*a+100 * (a*a)*(a+10) * a*a*a * a*a+5 * a*a * a+7 * a+a+a * a+a * a * * our original expression we used in the scheme version: * (+ (* a a) (+ (* 2 b) (+ (* 3 a) 5))) */ return (int)sqrt(a); } enum { #define FUNC_PREFIX_LEN 2 /* number of chromosomes needed to initialize function */ ENTER, MOV_8_EBP_EAX, #define FUNC_SUFFIX_LEN 2 /* chromosomes always at the end */ LEAVE, RET, #define X86_FIRST ADD_IMM8 ADD_IMM8, ADD_R32, IMUL_IMM, IMUL_R32, MOV_R32, XCHG_R32, XOR_R32, XOR_IMM32, XADD_R32, SHR_IMM8, SHL_IMM8, OR_R32, AND_R32, AND_IMM32, NEG_R32, NOT_R32, SUB_R32, SUB_IMM32, BT_R32, BT_IMM8, BSF_R32, BSR_R32, BTC_R32, BTC_IMM8, BTR_R32, BTR_IMM8, CMP_R32, CMP_IMM32, CMPXCHG_R32, //CRC32_R32, RCL_IMM8, RCR_IMM8, ROL_IMM8, ROR_IMM8, CMOVA, CMOVB, CMOVBE, CMOVC, CMOVE, CMOVG, CMOVGE, CMOVL, CMOVLE, CMOVNA, CMOVNAE, CMOVNB, CMOVNBE, CMOVNC, CMOVNE, CMOVNG, CMOVNGE, CMOVNL, CMOVNLE, CMOVNO, CMOVNP, CMOVNS, CMOVNZ, CMOVO, CMOVP, CMOVPE, CMOVPO, CMOVS, CMOVZ, //POPCNT, //BSWAP_EDX, //IDIV_R32, #if 0 ADD_EAX, XOR_R32, JE, JNZ #endif X86_COUNT /* last, special */ }; /** * ranged [min, max] u32 * NOTE: this function gets called ALOT * make it faster(!) */ static u32 randr(u32 min, u32 max) { #if 1 double scaled = random() / (RAND_MAX + 1.0); u32 r = min + ((max - min + 1) * scaled); #else /* trade quicker time for lower quality psuedo-random numbers */ u32 r = min + ((u32)random() % (max - min + 1)); #if 0 assert(r >= min); assert(r <= max); #endif #endif return r; } /** * return a random double within the range [0.0, 1.0) */ #if 0 static double rand01(void) { return drand48(); } #else #define rand01() drand48() #endif /** * modr/m byte specifies src and dst registers */ static u8 gen_modrm(u8 digit) { static const u8 x[] = { /* dst <- src */ 0xc0, /* eax <- eax */ 0xd8, /* ebx <- eax */ 0xc8, /* ecx <- eax */ 0xd0, /* edx <- eax */ 0xc3, /* eax <- ebx */ 0xdb, /* ebx <- ebx */ 0xcb, /* ecx <- ebx */ 0xd3, /* edx <- ebx */ 0xc1, /* eax <- ecx */ 0xd9, /* ebx <- ecx */ 0xc9, /* ecx <- ecx */ 0xd1, /* edx <- ecx */ 0xc2, /* eax <- edx */ 0xda, /* ebx <- edx */ 0xca, /* ecx <- edx */ 0xd2 /* edx <- edx */ }; u8 modrm; if (0 == digit) { modrm = x[random() & 0xF];//randr(0, sizeof x / sizeof x[0] - 1)]; } else { modrm = (0xc0 | (digit << 3)) + (random() & 0x3);//randr(0, 3); /* FIXME: can't include ebp! */ } return modrm; } static const const char * disp_modrm(u8 n, const u8 modrm, char *buf, size_t len) { static const char reg[8][4] = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" }; if (0 == modrm) { n -= 0xc0; snprintf(buf, len, "%%%s, %%%s", reg[n >> 3], reg[n & 7]); } else { snprintf(buf, len, "%%%s", reg[n & 7]); } return buf; } /** * the different instruction sets through time; * at least they're backwards compatible; * this allows us to target different CPUs */ enum { CPU_88, CPU_86, CPU_286, CPU_386, CPU_486, CPU_586, CPU_686, CPU_MMX1, CPU_MMX2, CPU_MMX3, CPU_COUNT /* last, special */ }; /* * set of x86 instruction templates */ static const struct x86 { const char *descr; /* printf-friendly format, which * displays the 'rest' bytes (if any) */ const u8 op[4], /* instruction bytes */ oplen, /* length of instruction op, the */ modrmlen, modrm, /* /n digit for some operations */ immlen; /* number of variable bytes */ } X86[] = { /* function op */ /* descr opcode oplen,modrmlen,modrm,imm */ { "enter" , { 0xc8, 0x00, 0x00, 0x00 }, 4, 0, 0, 0 }, /* */ { "mov 0x8(%%ebp), %%eax",{0x8b, 0x45, 0x08 }, 3, 0, 0, 0 }, /* */ /* function suffix */ { "leave" , { 0xc9 }, 1, 0, 0, 0 }, /* */ { "ret" , { 0xc3 }, 1, 0, 0, 0 }, /* */ /* function contents */ { "add 0x%02" PRIx8 "," , { 0x83 }, 1, 1, 0, 1 }, /* /r ADD r/m32, imm8 */ { "add " , { 0x01 }, 1, 1, 0, 0 }, /* /r ADD r/m32, r32 */ { "imul 0x%02" PRIx8 "," , { 0x6b }, 1, 1, 0, 1 }, /* /r ib IMUL r32, r/m32, imm8 */ { "imul " , { 0x0f, 0xaf }, 2, 1, 0, 0 }, /* /r IMUL r32, r/m32 */ { "mov " , { 0x8b }, 1, 1, 0, 0 }, /* /r MOV r32, r/m32 */ { "xchg " , { 0x87 }, 1, 1, 0, 0 }, /* /r XCHG r32, r/m32 */ { "xor " , { 0x33 }, 1, 1, 0, 0 }, /* /r XOR r32, r/m32 */ { "xor " , { 0x81 }, 1, 1, 6, 4 }, /* /6 id XOR r/m32, imm32 */ { "xadd " , { 0x0f, 0xc1 }, 2, 1, 0, 0 }, /* /r XADD r/m32, r32 */ { "shr 0x%02" PRIx8 "," , { 0xc1 }, 1, 1, 5, 1 }, /* /5 ib SHR r/m32, imm8 */ { "shl 0x%02" PRIx8 "," , { 0xc1 }, 1, 1, 4, 1 }, /* /4 ib SHL r/m32, imm8 */ { "or " , { 0x0b }, 1, 1, 0, 0 }, /* /r OR r32, r/m32 */ { "and " , { 0x23 }, 1, 1, 0, 0 }, /* /r AND r32, r/m32 */ { "and 0x%08" PRIx32 ",", { 0x81 }, 1, 1, 4, 4 }, /* /4 id AND r/m32, imm32 */ { "neg " , { 0xf7 }, 1, 1, 3, 0 }, /* /3 NEG r/m32 */ { "not " , { 0xf7 }, 1, 1, 2, 0 }, /* /2 NOT r/m32 */ { "sub " , { 0x29 }, 1, 1, 0, 0 }, /* /r SUB r/m32, r32 */ { "sub 0x%08" PRIx32 ",", { 0x81 }, 1, 1, 5, 4 }, /* /5 id SUB r/m32, imm32 */ { "bt " , { 0x0f, 0xa3 }, 2, 1, 0, 0 }, /* BT r/m32, r32 */ { "bt 0x%02" PRIx8 "," , { 0x0f, 0xba }, 2, 1, 4, 1 }, /* /4 ib BT r/m32, imm8 */ { "bsf " , { 0x0f, 0xbc }, 2, 1, 0, 0 }, /* /r BSF r32, r/m32 */ { "bsr " , { 0x0f, 0xbd }, 2, 1, 0, 0 }, /* /r BSR r32, r/m32 */ { "btc " , { 0x0f, 0xbb }, 2, 1, 0, 0 }, /* BTC r/m32, r32 */ { "btc 0x%02" PRIx8 "," , { 0x0f, 0xba }, 2, 1, 7, 1 }, /* /7 ib BTC r/m32, imm8 */ { "btr " , { 0x0f, 0xb3 }, 2, 1, 0, 0 }, /* BTR r/m32, r32 */ { "btr 0x%02" PRIx8 "," , { 0x0f, 0xba }, 2, 1, 6, 1 }, /* /6 ib BTR r/m32, imm8 */ { "cmp " , { 0x39 }, 1, 1, 0, 0 }, /* /r CMP r/m32, r32 */ { "cmp 0x%08" PRIx32 ",", { 0x81 }, 1, 1, 7, 4 }, /* /7 id CMP r/m32, imm32 */ { "cmpxchg" , { 0x0f, 0xb1 }, 2, 1, 0, 0 }, /* 0F B1 /r CMPXCHG r/m32, r32 */ //{ "crc32 " , { 0xf2, 0x0f, 0x38, 0xf1 }, 4, 1, 0, 0 }, /* F2 0F 38 F1 /r CRC32 r32, r/m32 */ { "rcl 0x%02" PRIx8 "," , { 0xc1 }, 1, 1, 2, 1 }, /* C1 /2 ib RCL r/m32, imm8 */ { "rcr 0x%02" PRIx8 "," , { 0xc1 }, 1, 1, 3, 1 }, /* C1 /3 ib RCR r/m32, imm8 */ { "rol 0x%02" PRIx8 "," , { 0xc1 }, 1, 1, 0, 1 }, /* C1 /0 ib ROL r/m32, imm8 */ { "ror 0x%02" PRIx8 "," , { 0xc1 }, 1, 1, 1, 1 }, /* C1 /1 ib ROR r/m32, imm8 */ /* conditional mov */ { "cmova " , { 0x0f, 0x47 }, 2, 1, 0, 0 }, /* /r CMOVA r32, r/m32 */ { "cmovb " , { 0x0f, 0x42 }, 2, 1, 0, 0 }, /* 0F 42 /r CMOVB r32, r/m32 */ { "cmovbe " , { 0x0f, 0x46 }, 2, 1, 0, 0 }, /* /r CMOVBE r32, r/m32 */ { "cmovc " , { 0x0f, 0x42 }, 2, 1, 0, 0 }, /* 0F 42 /r CMOVC r32, r/m32 */ { "cmove " , { 0x0f, 0x44 }, 2, 1, 0, 0 }, /* /r CMOVE r32, r/m32 */ { "cmovg " , { 0x0f, 0x4f }, 2, 1, 0, 0 }, /* /r CMOVG r32, r/m32 */ { "cmovge " , { 0x0f, 0x4d }, 2, 1, 0, 0 }, /* /r CMOVGE r32, r/m32 */ { "cmovl " , { 0x0f, 0x4c }, 2, 1, 0, 0 }, /* /r CMOVL r32, r/m32 */ { "cmovle " , { 0x0f, 0x4e }, 2, 1, 0, 0 }, /* 0F 4E /r CMOVLE r32, r/m32 */ { "cmovna " , { 0x0f, 0x46 }, 2, 1, 0, 0 }, /* 0F 46 /r CMOVNA r32, r/m32 */ { "cmovnae" , { 0x0f, 0x42 }, 2, 1, 0, 0 }, /* 0F 42 /r CMOVNAE r32, r/m32 */ { "cmovnb " , { 0x0f, 0x43 }, 2, 1, 0, 0 }, /* 0F 43 /r CMOVNB r32, r/m32 */ { "cmovnbe" , { 0x0f, 0x47 }, 2, 1, 0, 0 }, /* 0F 47 /r CMOVNBE r32, r/m32 */ { "cmovnc " , { 0x0f, 0x43 }, 2, 1, 0, 0 }, /* 0F 43 /r CMOVNC r32, r/m32 */ { "cmovne " , { 0x0f, 0x45 }, 2, 1, 0, 0 }, /* 0F 45 /r CMOVNE r32, r/m32 */ { "cmovng " , { 0x0f, 0x4e }, 2, 1, 0, 0 }, /* 0F 4E /r CMOVNG r32, r/m32 */ { "cmovnge" , { 0x0f, 0x4c }, 2, 1, 0, 0 }, /* 0F 4C /r CMOVNGE r32, r/m32 */ { "cmovnl " , { 0x0f, 0x4d }, 2, 1, 0, 0 }, /* 0F 4D /r CMOVNL r32, r/m32 */ { "cmovnle" , { 0x0f, 0x4f }, 2, 1, 0, 0 }, /* 0F 4F /r CMOVNLE r32, r/m32 */ { "cmovno " , { 0x0f, 0x41 }, 2, 1, 0, 0 }, /* 0F 41 /r CMOVNO r32, r/m32 */ { "cmovnp " , { 0x0f, 0x4b }, 2, 1, 0, 0 }, /* 0F 4B /r CMOVNP r32, r/m32 */ { "cmovns " , { 0x0f, 0x49 }, 2, 1, 0, 0 }, /* 0F 49 /r CMOVNS r32, r/m32 */ { "cmovnz " , { 0x0f, 0x45 }, 2, 1, 0, 0 }, /* 0F 45 /r CMOVNZ r32, r/m32 */ { "cmovo " , { 0x0f, 0x40 }, 2, 1, 0, 0 }, /* 0F 40 /r CMOVO r32, r/m32 */ { "cmovp " , { 0x0f, 0x4a }, 2, 1, 0, 0 }, /* 0F 4A /r CMOVP r32, r/m32 */ { "cmovpe " , { 0x0f, 0x4a }, 2, 1, 0, 0 }, /* 0F 4A /r CMOVPE r32, r/m32 */ { "cmovpo " , { 0x0f, 0x4b }, 2, 1, 0, 0 }, /* 0F 4B /r CMOVPO r32, r/m32 */ { "cmovs " , { 0x0f, 0x48 }, 2, 1, 0, 0 }, /* 0F 48 /r CMOVS r32, r/m32 */ { "cmovz " , { 0x0f, 0x44 }, 2, 1, 0, 0 }, /* 0F 44 /r CMOVZ r32, r/m32 */ #if 0 /* crash, why? */ { "popcnt" , { 0xf3, 0x0f, 0xb8 }, 3, 1, 0, 0 }, /* /r POPCNT r32, r/m32 */ #endif #if 0 /* NOTE: not really helpful... */ /* TODO: implement field for +rd and friends... */ //0f ca bswap %edx { "bswap %%eax" , { 0x0f, 0xc8 }, 2, 0, 0, 0 }, /* +rd BSWAP r32 */ //{ "bswap %%edx" , { 0x0f, 0xca }, 1, 0, 0, 0 }, /* +rd BSWAP r32 */ #endif #if 0 /* DAMMIT; now if we divide by a register containing zero we get SIGFPE... */ { "idiv" , { 0xf7 }, 1, 1, 7, 0 }, /* /7 IDIV r/m32 */ #endif /* */ //{ 0, 1, 1, { 0x3b }, "cmp" }, #if 0 /* NOTE: these now work, except almost always result in a never-ending program. either we give them a real short timeslice in which to run, or we need to verify that the contents of the loop DO something :-/ */ { 1, 0, 1, { 0x74 }, "je" }, { 1, 0, 1, { 0x75 }, "jnz" }, #endif }; struct genotype { u32 len; struct op { u8 x86, /* index into X86[] */ modrm, /* mod/rm byte, if used */ data[4]; /* random integer data, if used */ } chromo[CHROMO_MAX]; }; typedef struct genotype genotype; struct pop { struct genoscore { u32 score; struct genotype geno; } indiv[POP_SIZE]; }; /* populate prefix */ #define GEN_PREFIX(g) do { \ (g)->chromo[0].x86 = ENTER; \ (g)->chromo[1].x86 = MOV_8_EBP_EAX; \ } while (0) /* populate genotype suffix */ #define GEN_SUFFIX(g) do { \ (g)->chromo[(g)->len++].x86 = LEAVE; \ (g)->chromo[(g)->len++].x86 = RET; \ } while (0) typedef int (*func)(int); static void gen_dump(const struct genotype *g, FILE *f) { char hex[32], *h; u32 i, j; assert(g->len <= CHROMO_MAX); for (i = 0; i < g->len; i++) { const struct x86 *x = X86 + g->chromo[i].x86; h = hex; for (j = 0; j < x->oplen; j++) { sprintf(h, "%02" PRIx8 " ", x->op[j]); h += 3; } if (x->modrmlen) { sprintf(h, "%02" PRIx8 " ", g->chromo[i].modrm); h += 3; } for (j = 0; j < x->immlen; j++) { sprintf(h, "%02" PRIx8 " ", g->chromo[i].data[j]); h += 3; } memset(h, ' ', 32 - (h - hex)); h += 32 - (h - hex); *h = '\0'; fprintf(f, "%2" PRIu32 " %s", i, hex); fprintf(f, x->descr, *(u32 *)&g->chromo[i].data); if (x->modrmlen) { char modbuf[16]; fprintf(f, " %s", disp_modrm(g->chromo[i].modrm, x->modrm, modbuf, sizeof modbuf)); } fputc('\n', f); } } #if 0 /** * find an x86 instruction by name */ static u8 x86_by_name(const char *descr) { # define X86_NOTFOUND 0xFF unsigned i; u8 off = X86_NOTFOUND; size_t dlen = strlen(descr); for (i = 0; i < sizeof X86 / sizeof X86[0]; i++) { if (0 == strncmp(descr, X86[i].descr, dlen)) { off = (u8)i; break; } } return off; } /** * load a genotype representation from gen_dump() */ static int gen_load(FILE *f, genotype *g) { char line[256]; u8 byte[8], tmp; char descr[16]; int tmp; struct op *o; /* setup prefix */ GEN_PREFIX(g); g->len = FUNC_PREFIX_LEN; /* read genotype from file */ while (NULL != fgets(line, sizeof line[0], f)) { if ( 8 == sscanf("%2hhu %02" PRIx8 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 " %.*s", &tmp, byte+0, byte+1, byte+2, byte+3, byte+4, byte+5, (int)(sizeof descr) - 1, descr)) { o = g->chromo + g->len; o->x86 = x86_by_descr(descr); if (X86_NOTFOUND == o->x86) { fprintf(stderr, "Could not locate x86 instruction '%s'!\n", descr); return 0; } else { const x86 *x = X86 + o->x86; unsigned boff = x->oplen; if (x->modrmlen) o->modrm = byte[boff++]; if (x->immlen) memcpy(o->data, byte+boff, x->immlen); } g->len++; } } GEN_SUFFIX(g); return 1; } #endif static void chromo_random(genotype *g, unsigned off, unsigned len) { u32 i; for (i = off; i < off + len; i++) { const struct x86 *x; g->chromo[i].x86 = randr(X86_FIRST, X86_COUNT - 1); x = X86 + g->chromo[i].x86; g->chromo[i].modrm = gen_modrm(x->modrm); if (x->immlen) *(u32 *)&g->chromo[i].data = randr(0, MAX_CONST); } } static void gen_mutate(genotype *g, const double mutate_rate) { /* calculate an offset within g for a mutation occur; * calculate the length of the area to be mutated; * calculate the length of the replacement; * this allows for insertions, deletions and replacements */ u32 doff = randr(FUNC_PREFIX_LEN, FUNC_PREFIX_LEN + g->len - FUNC_SUFFIX_LEN), /* NOTE: incorporate mutate_rate somehow! */ dlen = (u32)(rand01() * (g->len - doff)), slen = (u32)(rand01() * (CHROMO_MAX - FUNC_SUFFIX_LEN - (g->len - dlen))), suflen = g->len - (doff + dlen); /* data after the mutation */ assert(g->len + (int)(slen - dlen) <= CHROMO_MAX - FUNC_SUFFIX_LEN); if (suflen > 0) memmove(g->chromo + doff + slen, g->chromo + doff, suflen * sizeof g->chromo[0]); if (slen > 0) chromo_random(g, doff, slen); g->len += (int)(slen - dlen); assert(g->len <= CHROMO_MAX - FUNC_SUFFIX_LEN); } static void gen_gen(genotype *dst, const genotype *src, const double cross_rate, const double mutate_rate) { if (src) { /* mutate an existing genotype */ memcpy(dst, src, sizeof *dst); dst->len -= FUNC_SUFFIX_LEN; gen_mutate(dst, mutate_rate); } else { /* initial generation */ dst->len = randr(FUNC_PREFIX_LEN + 1, CHROMO_MAX - FUNC_SUFFIX_LEN); chromo_random(dst, FUNC_PREFIX_LEN, dst->len); GEN_PREFIX(dst); } GEN_SUFFIX(dst); } static inline int asm_func_shim(func f, int in) { int out; __asm__( /* save and zero regs to prevent cheating by called function */ /* note: eax contains param 'in' */ "push %%ebx\n" "push %%ecx\n" "push %%edx\n" "xor %%ebx, %%ebx\n" "xor %%ecx, %%ecx\n" "xor %%edx, %%edx\n" /* pass in first parameter */ "movl %1, (%%esp)\n" /* call function pointer */ "call *%2\n" /* restore regs */ "pop %%edx\n" "pop %%ecx\n" "pop %%ebx\n" : "=a" (out) : "r" (in), "m" (f)); return out; } static struct target { int in, out; } Target[256]; static int TargetLen = 0; static u32 score(func f, int verbose) { u32 score = 0; int i; if (verbose) printf("%11s %11s %11s %11s %11s\n" "----------- ----------- ----------- ----------- -----------\n", "in", "target", "actual", "diff", "diffsum"); for (i = 0; i < TargetLen; i++) { int diff, /* difference between target and sc */ sc = asm_func_shim(f, i); #if 0 /* why do this? */ if (INT_MAX == sc) { score = (u32)0xFFFFFFFF; break; } if (sc < 0) sc = -sc; #endif diff = Target[i].out - sc; diff = abs(diff); /* detect overflow */ if ((u32)(INT_MAX - diff) < score) { score = (u32)0xFFFFFFFF; break; } score += diff; if (verbose) printf("%11d %11d %11d %11d %11" PRIu32 "\n", Target[i].in, Target[i].out, sc, diff, score); } if (verbose) printf("score=%" PRIu32 "\n", score); return score; } static void pop_gen(struct pop *p, u32 keep, const double cross_rate, const double mutate_rate) { u32 i; if (keep > 0) { /* * if a set if [0..keep-1] "best" are set, select a random one * to serve as the basis for each member of the new generation */ for (i = keep; i < sizeof p->indiv / sizeof p->indiv[0]; i++) { const genotype *src = &p->indiv[randr(0, keep-1)].geno; gen_gen(&p->indiv[i].geno, src, cross_rate, mutate_rate); p->indiv[i].score = 0; } } else { /* * initial generation, calculate completely random */ for (i = keep; i < sizeof p->indiv / sizeof p->indiv[0]; i++) { gen_gen(&p->indiv[i].geno, NULL, cross_rate, mutate_rate); p->indiv[i].score = 0; } } } /** * append a command to the buffer */ static u32 inline chromo_add(const struct op *op, u8 *buf, u32 len) { const struct x86 *x = X86 + op->x86; #if 0 assert(op->x86 < sizeof X86 / sizeof X86[0]); assert(x->oplen <= 4); assert(x->modrmlen <= 1); assert(x->immlen <= 4); #endif memcpy(buf + len, x->op, x->oplen); len += x->oplen; if (x->modrmlen) buf[len++] = op->modrm; if (x->immlen) { memcpy(buf + len, op->data, x->immlen); len += x->immlen; } return len; } #if 0 /* jmp-related */ /** * calculate the total size of the chromosome in bytes */ static u32 chromo_bytes(const struct op *op) { const struct x86 *x = X86 + op->x86; u32 len = 0; len += x->oplen; len += x->modrmlen; len += x->immlen; return len; } /** * given an offset, find the closest beginning of a * chromosome that we can jump to; we don't want to land in the middle! * @param off the total offset of the jump * @param rel the relative offset of the jump */ static s8 gen_jmp_pos(const genotype *g, const u32 off, u8 rel) { u32 total = 0, i; s8 res; rel %= g->len - 1; /* choose a chromosome in the genome to jump to */ if (0 == rel) rel = 1; for (i = 0; i < rel; i++) total += chromo_bytes(g->chromo + i); /* calculate the total offset of that chromosome */ res = (s8)(total - off); /* calculate byte offset */ if (-2 == res) /* jumping to self causes never-ending loop */ res = 0; return res; } #endif static u32 gen_compile(genotype *g, u8 *buf) { u32 i, len = 0; for (i = 0; i < g->len; i++) { #if 0 if (JE == g->chromo[i].x86 || JNZ == g->chromo[i].x86) { /* if relative jump, adjust the random jump destination to a valid offset */ g->chromo[i].data[0] = gen_jmp_pos(g, len + chromo_bytes(g->chromo + i), g->chromo[i].data[0]); } #endif len = chromo_add(g->chromo + i, buf, len); } return len; } /** * given 2 genoscores, find the one with the lowest (closest) score. * if the scores are identical, favor the shorter one. */ static int genoscore_cmp(const void *va, const void *vb) { const struct genoscore *a = va, *b = vb; if (a->score == b->score) /* shorter is better, given same score */ return (a->geno.len > b->geno.len) - (a->geno.len < b->geno.len); return (a->score > b->score) - (a->score < b->score); } static void x86_dump(const u8 *x86, u32 len) { while (len--) printf("%02" PRIx8 " ", *x86++); fputc('\n', stdout); } static u8 x86[4096]; static int Dump = 0; static void pop_score(struct pop *p) { u32 i; for (i = 0; i < sizeof p->indiv / sizeof p->indiv[0]; i++) { u32 x86len = gen_compile(&p->indiv[i].geno, x86); #if 0 assert(x86len <= 8 * CHROMO_MAX && "Try to catch obviously bogus stuff, do I still need this?!"); #endif if (Dump > 0) x86_dump(x86, x86len); if (Dump > 1) (void)gen_dump(&p->indiv[i].geno, stdout); p->indiv[i].score = score((func)x86, 0); } qsort(p->indiv, sizeof p->indiv / sizeof p->indiv[0], sizeof p->indiv[0], genoscore_cmp); } static void calc_target(const int in[], unsigned cnt) { unsigned i; TargetLen = (int)cnt; for (i = 0; i < TargetLen; i++) { Target[i].in = in[i]; Target[i].out = magic(in[i]); } } static struct pop Pop; static void test_load(void) { genotype g; FILE *f = fopen("success-sqrt.txt", "r"); gen_load(f, &g); printf("loaded:\n"); gen_dump(&g, stdout); } int main(int argc, char *argv[]) { /* mutation rates */ const double Mutate_Rate = 0.2, Cross_Rate = 0.7; genotype CurrBest; u64 indivs = 0; u32 generation = 0; #if 0 test_load(); exit(EXIT_SUCCESS); #endif /* show sizes of our core types in bytes */ printf("sizeof Pop.indiv[0]=%u\n", sizeof Pop.indiv[0]); printf("sizeof Pop=%u\n", sizeof Pop); /* double-check instruction enum and table */ printf("X86_COUNT=%u (sizeof X86 / sizeof X86[0])=%u\n", X86_COUNT, sizeof X86 / sizeof X86[0]); assert(0 == strncmp("or", X86[OR_R32] .descr, 2)); assert(0 == strncmp("cmpxchg", X86[CMPXCHG_R32].descr, 7)); assert(0 == strncmp("cmova", X86[CMOVA] .descr, 5)); assert(0 == strncmp("cmovge", X86[CMOVGE] .descr, 6)); assert(0 == strncmp("cmovne", X86[CMOVNE] .descr, 6)); assert(0 == strncmp("cmovz", X86[CMOVZ] .descr, 5)); assert(X86_COUNT == sizeof X86 / sizeof X86[0]); /* one-time initialization */ if (argc > 1) { Dump += 'd' == argv[1][1]; Dump += (2 * ('D' == argv[1][1])); } { const int in[] = { 0, 7, 21, 49, 105, 217, 441, 889, 1785, 3577, 7161, 14329, 28665, 57337, 114681, 229369, 458745, 917497, 1835001, 3670009, 7340025 }; calc_target(in, sizeof in / sizeof in[0]); /* TODO: make this run-time configurable */ } memset(&CurrBest, 0, sizeof CurrBest); srandom(time(NULL)); setvbuf(stdout, (char *)NULL, _IONBF, 0); /* unbuffer stdout */ memset(&Pop, 0, sizeof Pop); pop_gen(&Pop, 0, Cross_Rate, Mutate_Rate); /* seminal generation */ /* evolve */ do { int diff; pop_score(&Pop); /* test it and sort best ones */ indivs += sizeof Pop.indiv / sizeof Pop.indiv[0]; diff = memcmp(&CurrBest, &Pop.indiv[0].geno, sizeof CurrBest); if (diff || 0 == generation % 100) { /* display generation regularly or on progress */ time_t t = time(NULL); printf("GENERATION %5" PRIu32 " %10" PRIu64 " genotypes @%s", generation, indivs, ctime(&t)); if (diff) { /* only display best if it's changed; raises signal/noise ration */ static u8 x86[4096]; gen_dump(&Pop.indiv[0].geno, stdout); printf("score=%" PRIu32 "\n", Pop.indiv[0].score); /* show detailed score from best candidate */ (void)gen_compile(&Pop.indiv[0].geno, x86); (void)score((func)x86, 1); memcpy(&CurrBest, &Pop.indiv[0].geno, sizeof CurrBest); } } generation++; pop_gen(&Pop, 5, Cross_Rate, Mutate_Rate); /* retain best from previous */ } while ( 0 != Pop.indiv[0].score /* perfect match */ /* favor shorter solutions even after perfect match found */ || FUNC_PREFIX_LEN + FUNC_SUFFIX_LEN + 1 != Pop.indiv[0].geno.len ); printf("done.\n"); return 0; }