Main navigation | Main content
/*
 * Literal, instruction-by-instruction C translation of the disassembled
 * routine <mystery>.  Each machine instruction is quoted in a comment
 * directly above the C statement that models it; C variables are named
 * after the register (or stack slot) they stand in for.
 *
 * arg: the function's single int argument.
 * Returns: the value left in the stack-slot accumulator `temp`.
 */
/* 00000000 <mystery>: */
/* 0: 55 push %ebp */
/* 1: 57 push %edi */
/* 2: 56 push %esi */
/* 3: 53 push %ebx */
/* 4: 83 ec 04 sub $0x4,%esp */
/* These instructions save the callee-saved registers and set up the
   stack frame; we don't need to translate them. */
int mystery1(int arg) {
    /* 7: 8b 44 24 18 mov 0x18(%esp),%eax */
    /* The prolog pushed/reserved 20 bytes of stack (four 4-byte pushes
       plus the 4-byte sub); together with the 4-byte return address
       that makes 24 = 0x18, so 0x18(%esp) corresponds to the first
       (and only) argument. */
    int eax = arg;
    /* b: c7 04 24 00 a3 e1 11 movl $0x11e1a300,(%esp) */
    /* A local variable on the stack (0x11e1a300 is 300000000 decimal) */
    int temp = 0x11e1a300;
    /* 12: 85 c0 test %eax,%eax */
    /* This test sets ZF iff %eax is 0, SF if it's negative, and
       clears OF. */
    /* 14: 7e 36 jle 4c <mystery+0x4c> */
    /* Thus, the condition "le" corresponds to %eax being signed less
       than or equal to zero. The branch is used to skip over code, so
       we turn it into an "if" with the opposite condition. */
    if (eax > 0) {
        /* 16: be 03 00 00 00 mov $0x3,%esi */
        int esi = 3;
        /* 1b: bb 00 e1 f5 05 mov $0x5f5e100,%ebx */
        /* (0x5f5e100 is 100000000 decimal) */
        int ebx = 0x5f5e100;
        /* 20: b9 01 00 00 00 mov $0x1,%ecx */
        int ecx = 1;
        int edx, ebp;
        /* 25: 8d 76 00 lea 0x0(%esi),%esi */
        /* This instruction has no effect, it's just used for padding
           because the next instruction is the target of a loop back
           edge. */
        do {
            /* 28: 89 cf mov %ecx,%edi */
            int edi = ecx;
            /* 2a: 89 cd mov %ecx,%ebp */
            /* ebp keeps the pre-increment counter for the loop test. */
            ebp = ecx;
            /* 2c: 0f af fe imul %esi,%edi */
            edi *= esi;
            /* 2f: 83 c1 01 add $0x1,%ecx */
            ecx++;
            /* 32: 89 da mov %ebx,%edx */
            edx = ebx;
            /* 34: 89 d8 mov %ebx,%eax */
            eax = ebx;
            /* 36: c1 fa 1f sar $0x1f,%edx */
            /* This is a sign-extending shift that throws away all but
               the sign bit, so it sets edx to -1 if it was negative
               and to 0 otherwise. */
            edx >>= 31;
            /* 39: f7 db neg %ebx */
            ebx = -ebx;
            /* 3b: 83 c6 02 add $0x2,%esi */
            esi += 2;
            /* 3e: 0f af f9 imul %ecx,%edi */
            edi *= ecx;
            /* 41: f7 ff idiv %edi */
            /* idiv divides the 64-bit value edx:eax by its operand,
               leaving the quotient in eax and the remainder in edx.
               NOTE(review): this model only works because edx is the
               sign extension of eax (the mov/sar pair above).  `| eax`
               sign-extends eax, which would overwrite the high half
               for any other edx, and left-shifting a negative edx is
               undefined behavior in ISO C. */
            {
                long long dividend = (long long)edx << 32 | eax;
                eax = dividend / edi;
                edx = dividend % edi;
            }
            /* 43: 01 04 24 add %eax,(%esp) */
            temp += eax;
            /* 46: 3b 6c 24 18 cmp 0x18(%esp),%ebp */
            /* 4a: 75 dc jne 28 <mystery+0x28> */
            /* Loop until the counter value used in this iteration
               equals the argument. */
        } while (ebp != arg);
    }
    /* 4c: 8b 04 24 mov (%esp),%eax */
    eax = temp;
    /* 4f: 83 c4 04 add $0x4,%esp */
    /* 52: 5b pop %ebx */
    /* 53: 5e pop %esi */
    /* 54: 5f pop %edi */
    /* 55: 5d pop %ebp */
    /* Matching the pushes and sub at the beginning */
    /* 56: c3 ret */
    /* The calling convention uses eax for the return value. */
    return eax;
}
And here's just the C code:
/*
 * The same literal translation with the per-instruction commentary
 * removed.  Variables are still named after the registers they model;
 * `temp` is the stack-slot accumulator that becomes the return value.
 */
int mystery1(int arg) {
    int eax = arg;
    int temp = 0x11e1a300;

    if (eax > 0) {
        int esi = 3;
        int ebx = 0x5f5e100;
        int ecx = 1;
        int edx, ebp;

        do {
            int edi = ecx;
            ebp = ecx;
            edi *= esi;
            ecx++;
            edx = ebx;
            eax = ebx;
            edx >>= 31;
            ebx = -ebx;
            esi += 2;
            edi *= ecx;
            {
                /* Models idiv's 64-bit edx:eax dividend.  edx is the
                   sign extension of eax here, so this reduces to a
                   plain 32-bit signed division of eax by edi. */
                long long dividend = (long long)edx << 32 | eax;
                eax = dividend / edi;
                edx = dividend % edi;
            }
            temp += eax;
        } while (ebp != arg);
    }
    eax = temp;
    return eax;
}
This still looks rather complicated, so let's try cleaning up and simplifying it in various ways:
@@ -8,23 +8,17 @@ int ebx = 0x5f5e100; int ecx = 1; - int edx, ebp; + int ebp; do { int edi = ecx; ebp = ecx; edi *= esi; ecx++; - edx = ebx; eax = ebx; - edx >>= 31; ebx = -ebx; esi += 2; edi *= ecx; - { - long long dividend = (long long)edx << 32 | eax; - eax = dividend / edi; - edx = dividend % edi; - } + eax /= edi; temp += eax; } while (ebp != arg); }
@@ -1,11 +1,11 @@ int mystery1(int arg) { int eax = arg; - int temp = 0x11e1a300; + int ebx = 100000000; + int temp = 3 * ebx; if (eax > 0) { int esi = 3; - int ebx = 0x5f5e100; int ecx = 1; int ebp;
@@ -1,15 +1,14 @@ int mystery1(int arg) { - int eax = arg; - int ebx = 100000000; - int temp = 3 * ebx; + int total = 3 * ebx; - if (eax > 0) { + if (arg > 0) { int esi = 3; int ecx = 1; int ebp; do { + int eax; int edi = ecx; ebp = ecx; edi *= esi; @@ -19,9 +18,8 @@ esi += 2; edi *= ecx; eax /= edi; - temp += eax; + total += eax; } while (ebp != arg); } - eax = temp; - return eax; + return total; }
@@ -4,19 +4,19 @@ if (arg > 0) { int esi = 3; - int ecx = 1; + int i = 1; int ebp; do { int eax; - int edi = ecx; - ebp = ecx; + int edi = i; + ebp = i; edi *= esi; - ecx++; + i++; eax = ebx; ebx = -ebx; esi += 2; - edi *= ecx; + edi *= i; eax /= edi; total += eax; } while (ebp != arg);
@@ -9,16 +9,14 @@ int ebp; do { int eax; - int edi = i; + int divisor = i * esi * (i + 1); ebp = i; - edi *= esi; - i++; eax = ebx; ebx = -ebx; esi += 2; - edi *= i; - eax /= edi; + eax /= divisor; total += eax; + i++; } while (ebp != arg); } return total;
@@ -8,14 +8,11 @@ int ebp; do { - int eax; int divisor = i * esi * (i + 1); ebp = i; - eax = ebx; - ebx = -ebx; esi += 2; - eax /= divisor; - total += eax; + total += ebx / divisor; + ebx = -ebx; i++; } while (ebp != arg); }
@@ -6,15 +6,13 @@ int esi = 3; int i = 1; - int ebp; do { int divisor = i * esi * (i + 1); - ebp = i; esi += 2; total += ebx / divisor; ebx = -ebx; i++; - } while (ebp != arg); + } while (i - 1 != arg); } return total; }
@@ -3,12 +3,10 @@ int total = 3 * ebx; if (arg > 0) { - int esi = 3; int i = 1; do { - int divisor = i * esi * (i + 1); - esi += 2; + int divisor = i * (2*i + 1) * (i + 1); total += ebx / divisor; ebx = -ebx; i++;
if (cond) { do { ... } while (cond); } would be equivalent to:
while (cond) { ... }
The problem is that the two conditions arg > 0 and i - 1 != arg look rather different. But maybe they both came from the same condition that the compiler optimized. For the if condition, the compiler knows that the initial value of the loop counter is 1, so 0 might be the optimized version of i - 1. Similarly when we're inside the loop the compiler knows that i-1 can never be greater than arg, because it was initialized to be less than or equal and we stop after the iteration when they're equal. So it looks like both conditions are equivalent to i - 1 < arg, or more idiomatically i <= arg.
However, there's a subtle problem lurking here, which you would probably only notice if you were paranoid about integer overflow attacks, or if you were testing the function with the largest possible positive integer (INT_MAX) as an argument. If we write the condition as i <= arg, then when arg is equal to INT_MAX, the loop's exit condition will never be satisfied. (The program won't actually loop forever, because when i wraps around back near zero, you'll get a divide-by-zero crash.) You might think you would be safe if you kept the condition as the less natural-looking i - 1 < arg, since that condition looks like it should still be false when arg is INT_MAX and i is INT_MAX + 1. (2's complement arithmetic is associative, so (x + 1) - 1 = x for all values of x.) Unfortunately, the C compiler is still allowed to "optimize" the condition i - 1 < arg into the condition i <= arg, even though the two behave differently in the case we just described. The reason is that the C standard says that overflow of a signed integer causes undefined behavior. This means that the compiler is entitled to generate code that does whatever it wants if an overflow occurs, or equivalently, to optimize as if the situation triggering undefined behavior could never occur. It's the programmer's responsibility to ensure that the undefined behavior can never occur.
Undefined behavior has turned out to be an ongoing source of friction between C programmers and compiler makers, since programmers' intuitions about how the compiler works often differ from what the standard allows. In particular, problems tend to arise when compilers become more sophisticated at optimization: optimizations that take better advantage of the undefined-behavior rules can make some previously-working programs run faster, and other previously-working programs crash in hard-to-debug ways. John Regehr's blog has some lucid discussions of these issues.
For now, we can work around this behavior by adding an extra condition on i to prevent overflow.
@@ -2,15 +2,12 @@ int ebx = 100000000; int total = 3 * ebx; - if (arg > 0) { int i = 1; - - do { + while (i < 0x7fffffff && i - 1 < arg) { int divisor = i * (2*i + 1) * (i + 1); total += ebx / divisor; ebx = -ebx; i++; - } while (i - 1 != arg); } return total; }
@@ -2,12 +2,11 @@ int ebx = 100000000; int total = 3 * ebx; - int i = 1; - while (i < 0x7fffffff && i - 1 < arg) { + int i; + for (i = 1; i < 0x7fffffff && i - 1 < arg; i++) { int divisor = i * (2*i + 1) * (i + 1); total += ebx / divisor; ebx = -ebx; - i++; } return total; }
@@ -2,9 +2,9 @@ int ebx = 100000000; int total = 3 * ebx; - int i; - for (i = 1; i < 0x7fffffff && i - 1 < arg; i++) { - int divisor = i * (2*i + 1) * (i + 1); + int j; + for (j = 0; j < arg; j++) { + int divisor = (j + 1) * (2*j + 3) * (j + 2); total += ebx / divisor; ebx = -ebx; }
/*
 * Final cleaned-up decompilation.
 *
 * Starts from 3 * unit and, for j = 0 .. terms-1, adds
 * unit / ((j+1) * (2j+3) * (j+2)) with the sign of `unit` flipped after
 * every term.  Since 1/((j+1)(2j+3)(j+2)) == 4/((2j+2)(2j+3)(2j+4)),
 * this is the alternating series 3 + 4/(2*3*4) - 4/(4*5*6) + ...,
 * evaluated in integer arithmetic scaled by 100000000.
 *
 * terms: number of series terms to add; values <= 0 add none.
 * Returns: the scaled integer sum (300000000 when no terms are added).
 *
 * NOTE(review): with a 32-bit int the divisor product first exceeds
 * INT_MAX at j = 1023 (1024 * 2049 * 1025 = 2150630400), so
 * terms > 1023 triggers signed overflow, which is undefined behavior.
 * Terms from j = 367 onward already have a divisor above 100000000
 * and contribute 0 because integer division truncates.
 */
int mystery1(int terms) {
    int unit = 100000000;
    int total = 3 * unit;
    int j;
    for (j = 0; j < terms; j++) {
        int divisor = (j + 1) * (2*j + 3) * (j + 2);
        /* Integer division truncates toward zero for either sign. */
        total += unit / divisor;
        /* Alternate the sign of the next term. */
        unit = -unit;
    }
    return total;
}
For comparison, here's the code as the instructor originally wrote it:
/*
 * Sums an alternating series in scaled integer arithmetic.
 *
 * Starts from 3 * m (m = 100000000) and, for i = 0 .. steps-1, adds
 * m / ((i+1) * (2i+3) * (i+2)), negating m after every step so the
 * terms alternate in sign.
 *
 * steps: number of terms to add; values <= 0 add none.
 * Returns: the scaled integer sum (300000000 when no terms are added).
 *
 * NOTE(review): with a 32-bit int, denom first exceeds INT_MAX at
 * i = 1023 (1024 * 2049 * 1025 = 2150630400), so steps > 1023 triggers
 * signed overflow, which is undefined behavior.  From i = 367 onward
 * denom is above 100000000, so `change` is already 0.
 */
int mystery(int steps) {
    int m = 100000000;
    int sum = 3*m;
    int i;
    for (i = 0; i < steps; i++) {
        int denom = (i+1) * (2*i+3) * (i+2);
        /* Integer division truncates toward zero for either sign of m. */
        int change = m / denom;
        sum += change;
        /* Flip the sign for the next term. */
        m = -m;
    }
    return sum;
}