State-of-the-art binary code analysis tools

# Comparisons of PowerPC disassembly and decompilation

Here are some side-by-side comparisons of disassembly and decompiler output for PowerPC. Please maximize the window to see both columns simultaneously.

The following examples are displayed on this page:

## Simple code

This simple function calculates the sum of the squares of the first N natural numbers. While the function logic is obvious from just looking at the decompiler output, the assembly listing is noisy and has to be studied carefully to be understood. The decompiler saves you time and allows you to concentrate on more exciting aspects of reverse engineering.

Assembler code
f: .set back_chain, -0x20 .set var_4, -4 stw r31, var_4(r1) stwu r1, back_chain(r1) mr r31, r1 stw r3, 0x14(r31) mr r4, r3 cmpwi r3, 0 stw r4, 8(r31) bgt loc_30 b loc_24 loc_24: li r3, 0 stw r3, 0x18(r31) b loc_88 loc_30: li r3, 0 stw r3, 0x10(r31) stw r3, 0xC(r31) b loc_40 loc_40: lwz r3, 0x14(r31) lwz r4, 0xC(r31) cmpw r4, r3 bge loc_7C b loc_54 loc_54: lwz r3, 0xC(r31) mullw r3, r3, r3 lwz r4, 0x10(r31) add r3, r4, r3 stw r3, 0x10(r31) b loc_6C loc_6C: lwz r3, 0xC(r31) addi r3, r3, 1 stw r3, 0xC(r31) b loc_40 loc_7C: lwz r3, 0x10(r31) stw r3, 0x18(r31) b loc_88 loc_88: lwz r3, 0x18(r31) addi r1, r1, 0x20 lwz r31, var_4(r1) blr # End of function f
Pseudocode
int __fastcall f(int a1) { int i; // [sp+Ch] [-14h]@3 int v3; // [sp+10h] [-10h]@3 if ( a1 <= 0 ) return 0; v3 = 0; for ( i = 0; i < a1; ++i ) v3 += i * i; return v3; }

## Linear execution

The PowerPC processor has a number of instructions which can be used to avoid branches (for example cntlzw). The decompiler restores the conditional logic and makes code easier to understand.

Assembler code
# _DWORD c_eq_s(void) .globl _Z6c_eq_sv _Z6c_eq_sv: .set back_chain, -0x10 .set var_8, -8 .set var_4, -4 .set sender_lr, 4 stwu r1, back_chain(r1) mflr r0 stw r0, 0x10+sender_lr(r1) stw r30, 0x10+var_8(r1) stw r31, 0x10+var_4(r1) mr r31, r1 bl c mr r9, r3 extsh r30, r9 bl s mr r9, r3 xor r9, r30, r9 cntlzw r9, r9 srwi r9, r9, 5 clrlwi r9, r9, 24 mr r3, r9 addi r11, r31, 0x10 lwz r0, 4(r11) mtlr r0 lwz r30, -8(r11) lwz r31, -4(r11) mr r1, r11 blr # End of function c_eq_s(void)
Pseudocode
bool c_eq_s(void) { int v0; // r30@1 v0 = c(); return v0 == s(); }

## 64-bit comparison

64-bit comparison usually involves several compare and branch instructions which do not improve the code readability.

Assembler code
.globl i_ge_uh i_ge_uh: .set back_chain, -0x10 .set var_4, -4 stwu r1, back_chain(r1) stw r31, 0x10+var_4(r1) mr r31, r1 lis r9, i@ha lwz r9, i@l(r9) mr r8, r9 srawi r9, r9, 0x1F mr r7, r9 lis r9, uh@ha addi r9, r9, uh@l lwz r10, (uh+4 - uh)(r9) lwz r9, 0(r9) cmplw cr7, r9, r7 bgt cr7, loc_7028 cmplw cr7, r9, r7 bne cr7, loc_7020 cmplw cr7, r10, r8 bgt cr7, loc_7028 loc_7020: li r9, 1 b loc_702C loc_7028: li r9, 2 loc_702C: mr r3, r9 addi r11, r31, 0x10 lwz r31, -4(r11) mr r1, r11 blr # End of function i_ge_uh
Pseudocode
signed int i_ge_uh() { signed int v0; // r9@2 7029 TYPED if ( uh <= (unsigned __int64)i ) v0 = 1; else v0 = 2; return v0; }

## System calls

System calls are always mysterious, but the decompiler helps you with their names and arguments.

Assembler code
mr r3, r26 # set bl .sigfillset li r0, 0xAE li r3, 2 mr r4, r26 mr r5, r29 li r6, 8 sc mfcr r0 lwz r5, (off_F9A704C - dword_F9A7130)(r30) # sub_F9920A4 # start_routine mr r4, r31 # attr mr r6, r28 # arg addi r3, r1, 0x180+var_54 # newthread bl .pthread_create li r0, 0xAE mr r26, r3 mr r4, r29 li r3, 2 li r5, 0 li r6, 8 sc mfcr r0 mr r3, r31 # attr bl .pthread_attr_destroy
Pseudocode
... sigset_t v36; // [sp+8h] [-178h]@47 F992C04 TYPED sigset_t v37; // [sp+88h] [-F8h]@47 F992BEC TYPED pthread_attr_t v38; // [sp+108h] [-78h]@47 F992BC4 TYPED __int16 v39; // [sp+12Ch] [-54h]@47 F992C1C ... _sigfillset(&v37); v29 = linux_syscall(__NR_rt_sigprocmask, 2, &v37, &v36); v30 = _pthread_create((pthread_t *)&v39, &v38, (void *(*)(void *))0x93C10018, v11); v31 = linux_syscall(__NR_rt_sigprocmask, 2, &v36, 0); _pthread_attr_destroy(&v38);

## Compiler helpers

Compilers sometimes use helper functions; the decompiler knows the meaning of many of these helpers and uses that knowledge to simplify the code.

Assembler code
.globl lldiv # weak lldiv: .set back_chain, -0x30 .set var_18, -0x18 .set var_14, -0x14 .set var_10, -0x10 .set var_C, -0xC .set var_8, -8 .set var_4, -4 .set sender_lr, 4 stwu r1, back_chain(r1) mflr r0 stw r28, 0x30+var_10(r1) mr r28, r5 stw r29, 0x30+var_C(r1) mr r29, r6 stw r31, 0x30+var_4(r1) mr r5, r7 mr r31, r3 mr r6, r8 mr r3, r28 mr r4, r29 stw r0, 0x30+sender_lr(r1) stw r26, 0x30+var_18(r1) mr r26, r7 stw r27, 0x30+var_14(r1) mr r27, r8 stw r30, 0x30+var_8(r1) bl __divdi3 stw r3, 0(r31) mr r5, r26 stw r4, 4(r31) mr r6, r27 mr r3, r28 mr r4, r29 bl __moddi3 lwz r0, 0x30+sender_lr(r1) stw r3, 8(r31) mr r3, r31 stw r4, 0xC(r31) mtlr r0 lwz r26, 0x30+var_18(r1) lwz r27, 0x30+var_14(r1) lwz r28, 0x30+var_10(r1) lwz r29, 0x30+var_C(r1) lwz r30, 0x30+var_8(r1) lwz r31, 0x30+var_4(r1) addi r1, r1, 0x30 blr # End of function lldiv
Pseudocode
__int64 *__fastcall lldiv(__int64 *result, int a2, __int64 a3, __int64 a4) { *result = a3 / a4; result[1] = a3 % a4; return result; }

## Floating point arithmetic

The PowerPC processor contains a number of complex floating point instructions which perform several operations at once. It is not easy for a human to recover an expression from the assembler code, but the decompiler does it for you.

Assembler code
.globl _x2y2m1f _x2y2m1f: lis r9, unk_20@ha lfs f0, unk_20@l(r9) fsub f12, f1, f0 fadd f0, f1, f0 fmul f0, f12, f0 fmadd f1, f1, f2, f0 blr # End of function _x2y2m1f
Pseudocode
double __fastcall x2y2m1f(double a1, double a2) { return a1 * ((a1 - 1.0) * (a1 + 1.0)) + a2; }

## Magic multiplication/division operations

Compilers can decompose a multiplication/division instruction into a sequence of cheaper instructions (additions, shifts, etc). This example demonstrates how the decompiler recognizes such sequences and folds them back into the original operation.

Assembler code
# __int64 __fastcall int_u_mod_10() .globl int_u_mod_10 int_u_mod_10: .set back_chain, -0x20 .set var_C, -0xC .set var_8, -8 .set var_4, -4 .set sender_lr, 4 stwu r1, back_chain(r1) mflr r0 stw r0, 0x20+sender_lr(r1) stw r29, 0x20+var_C(r1) stw r30, 0x20+var_8(r1) stw r31, 0x20+var_4(r1) mr r31, r1 bl u mr r10, r3 lis r9, -0x3334 ori r9, r9, 0xCCCD # 0xCCCCCCCD mulhwu r9, r10, r9 srwi r9, r9, 3 mulli r9, r9, 0xA subf r9, r9, r10 mr r30, r9 li r29, 0 mr r9, r29 mr r10, r30 mr r3, r9 mr r4, r10 addi r11, r31, 0x20 lwz r0, 4(r11) mtlr r0 lwz r29, -0xC(r11) lwz r30, -8(r11) lwz r31, -4(r11) mr r1, r11 blr # End of function int_u_mod_10
Pseudocode
__int64 __fastcall int_u_mod_10() { return u() % 0xAu; }

## VLE code

This example demonstrates that the decompiler can handle VLE code without problems.

Assembler code
sub_498E: se_mr r6, r3 se_mr r7, r4 se_add r7, r6 se_subi r7, 1 se_li r5, 0 se_b loc_49A2 # --------------------------------------------------------------------------- loc_499A: se_lbz r4, 0(r6) se_add r5, r4 se_extzh r5 se_addi r6, 1 loc_49A2: se_cmpl r6, r7 se_ble loc_499A se_mr r7, r5 se_mr r3, r7 se_blr # End of function sub_498E
Pseudocode
int __fastcall sub_498E(unsigned __int8 *a1, int a2) { unsigned __int8 *v2; // r6@1 498F TYPED int v3; // r5@1 4997 v2 = a1; v3 = 0; while ( v2 <= &a1[a2 - 1] ) v3 = (unsigned __int16)(v3 + *v2++); return v3; }

## Interactive decompiler

The pseudocode is not something static because the decompiler is interactive the same way as IDA. You can change variable types and names, change function prototypes, add comments and more. The example above presents the result after these modifications.

Surely the result is not ideal, and there is a lot of room for improvement, but we hope that you got the idea.

And you can compare the result with the original: http://lxr.free-electrons.com/source/fs/fat/namei_msdos.c#L224

Assembler code