Intrinsic functions
|
_EAX = a1;
_ECX = 1;
__asm { lock xadd [eax], ecx }
return _ECX + 1;
|
return _InterlockedIncrement(a1);
|
|
Scalar SSE floating point instructions
|
int __cdecl sub_578C(int a1, __int64 a2)
{
void *v3; // ST08_4@1
Tcl_TimerProc *v4; // ST04_4@1
int v7; // [sp+1Ch] [bp-Ch]@1
_EBX = 22425;
__asm { movsd xmm0, [ebp+arg_4] }
v7 = 0;
v3 = &v7;
v4 = (Tcl_TimerProc *)sub_54CB;
__asm
{
mulsd xmm0, ds:(qword_19FC8 - 5799h)[ebx]
cvttsd2si eax, xmm0
}
tclStubsPtr->tcl_CreateTimerHandler(_EAX, v4, v3);
while ( !v7 )
tclStubsPtr->tcl_DoOneEvent(0);
return 0;
}
|
int __cdecl sub_578C(int _30, double arg4)
{
int varC; // [sp+1Ch] [bp-Ch]@1
varC = 0;
tclStubsPtr->tcl_CreateTimerHandler((signed int)floor(arg4 * 1000.0),
(Tcl_TimerProc *)sub_54CB, &varC);
while ( !varC )
tclStubsPtr->tcl_DoOneEvent(0);
return 0;
}
|
The decompiler can handle SSE scalar floating point instructions directly, without
any need of intrinsic functions. Mac OS X users will appreciate this improvement
very much because short SSE scalar sequences are very popular in Mach-O files.
|
SSE intrinsic functions
|
v6 = &unk_B36C;
_EAX = *(_DWORD *)(a1 + 8);
_EDX = *(_DWORD *)(v2 + 12);
__asm
{
movd xmm1, eax
movd xmm0, edx
punpckldq xmm0, xmm1
punpckldq xmm0, ds:(xmmword_C1E0 - 42D6h)[ebx]
subpd xmm0, ds:(xmmword_C1F0 - 42D6h)[ebx]
haddpd xmm0, xmm0
movapd [ebp+var_18], xmm0
}
v12 = _FT0;
result = printf("\t%s: %.*f(%sbytes)", v4, 0, v12, v6);
|
xmm0_3 = _mm_sub_pd(
(__m128d)_mm_unpacklo_epi32(
_mm_unpacklo_epi32(_mm_cvtsi32_si128(eax0->fC),
_mm_cvtsi32_si128(eax0->f8)),
(__m128i)xmmword_C1E0),
(__m128d)xmmword_C1F0);
result = printf("\t%s: %.*f(%sbytes)",
edx0,
0,
*(_OWORD *)&_mm_hadd_pd(xmm0_3, xmm0_3),
&unk_B36C);
|
While operations on packed values are difficult to decipher with and without
intrinsic functions, there is still a side effect: the decompiler has more
information about the data flows and unknown assembly instructions do not
disrupt the analysis. See how the v6 temporary variable disappears.
|
SSE scalar operations - 2
|
int __cdecl casual(int a1, int a2)
{
int result; // eax@1
_EBX = 31521;
_EDI = a1;
__asm { cvtsi2sd xmm2, edi }
_EAX = random() & 0x7FFFFFFF;
__asm
{
cvtsi2sd xmm0, eax
divsd xmm0, ds:(qword_9FD0 - 7B21h)[ebx]
}
_ESI = a2 - a1;
__asm
{
cvtsi2sd xmm1, esi
mulsd xmm0, xmm1
addsd xmm2, xmm0
cvttsd2si eax, xmm2
}
return result;
}
|
signed int __cdecl casual(signed int arg0, int arg4)
{
return (signed int)floor((double)arg0
+ (double)(random() & 0x7FFFFFFF) / 2147483647.0
* (double)(arg4 - arg0));
}
|
The old version produced a page of code; the new version produces just one line.
Always a pleasure to have a shorter text: no intermediary variables,
no inline assembly, just straightforward code.
|
SSE scalar operations - 3
|
long double __cdecl auto_time_interval(__int64 a1, __int64 a2)
{
long double v3; // fst7@1
long double result; // fst7@2
double v5; // [sp+0h] [bp-10h]@2
_ECX = -1866029031;
v3 = (long double)(a1 - a2);
if ( (signed int)((unsigned __int64)(a1 - a2) >> 32) < 0 )
{
v5 = v3 + flt_90C78FEC;
__asm
{
movsd xmm0, [ebp+var_10]
divsd xmm0, ds:(qword_90C78FF0 - 90C6A819h)[ecx]
movsd [ebp+var_10], xmm0
}
result = v5;
}
else
{
v5 = v3;
__asm
{
movsd xmm0, [ebp+var_10]
divsd xmm0, ds:(qword_90C78FF0 - 90C6A819h)[ecx]
movsd [ebp+var_10], xmm0
}
result = v5;
}
return result;
}
|
long double __cdecl auto_time_interval(__int64 arg0, __int64 arg8)
{
return (double)(unsigned __int64)(arg0 - arg8) / 1000000.0;
}
|
The decompiler recognized an unsigned conversion and represented it concisely.
It may look surprising, but the single line on the right side is equivalent to the long text on the left side.
|
Better ternary operations
|
v36 = sub_804CF6C(v20, (-(v40 < 1) & 0xFFFFFFDF) + 35);
|
v36 = sub_804CF6C(v20, v40 < 1 ? 2 : 35);
|
Yet another common compiler idiom is recognized and the output becomes better.
|
Better recognition of inlined functions
|
v4 = "1234567";
v3 = a2[1];
while ( 1 )
{
v5 = *v3 < (unsigned __int8)*v4;
if ( *v3 != *v4 )
break;
if ( !*v3 )
return 6;
v6 = v3[1];
v5 = v6 < v4[1];
if ( v6 != v4[1] )
break;
v3 += 2;
v4 += 2;
if ( !v6 )
return 6;
}
result = (-(-v5 != v5 - 1) & 0xFFFFFFFA) + 6;
|
if ( strcmp(a2[1], "1234567") )
result = 0;
else
result = 6;
|
More inlined string functions are recognized.
|
Recognition of signed power2 modulos
|
result = (v4 + a1) & 0x80000003;
if ( result < 0 )
result = ((result - 1) | 0xFFFFFFFC) + 1;
|
result = (v4 + a1) % 4;
|
One more common compiler idiom. There are many others, we chose only one sample.
|
NOTE: these are just some selected examples that can be illustrated as a side-by-side difference.
Hex-Rays Decompiler v1.2 includes many other improvements and new features that are not mentioned on this page -
simply because there was nothing to compare them with.
Also, some improvements have already been illustrated in the previous comparisons.
We continue to improve 64-bit arithmetic and inline function recognition, but the examples
would somewhat repeat older stuff, so we did not include them.
Please refer to the news page for more details.
This is all for the moment. Please come back for more examples!