Hello. I would like to propose two optimizations that the MSVC compiler performs but ICC currently does not. All tests were made on Windows x86.
1) Combining several small movs into one. Example:
struct struct_t
{
char a, b, c, d;
};
void __declspec(noinline) test(struct_t& s)
{
s.a = 'a';
s.b = 'b';
s.c = 'c';
s.d = 'd';
}
Code generated by the current ICC with -Ox:
mov BYTE PTR [eax], 97
mov BYTE PTR [1+eax], 98
mov BYTE PTR [2+eax], 99
mov BYTE PTR [3+eax], 100
ret
These four byte movs can be combined into a single dword mov, as MSVC does:
mov DWORD PTR [ecx], 1684234849 ; 64636261H
ret
2) Eliminating useless copies from volatile memory to registers. I think this is correct, and MSVC performs this optimization. Example:
#include <stdio.h>
bool isInt(int)
{
return true;
}
bool isInt(short)
{
return false;
}
void __declspec(noinline) test()
{
volatile int a = 5;
volatile short b = 2;
printf("int = %i, short = %i\n", isInt(a), isInt(b));
}
Result:
sub esp, 8
mov eax, 2
mov DWORD PTR [esp], 5
mov WORD PTR [4+esp], ax
mov edx, DWORD PTR [esp] ; <- unnecessary copying
movzx ecx, WORD PTR [4+esp] ; <- unnecessary copying
; a copy from edx and ecx to non-volatile memory used to be here, but it was eliminated as dead code
push 0
push 1
push OFFSET "int = %i, short = %i\n"
call DWORD PTR [__imp__printf]
add esp, 12
add esp, 8
ret
And here we can also see two uncombined adds before the return.