[IPOL discuss] Fast DCT using FFTW3
Miguel Colom
Miguel.Colom at cmla.ens-cachan.fr
Tue Feb 28 22:57:36 CET 2012
Hi Pascal,
> Hi Miguel,
>
>> It even analyzes the numerical expressions and tries to compute it the
>> shorter and faster way.
>> For example, if y = f(y) + 2*f(y) + 3*f(y), it doesn't call f(y) three
>> times.
>
> I doubt that the compiler would dare such an aggressive optimization. f
> may
> have side effects that the compiler is not able to analyze, therefore it
> may
> think it is safer to call f(y) three times...
GCC is really smart doing optimizations. Of course, it can be configured
with the -O flag to set the level of optimization.
To prove it, I wrote this small program:
#include <stdio.h>
int f(int x) {
return x*x*x;
}
int main(int argc, char *argv[]) {
int result;
int x = 23;
result = 2*f(x) + 3*f(x) + 4*f(x);
printf("The result is %d\n", result);
return 0;
}
That is, f(x) is the x^3 function and then the program just computes
2*f(x) + 3*f(x) + 4*f(x) and prints the result.
If no optimization is set, the intermediate assembly GCC produces is the
following:
------------------------------------
.file "main.c"
.text
.globl f
.type f, @function
f:
.LFB0:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
movl 8(%ebp), %eax
imull 8(%ebp), %eax
imull 8(%ebp), %eax
popl %ebp
.cfi_def_cfa 4, 4
.cfi_restore 5
ret
.cfi_endproc
.LFE0:
.size f, .-f
.section .rodata
.LC0:
.string "The result is %d\n"
.text
.globl main
.type main, @function
main:
.LFB1:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
pushl %ebx
andl $-16, %esp
subl $32, %esp
movl $23, 24(%esp)
movl 24(%esp), %eax
movl %eax, (%esp)
.cfi_offset 3, -12
call f
leal (%eax,%eax), %ebx
movl 24(%esp), %eax
movl %eax, (%esp)
call f
movl %eax, %edx
movl %edx, %eax
addl %eax, %eax
addl %edx, %eax
addl %eax, %ebx
movl 24(%esp), %eax
movl %eax, (%esp)
call f
sall $2, %eax
addl %ebx, %eax
movl %eax, 28(%esp)
movl $.LC0, %eax
movl 28(%esp), %edx
movl %edx, 4(%esp)
movl %eax, (%esp)
call printf
movl $0, %eax
movl -4(%ebp), %ebx
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
.cfi_restore 3
ret
.cfi_endproc
.LFE1:
.size main, .-main
.ident "GCC: (Ubuntu/Linaro 4.6.1-9ubuntu3) 4.6.1"
.section .note.GNU-stack,"", at progbits
------------------------------------
The compiler defines the function f and then calls it three times.
Nevertheless, if we compile it with "gcc -O99 -S main.c" the output is the
following:
------------------------------------
.file "main.c"
.text
.p2align 4,,15
.globl f
.type f, @function
f:
.LFB22:
.cfi_startproc
movl 4(%esp), %edx
movl %edx, %eax
imull %edx, %eax
imull %edx, %eax
ret
.cfi_endproc
.LFE22:
.size f, .-f
.section .rodata.str1.1,"aMS", at progbits,1
.LC0:
.string "The result is %d\n"
.section .text.startup,"ax", at progbits
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB23:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
andl $-16, %esp
subl $16, %esp
movl $109503, 8(%esp)
movl $.LC0, 4(%esp)
movl $1, (%esp)
call __printf_chk
xorl %eax, %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE23:
.size main, .-main
.ident "GCC: (Ubuntu/Linaro 4.6.1-9ubuntu3) 4.6.1"
.section .note.GNU-stack,"", at progbits
------------------------------------
The difference is clear: now the function f is NEVER called.
The compiler analyzes the expression, computes the result at compile-time
if it's possible and substitutes the call f(x) with its result (109503)
with the instruction "movl $109503, 8(%esp)"
However, if the compiler detects that the function does something else
that the program needs, the function is called three times.
int f(int x) {
printf("Do something\n");
return x*x*x;
}
Best,
Miguel
More information about the discuss
mailing list