[IPOL discuss] Fast DCT using FFTW3

Miguel Colom Miguel.Colom at cmla.ens-cachan.fr
Tue Feb 28 22:57:36 CET 2012


Hi Pascal,

> Hi Miguel,
>
>> It even analyzes the numerical expressions and tries to compute it the
>> shorter and faster way.
>> For example, if y = f(y) + 2*f(y) + 3*f(y), it doesn't call f(y) three
>> times.
>
> I doubt that the compiler would dare such an aggressive optimization. f
> may
> have side effects that the compiler is not able to analyze, therefore it
> may
> think it is safer to call f(y) three times...

GCC is really smart doing optimizations. Of course, it can be configured
with the -O flag to set the level of optimization.

To prove it, I wrote this small program:

#include <stdio.h>

int f(int x) {
  return x*x*x;
}

int main(int argc, char *argv[]) {
 int result;
 int x = 23;

 result = 2*f(x) + 3*f(x) + 4*f(x);

 printf("The result is %d\n", result);
 return 0;
}

That is, f(x) is the x^3 function and then the program just computes
2*f(x) + 3*f(x) + 4*f(x) and prints the result.

If no optimization is set, the intermediate assembly GCC produces is the
following:

------------------------------------
	.file	"main.c"
	.text
	.globl	f
	.type	f, @function
f:
.LFB0:
	.cfi_startproc
	pushl	%ebp
	.cfi_def_cfa_offset 8
	.cfi_offset 5, -8
	movl	%esp, %ebp
	.cfi_def_cfa_register 5
	movl	8(%ebp), %eax
	imull	8(%ebp), %eax
	imull	8(%ebp), %eax
	popl	%ebp
	.cfi_def_cfa 4, 4
	.cfi_restore 5
	ret
	.cfi_endproc
.LFE0:
	.size	f, .-f
	.section	.rodata
.LC0:
	.string	"The result is %d\n"
	.text
	.globl	main
	.type	main, @function
main:
.LFB1:
	.cfi_startproc
	pushl	%ebp
	.cfi_def_cfa_offset 8
	.cfi_offset 5, -8
	movl	%esp, %ebp
	.cfi_def_cfa_register 5
	pushl	%ebx
	andl	$-16, %esp
	subl	$32, %esp
	movl	$23, 24(%esp)
	movl	24(%esp), %eax
	movl	%eax, (%esp)
	.cfi_offset 3, -12
	call	f
	leal	(%eax,%eax), %ebx
	movl	24(%esp), %eax
	movl	%eax, (%esp)
	call	f
	movl	%eax, %edx
	movl	%edx, %eax
	addl	%eax, %eax
	addl	%edx, %eax
	addl	%eax, %ebx
	movl	24(%esp), %eax
	movl	%eax, (%esp)
	call	f
	sall	$2, %eax
	addl	%ebx, %eax
	movl	%eax, 28(%esp)
	movl	$.LC0, %eax
	movl	28(%esp), %edx
	movl	%edx, 4(%esp)
	movl	%eax, (%esp)
	call	printf
	movl	$0, %eax
	movl	-4(%ebp), %ebx
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	.cfi_restore 3
	ret
	.cfi_endproc
.LFE1:
	.size	main, .-main
	.ident	"GCC: (Ubuntu/Linaro 4.6.1-9ubuntu3) 4.6.1"
	.section	.note.GNU-stack,"", at progbits
------------------------------------
The compiler defines the function f and then calls it three times.


Nevertheless, if we compile it with "gcc -O99 -S main.c" the output is the
following:
------------------------------------
	.file	"main.c"
	.text
	.p2align 4,,15
	.globl	f
	.type	f, @function
f:
.LFB22:
	.cfi_startproc
	movl	4(%esp), %edx
	movl	%edx, %eax
	imull	%edx, %eax
	imull	%edx, %eax
	ret
	.cfi_endproc
.LFE22:
	.size	f, .-f
	.section	.rodata.str1.1,"aMS", at progbits,1
.LC0:
	.string	"The result is %d\n"
	.section	.text.startup,"ax", at progbits
	.p2align 4,,15
	.globl	main
	.type	main, @function
main:
.LFB23:
	.cfi_startproc
	pushl	%ebp
	.cfi_def_cfa_offset 8
	.cfi_offset 5, -8
	movl	%esp, %ebp
	.cfi_def_cfa_register 5
	andl	$-16, %esp
	subl	$16, %esp
	movl	$109503, 8(%esp)
	movl	$.LC0, 4(%esp)
	movl	$1, (%esp)
	call	__printf_chk
	xorl	%eax, %eax
	leave
	.cfi_restore 5
	.cfi_def_cfa 4, 4
	ret
	.cfi_endproc
.LFE23:
	.size	main, .-main
	.ident	"GCC: (Ubuntu/Linaro 4.6.1-9ubuntu3) 4.6.1"
	.section	.note.GNU-stack,"", at progbits
------------------------------------

The difference is clear: now the function f is NEVER called.
The compiler analyzes the expression, computes the result at compile-time
if it's possible and substitutes the call f(x) with its result (109503)
with the instruction "movl	$109503, 8(%esp)"

However, if the compiler detects that the function does something else
that the program needs, the function is called three times.

int f(int x) {
  printf("Do something\n");
  return x*x*x;
}

Best,
Miguel




More information about the discuss mailing list