Learning inline keyword by example in C
C also has an inline
keyword, but its semantics are not the same as in
C++. Inline Functions In C
has a good explanation. In this blog, I am going to do some exercises
to make it more concrete.
static inline functions
This is very simple. Let's start with it.
// in foo.h
typedef void (*func_t)(const char * msg, void * f);
// Prints the caller-supplied label followed by a pointer value.
//   msg: label printed before the pointer
//   f:   pointer whose value is printed with %p
// Named foo because a1.c and a2.c call foo(...), and the objdump/nm
// output in this article refers to the symbol foo.
// printf comes from <stdio.h>, which the including .c files pull in
// before including this header.
static inline void foo(const char * msg, void * f)
{
    printf("%s: pointer is %p\n",msg,f);
}
// in main.c
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
extern void m1();
extern void m2();
// Program entry point: runs m1() and then m2(), and always exits with 0.
// The command-line arguments are accepted but not used.
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    m1();
    m2();

    return 0;
}
// in a1.c
#include <stdio.h>
#include "foo.h"
// Calls foo with the label "from a1" and a NULL pointer argument.
void m1()
{
foo("from a1", NULL);
}
// in a2.c
#include <stdio.h>
#include "foo.h"
// Calls foo with the label "from a2" and a NULL pointer argument.
void m2()
{
foo("from a2", NULL);
}
% gcc -O3 -c -o a1.o a1.c
% gcc -O3 -c -o a2.o a2.c
% gcc -O3 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
% ./a.out
from a1: pointer is (nil)
from a2: pointer is (nil)
% objdump -d a1.o
a1.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <m1>:
0: 31 d2 xor %edx,%edx
2: be 00 00 00 00 mov $0x0,%esi
7: bf 00 00 00 00 mov $0x0,%edi
c: 31 c0 xor %eax,%eax
e: e9 00 00 00 00 jmpq 13 <m1+0x13>
% nm a1.o
0000000000000000 T m1
U printf
With optimization, we can see that a1.o
does not have a local symbol foo
defined, and there is no call to the function foo
, i.e. it is inlined.
% gcc -O0 -c -o a1.o a1.c
% gcc -O0 -c -o a2.o a2.c
% gcc -O0 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
% ./a.out
from a1: pointer is (nil)
from a2: pointer is (nil)
% objdump -d a1.o
a1.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <foo>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 83 ec 10 sub $0x10,%rsp
8: 48 89 7d f8 mov %rdi,-0x8(%rbp)
c: 48 89 75 f0 mov %rsi,-0x10(%rbp)
10: 48 8b 55 f0 mov -0x10(%rbp),%rdx
14: 48 8b 45 f8 mov -0x8(%rbp),%rax
18: 48 89 c6 mov %rax,%rsi
1b: bf 00 00 00 00 mov $0x0,%edi
20: b8 00 00 00 00 mov $0x0,%eax
25: e8 00 00 00 00 callq 2a <foo+0x2a>
2a: c9 leaveq
2b: c3 retq
000000000000002c <m1>:
2c: 55 push %rbp
2d: 48 89 e5 mov %rsp,%rbp
30: be 00 00 00 00 mov $0x0,%esi
35: bf 00 00 00 00 mov $0x0,%edi
3a: e8 c1 ff ff ff callq 0 <foo>
3f: 5d pop %rbp
40: c3 retq
% nm a1.o
0000000000000000 t foo
000000000000002c T m1
U printf
On the other hand, if we use -O0
to disable optimization, we can see
foo
is defined, and foo
is invoked, i.e. it is not inlined.
how to force it to inline?
If we change foo.h
as follows,
//in foo.h
...
static inline __attribute__((always_inline))
void foo(const char * msg, void * f)
...
__attribute__((always_inline))
forces the function to be inlined, even
when optimization is disabled.
sh build.sh
% gcc -O0 -c -o a1.o a1.c
% gcc -O0 -c -o a2.o a2.c
% gcc -O0 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
% ./a.out
from a1: pointer is (nil)
from a2: pointer is (nil)
% objdump -d a1.o
a1.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <m1>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 83 ec 10 sub $0x10,%rsp
8: 48 c7 45 f8 00 00 00 movq $0x0,-0x8(%rbp)
f: 00
10: 48 c7 45 f0 00 00 00 movq $0x0,-0x10(%rbp)
17: 00
18: 48 8b 55 f0 mov -0x10(%rbp),%rdx
1c: 48 8b 45 f8 mov -0x8(%rbp),%rax
20: 48 89 c6 mov %rax,%rsi
23: bf 00 00 00 00 mov $0x0,%edi
28: b8 00 00 00 00 mov $0x0,%eax
2d: e8 00 00 00 00 callq 32 <m1+0x32>
32: c9 leaveq
33: c3 retq
% nm a1.o
0000000000000000 T m1
U printf
inline functions are not always inlined
// in a1.c
#include <stdio.h>
#include "foo.h"
// Calls foo with the label "from a1" and passes foo's own address
// (cast to void *) as the pointer argument, so the address of foo is
// taken in this file.
void m1()
{
foo("from a1", (void*) foo);
}
// in a2.c
#include <stdio.h>
#include "foo.h"
// Calls foo with the label "from a2" and passes foo's own address
// (cast to void *) as the pointer argument, so the address of foo is
// taken in this file.
void m2()
{
foo("from a2", (void*) foo);
}
sh build.sh
% gcc -O3 -c -o a1.o a1.c
% gcc -O3 -c -o a2.o a2.c
% gcc -O3 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
% ./a.out
from a1: pointer is 0x400530
from a2: pointer is 0x400570
% objdump -d a1.o
a1.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <foo>:
0: 48 89 f2 mov %rsi,%rdx
3: 31 c0 xor %eax,%eax
5: 48 89 fe mov %rdi,%rsi
8: bf 00 00 00 00 mov $0x0,%edi
d: e9 00 00 00 00 jmpq 12 <foo+0x12>
12: 66 66 66 66 66 2e 0f data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
19: 1f 84 00 00 00 00 00
0000000000000020 <m1>:
20: ba 00 00 00 00 mov $0x0,%edx
25: be 00 00 00 00 mov $0x0,%esi
2a: bf 00 00 00 00 mov $0x0,%edi
2f: 31 c0 xor %eax,%eax
31: e9 00 00 00 00 jmpq 36 <m1+0x16>
% nm a1.o
0000000000000000 t foo
0000000000000020 T m1
U printf
We can see that the function foo
is still inlined, but the foo
object
code is also emitted by the compiler, because the address of the function foo
is taken.
Pitfall of static inline function
As with any other static function, every translation unit has its own
implementation, so foo
is a local function. The t
in the nm
output indicates that foo
is a private (local) symbol.
The address of function foo
is 0x400530
and 0x400570
for a1.o
and a2.o
respectively.
non-external inline function.
#pragma once
// in foo.h
// foo declared inline with GCC's always_inline attribute, but with
// neither static nor extern. Prints the label msg followed by the
// pointer value f.
inline __attribute__((always_inline))
void foo(const char * msg, void * f)
{
printf("%s: pointer is %p\n",msg,f);
}
// in a1.c
#include <stdio.h>
#include "foo.h"
// Calls foo with the label "from a1" and a NULL pointer.
void m1()
{
foo("from a1", (void*) NULL);
}
// in a2.c
#include <stdio.h>
#include "foo.h"
// Calls foo with a NULL pointer.
// NOTE(review): the label reads "from a1" although this is a2.c;
// it looks copied from a1.c -- confirm whether "from a2" was intended.
void m2()
{
foo("from a1", (void*) NULL);
}
% gcc -O3 -c -o a1.o a1.c
% gcc -O3 -c -o a2.o a2.c
% gcc -O3 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
a2.o: In function `foo':
a2.c:(.text+0x0): multiple definition of `foo'
a1.o:a1.c:(.text+0x0): first defined here
collect2: error: ld returned 1 exit status
% nm a1.o
0000000000000000 T foo
0000000000000020 T m1
U printf
% nm a2.o
0000000000000000 T foo
0000000000000020 T m2
U printf
We can see that both a1.o
and a2.o
emit a function definition of foo
,
so the linker complains about multiple definition
.
This means that a non-external inline function can only be used in one
translation unit. This is of rather limited use; it could be replaced
with the static inline
functions mentioned above.
extern inline functions
#pragma once
// in foo.h
// foo declared extern inline in the shared header. Prints the label msg
// followed by the pointer value f.
extern inline
void foo(const char * msg, void * f)
{
printf("%s: pointer is %p\n",msg,f);
}
// in a1.c
#include <stdio.h>
#include "foo.h"
// Calls foo with the label "from a1" and a NULL pointer.
void m1()
{
foo("from a1", (void*) NULL);
}
// in a2.c
#include <stdio.h>
#include "foo.h"
// Calls foo with a NULL pointer.
// NOTE(review): the label reads "from a1" although this is a2.c; the
// program output shown in this article ("from a1" twice) matches it.
void m2()
{
foo("from a1", (void*) NULL);
}
% gcc -O3 -c -o a1.o a1.c
% gcc -O3 -c -o a2.o a2.c
% gcc -O3 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
% ./a.out
from a1: pointer is (nil)
from a1: pointer is (nil)
% objdump -d a1.o
a1.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <m1>:
0: 31 d2 xor %edx,%edx
2: be 00 00 00 00 mov $0x0,%esi
7: bf 00 00 00 00 mov $0x0,%edi
c: 31 c0 xor %eax,%eax
e: e9 00 00 00 00 jmpq 13 <m1+0x13>
% nm a1.o
0000000000000000 T m1
U printf
We can see foo
is inlined, because of -O3
, and no object code is
emitted.
But with the same source code and different compilation options,
i.e. -O0
, which disables inlining, we get a linking
error as shown below.
% gcc -O0 -c -o a1.o a1.c
% gcc -O0 -c -o a2.o a2.c
% gcc -O0 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
a1.o: In function `m1':
a1.c:(.text+0xf): undefined reference to `foo'
a2.o: In function `m2':
a2.c:(.text+0xf): undefined reference to `foo'
collect2: error: ld returned 1 exit status
% objdump -d a1.o
a1.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <m1>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: be 00 00 00 00 mov $0x0,%esi
9: bf 00 00 00 00 mov $0x0,%edi
e: e8 00 00 00 00 callq 13 <m1+0x13>
13: 5d pop %rbp
14: c3 retq
% nm a1.o
U foo
0000000000000000 T m1
Because foo
is not inlined, both a1.o
and a2.o
contain an
undefined reference to foo
, but no object code for foo
is
emitted, so linking fails with the error undefined reference
.
Stand-alone object code is never emitted.
In order to fix the above error, we add a non-external inline function declaration
in a1.c
// in a1.c
#include <stdio.h>
#include "foo.h"
// Calls foo with the label "from a1" and a NULL pointer.
void m1()
{
foo("from a1", (void*) NULL);
}
// Plain inline declaration of foo (no extern), present in this file only.
// With it, the nm output for a1.o in this article lists foo as a defined
// T symbol.
inline void foo(const char * msg, void * f);
% gcc -O0 -c -o a1.o a1.c
% gcc -O0 -c -o a2.o a2.c
% gcc -O0 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
% ./a.out
from a1: pointer is (nil)
from a1: pointer is (nil)
% nm a1.o
0000000000000000 T foo
000000000000002c T m1
U printf
% nm a2.o
U foo
0000000000000000 T m2
We can see that a2.o
is the same as before: foo
is an undefined external
symbol. But in a1.c
it is a public symbol, and foo
should only be
emitted by a1.o
; otherwise there would be the linking error multiple definition
.
function address
If we change a1.c
and a2.c
as below, to print the address of function foo
.
// in a1.c
#include <stdio.h>
#include "foo.h"
// Calls foo with the label "from a1" and passes foo's own address
// (cast to void *) so that the address gets printed.
void m1()
{
foo("from a1", (void*) foo);
}
// Plain inline declaration of foo (no extern), present in this file only.
// With it, the nm output for a1.o in this article lists foo as a defined
// T symbol.
inline void foo(const char * msg, void * f);
// in a2.c
#include <stdio.h>
#include "foo.h"
// Calls foo and passes foo's own address (cast to void *) so that the
// address gets printed.
// NOTE(review): the label reads "from a1" although this is a2.c; the
// program output shown in this article ("from a1" twice) matches it.
void m2()
{
foo("from a1", (void*) foo);
}
% gcc -O0 -c -o a1.o a1.c
% gcc -O0 -c -o a2.o a2.c
% gcc -O0 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
% ./a.out
from a1: pointer is 0x400506
from a1: pointer is 0x400506
% nm a1.o
0000000000000000 T foo
000000000000002c T m1
U printf
% nm a2.o
U foo
0000000000000000 T m2
We see the address of function foo
is unique, i.e. 0x400506
.
What if inline functions have different definitions?
// in a1.c
#include <stdio.h>
// a1.c's own extern inline definition of foo (foo.h is not included).
// Its output is prefixed with "foo in a1.c" so the definition that
// actually ran can be identified.
extern inline
void foo(const char * msg, void * f)
{
printf("foo in a1.c %s: pointer is %p\n",msg,f);
}
// Calls foo with the label "from a1" and passes foo's own address
// (cast to void *) so that the address gets printed.
void m1()
{
foo("from a1", (void*) foo);
}
// Plain inline declaration of foo (no extern), present in this file only.
// With it, the nm output for a1.o in this article lists foo as a defined
// T symbol.
inline void foo(const char * msg, void * f);
// in a2.c
#include <stdio.h>
// a2.c's own extern inline definition of foo (foo.h is not included).
// Its output is prefixed with "foo in a2.c" so the definition that
// actually ran can be identified.
extern inline
void foo(const char * msg, void * f)
{
printf("foo in a2.c %s: pointer is %p\n",msg,f);
}
// Calls foo with the label "from a2" and passes foo's own address
// (cast to void *) so that the address gets printed.
void m2()
{
foo("from a2", (void*) foo);
}
% gcc -O0 -c -o a1.o a1.c
% gcc -O0 -c -o a2.o a2.c
% gcc -O0 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
% ./a.out
foo in a1.c from a1: pointer is 0x400506
foo in a1.c from a2: pointer is 0x400506
% nm a1.o
0000000000000000 T foo
000000000000002c T m1
U printf
% nm a2.o
U foo
0000000000000000 T m2
Notice that we didn't include the common definition of foo
from
foo.h
; instead, a1.c
and a2.c
each have their own definition.
Because of -O0
, the function is not inlined, so only the foo
defined in a1.c
is used.
But if we compile it with -O3
, the output changes.
% gcc -O3 -c -o a1.o a1.c
% gcc -O3 -c -o a2.o a2.c
% gcc -O3 -c -o main.o main.c
% gcc -o a.out a1.o a2.o main.o
% ./a.out
foo in a1.c from a1: pointer is 0x400530
foo in a2.c from a2: pointer is 0x400530
% nm a1.o
0000000000000000 T foo
0000000000000020 T m1
U printf
% nm a2.o
U foo
0000000000000000 T m2
U printf
Because foo
is inlined, a2.c
uses the definition in a2.c
, not
the foo
in a1.c
.
Don't do it in practice.
But in practice it might happen. For example, you modify foo
in
foo.h
, but you only recompile a1.c
and forget to recompile
a2.c
. If the function is inlined, a2.c
still uses the old
definition of foo
.