Contents

64-bit-GCC Runtime Stack Usage

CPU registers in 64-bit mode

* rax
* rbx
* rbp
* rsp
        first
* rdi <--+
* rsi    | in 64-bit mode the first 6 parameters are placed in
* rdx    | these registers from rdi to r9 when calling a function,
* rcx    | any extra params are passed through the stack like in
* r8     | 32-bit mode
* r9  <--+
        last
* r10-r15

Simple C program in assembly

#include <stdio.h>
extern int B();
/* A: sets three locals, then overwrites f with the result of B(d, e).
 * The parameters x and y are never read. This function is the source
 * for the 32-bit assembly listing that follows. */
int A(int x, int y) {
    int d, e, f;            /* three int locals, kept in A's stack frame */
    d = 4; e = 5; f = 6;
    f = B(d,e);             /* B is only declared (extern); it is defined elsewhere */
    /* NOTE(review): A is declared to return int but has no return statement */
}
cc -m32 -S <cprog>.c # compile into 32-bit assembly code
    .text
    .globl A
# ----------------------------------------------------------------------
# int A(int x, int y)      32-bit code (cc -m32), GAS AT&T syntax
# Frame layout relative to %ebp:
#     -20(%ebp) = d      -16(%ebp) = e      -12(%ebp) = f
# Calls the external function B(d, e) and stores its result in f.
# ----------------------------------------------------------------------
# ------------- Entry: -------------+
A:                          # A() start code location
    pushl    %ebp           # save the caller's frame pointer
    movl     %esp, %ebp     # establishing the stack frame

    subl     $24, %esp      # allocate space for local vars etc
# ----------------------------------+

# ------- Function body code: ------+
    movl     $4, -20(%ebp)   # d = 4
    movl     $5, -16(%ebp)   # e = 5
    movl     $6, -12(%ebp)   # f = 6

    subl     $8, %esp        # create 8 byte slot for tmp (presumably padding
                             # to keep the stack 16-byte aligned at the call)
    pushl    -16(%ebp)       # push e (arguments are pushed right to left)
    pushl    -20(%ebp)       # push d
    call     B

    addl     $16, %esp       # clean stack: 8 tmp bytes + 2 * 4 argument bytes
    movl     %eax, -12(%ebp) # f = return value in eax; f lives at -12(%ebp)
# ----------------------------------+

# ----------- Exit code: -----------+
    leave                    # same as: movl %ebp, %esp ; popl %ebp
    ret
# ----------------------------------+

There are 3 parts to the assembly code:

  1. Entry: the stack frame is established, local variables and working space are allocated on the stack.
  • First FP is saved on the stack (%ebp)
  • Second, FP (%ebp) is set to SP (%esp), so both now point at the saved FP; the stack looks like:
--------------------- > low address
XXXX | PC | FP |
               ^
           FP,SP
---------------------
  • Then allocate 24 bytes of space for the local variables and working area by shifting SP (%esp) downward
  2. Function body code: assign the values to the local variables, then reserve extra tmp space, push the parameters and call B.
  • FP is used to locate the local variables: they are all ints (sizeof(int) == 4), so they sit at negative offsets from FP in multiples of 4 (-4(FP), -8(FP), …). The stack looks like:
---------------- -4 -8  -12 -16 -20 -24 ----- 
XXXX | PC | FP | ? | ? | 6 | 5 | 4 | ? |
               ^         f   e   d     ^
              FP                      SP
---------------------------------------------
  • Then the tmp space is added by moving SP (%esp) further down, and the parameters are pushed onto the stack in reverse order (e first, then d) just before B is called:
---------------- -4 -8  -12 -16 -20 -24 ------------------
XXXX | PC | FP | ? | ? | 6 | 5 | 4 | ? | ?? | ?? | e | d |
               ^         f   e   d      TMP SPACE        ^
              FP                                        SP
----------------------------------------------------------
  3. Exit code: release the stack frame and return to the caller
  • leave is the same as doing:
# leave is equivalent to:
    movl    %ebp, %esp
    popl    %ebp

Function call convention

/********* t.c file ********/
#include <stdio.h>
/* sub: takes eight int parameters so that the last two (g, h) no longer
 * fit in the six parameter registers and must be passed on the stack.
 * Returns a + g + u + v, i.e. one register parameter, one stack
 * parameter and two locals. b..f, h and the local w are never read. */
int sub(int a, int b, int c, int d, int e, int f, int g, int h) {
    int u, v, w;
    u = 9;
    v = 10;
    w = 11;   /* assigned but not used in the result */
    return a+g+u+v; // use first and extra parameter, locals
}

/* main: sets eight locals to 1..8 and passes them all to sub(), storing
 * the result in i. There is no explicit return statement; the assembly
 * listing below shows the compiler returning 0. */
int main() {
    int a, b, c, d, e, f, g, h, i;
    a = 1;
    b = 2;
    c = 3;
    d = 4;
    e = 5;
    f = 6;
    g = 7;
    h = 8;
    i = sub(a,b,c,d,e,f,g,h);   /* a..f go in registers, g and h on the stack */
}

First, compile the C program and have the compiler output the assembly code:

gcc -S <cprog>.c # it will output a <cprog>.s file
#------------ t.s file generated by 64-bit GCC compiler ------------–
.globl sub
sub: # int sub(int a,b,c,d,e,f,g,h)
# The parameters are ints, so only the low 32 bits of each register are used:
# first 6 parameters | a |,| b |,| c |,| d |,| e |,| f |
#   in registers:    |edi|,|esi|,|edx|,|ecx|,|r8d|,|r9d|
# Returns a + g + u + v in eax.

# 2 extra parameters g,h are on stack.
# Upon entry, stack top contains the return PC, then g, h
#    -------------------------------------
#    . . . ...| h | g | PC |     LOW address
#                          ^
#                        RSP
#    ------------------------------------

# establish stack frame
    pushq %rbp
    movq  %rsp, %rbp
# no need to shift rsp down: sub calls no other function (leaf), so it
# may use the 128-byte reserved area (red zone) below rsp for its
# locals and saved parameters, which only reach down to -40(%rbp).
# rsp would have to be shifted down if the locals did not fit there.

# save first 6 parameters in registers on stack
    movl %edi, -20(%rbp) # a
    movl %esi, -24(%rbp) # b
    movl %edx, -28(%rbp) # c
    movl %ecx, -32(%rbp) # d
    movl %r8d, -36(%rbp) # e
    movl %r9d, -40(%rbp) # f

# access locals u, v, w at rbp -4 to -12
    movl $9,  -4(%rbp)   # u = 9
    movl $10, -8(%rbp)   # v = 10
    movl $11, -12(%rbp)  # w = 11

# compute a + g + u + v:
    movl -20(%rbp), %edx # saved a on stack
    movl 16(%rbp),  %eax # g at 16(rbp): above saved rbp (0) and return PC (8)
    addl %eax, %edx      # edx = a + g
    movl -4(%rbp),  %eax # u at -4(rbp)
    addl %eax, %edx      # edx = a + g + u
    movl -8(%rbp),  %eax # v at -8(rbp)
    addl %edx, %eax      # eax = a + g + u + v (return value)

# did not shift rsp down, so just popQ to restore rbp
    popq %rbp
    ret

# ====== main function code in assembly ======
    .globl    main
main:
# int main(): sets locals a..h to 1..8, calls sub(a,b,c,d,e,f,g,h),
# stores the result in i and returns 0.
# establish stack frame
    pushq %rbp
    movq %rsp, %rbp

# shift rsp down 48 bytes for locals (nine 4-byte ints need 36 bytes;
# presumably rounded up to 48 to keep rsp 16-byte aligned)
    subq $48, %rsp

# locals a..h are at rbp -4 to -32; i is at rbp -36 (written after the call)
    movl $1, -4(%rbp)  # a=1
    movl $2, -8(%rbp)  # b=2
    movl $3, -12(%rbp) # c=3
    movl $4, -16(%rbp) # d=4
    movl $5, -20(%rbp) # e=5
    movl $6, -24(%rbp) # f=6
    movl $7, -28(%rbp) # g=7
    movl $8, -32(%rbp) # h=8

# call sub(a,b,c,d,e,f,g,h): first 6 parameters in registers
    movl -24(%rbp), %r9d # f in r9d
    movl -20(%rbp), %r8d # e in r8d
    movl -16(%rbp), %ecx # d in ecx
    movl -12(%rbp), %edx # c in edx
    movl -8(%rbp),  %esi # b in esi
    movl -4(%rbp),  %eax # a parked in eax for now: edi is used as scratch below

# push 2 extra parameters h,g on stack (h first, so g ends up on top)
    movl -32(%rbp), %edi # int h in edi
    pushq %rdi # pushQ rdi ; 8-byte slot, only low 32-bits = h
    movl -28(%rbp), %edi # int g in edi
    pushq %rdi # pushQ rdi ; 8-byte slot, low 32-bits = g

    movl %eax, %edi # parameter a in edi, now that the pushes are done
    call sub # call sub(a,b,c,d,e,f,g,h)

    addq $16, %rsp # pop stack: h,g, 2 * 8 = 16 bytes
    movl %eax, -36(%rbp) # i = sub return value in eax

    movl $0, %eax # return 0 to crt0.o
    leave # restore rsp from rbp and pop the saved rbp
    ret