Workflow

A well-defined workflow provides structure, clarity, and efficiency to tasks, ensuring a systematic approach. This applies to exploit development like any other repetitive task.

This is my preferred workflow; you can adopt it or choose your own:

Example

The provided example demonstrates the application of the presented workflow (or your preferred workflow) for debugging shellcode. It's alright if you haven't fully grasped the assembly code at this stage, as this section primarily focuses on the workflow itself.

This example displays a simple message box, but we can debug the shellcode just before the call to ensure that the registers are set up correctly and that we are calling the correct address.

In Visual Studio Code, enter the following 64-bit assembly:

MessageBox Shellcode
BITS 64
SECTION .text
global main

main:
    push  rbp                       ; Save the caller's RBP
    and   rsp, 0FFFFFFFFFFFFFFF0h   ; Align the stack to a multiple of 16 bytes
    mov   rbp, rsp                  ; RBP = aligned frame base; locals are addressed as [rbp-0x18]..[rbp-0x30]
    sub   rsp, 0x64                 ; Reserve 100 bytes of locals (RSP is now 12 mod 16; each later sub rsp, 0x2c restores alignment)
    
find_kernel32:
    xor rcx, rcx                    ; RCX = 0 (used as the base register below; presumably avoids a displacement with zero bytes)
    mov rax, [gs:rcx + 0x60]        ; RAX = PEB (read from GS:[0x60])
    mov rax, [rax + 0x18]           ; RAX = PEB->Ldr
    mov rsi, [rax + 0x20]           ; RSI = PEB->Ldr.InMemOrder list head link (first module entry)
    lodsq                           ; RAX = [RSI] = second module entry (NTDLL, per the load order assumed here)
    xchg rax, rsi                   ; RSI = second entry, so the next lodsq follows its link
    lodsq                           ; RAX = third module entry (kernel32)
    mov rbx, [rax + 0x20]           ; RBX = kernel32 base address (read again from RBX by the code below)

get_function_address:
    lea rsi, [rel get_function + 0x41414141] 
                                    ; RSI = get_function + 0x41414141, RIP-relative (bias presumably keeps null bytes out of the displacement)
    sub rsi, 0x41414141             ; Remove the bias: RSI = address of get_function
    mov [rbp-0x20], rsi             ; [RBP-0x20] = get_function address
    jmp start                       ; Skip over the get_function body

; get_function  In: RBX = module base, R9 = first 8 bytes of the export's name.
;               Out: RDI (= RDX) = export address. Clobbers RAX, RCX, RSI, R8, flags.
get_function:
    xor r8, r8                      ; R8 = 0
    mov r8d, [rbx + 0x3c]           ; R8D = DOS->e_lfanew offset
    mov rdx, r8                     ; RDX = DOS->e_lfanew
    add rdx, rbx                    ; RDX = PE Header
    add rdx, 0x44                   ; RDX += 0x88 in two steps: a single add of
    add rdx, 0x44                   ; 0x88 would need an imm32 containing null bytes
    mov r8d, [rdx]                  ; R8D = Offset export table - was [rdx + 0x88]
    add r8, rbx                     ; R8 = Export table
    xor rsi, rsi                    ; Clear RSI
    mov esi, [r8 + 0x20]            ; ESI = Offset names table
    add rsi, rbx                    ; RSI = Names table
    xor rcx, rcx                    ; RCX = name index, starting at entry 0
next_function_name:
    mov eax, [rsi + rcx * 4]        ; EAX = name offset (a 32-bit load zero-extends into RAX)
    add rax, rbx                    ; RAX = pointer to the export name
    inc rcx                         ; Advance the index before comparing, so entry 0 is tested too
    cmp qword [rax], r9             ; Do the first 8 bytes match R9? (search is not bounded by the table length)
    jnz next_function_name          ; No match: try the next name

found_function:
    dec rcx                         ; Undo the post-increment: RCX = index of the matching name
    xor rsi, rsi                    ; RSI = 0
    mov esi, [r8 + 0x24]            ; ESI = Offset ordinals
    add rsi, rbx                    ; RSI = Ordinals table
    movzx ecx, word [rsi + rcx * 2] ; RCX = ordinal of the function, zero-extended (no stale upper bits)
    xor rsi, rsi                    ; RSI = 0
    mov esi, [r8 + 0x1c]            ; ESI = Offset address table
    add rsi, rbx                    ; RSI = Address table
    xor rdx, rdx                    ; RDX = 0
    mov edx, [rsi + rcx * 4]        ; EDX = Pointer(offset)
    add rdx, rbx                    ; RDX = Function Address
    mov rdi, rdx                    ; Save Function Address in RDI
    ret                             ; Return to the caller with the address in RDI

start:
get_getprocaddress:
    mov r9, 0x41636f7250746547      ; R9 = "GetProcA" (little-endian), the 8-byte name prefix get_function compares
    call QWORD [rbp-0x20]           ; CALL get_function
    mov [rbp-0x18], rdi             ; [RBP-0x18] = *GetProcAddress

call_getprocaddress_loadlibrarya:
    mov [rbp-0x28], rbx             ; [RBP-0x28] = Kernel32 base address
    mov rcx, [rbp-0x28]             ; RCX = hModule = Kernel32 base address
    mov rax, 0x41797261             ; "aryA" (the upper four bytes are zero and terminate the string)
    push rax                        ; Push the tail of the name first
    mov rax, 0x7262694c64616f4c     ; "LoadLibr"
    push rax                        ; RSP now points at "LoadLibraryA"
    mov rdx, rsp                    ; RDX = lpProcName = LoadLibraryA
    sub rsp, 0x2c                   ; 44 bytes: 32 of shadow space + 12 so RSP is 16-byte aligned at the call
    call [rbp-0x18]                 ; CALL GetProcAddress
    add rsp, 0x2c                   ; Clean up allocated space
    add rsp, 0x10                   ; Clean up LoadLibraryA on stack
    mov [rbp-0x30], rax             ; [RBP-0x30] = *LoadLibraryA

call_loadlibrarya_user32.dll:
    mov rax, 0x6c6c                 ; "ll" (the upper bytes are zero and terminate the string)
    push rax                        ; Push the tail of the name first
    mov rax, 0x642e323372657375     ; "user32.d"
    push rax                        ; RSP now points at "user32.dll"
    mov rcx, rsp                    ; RCX = lpLibFileName = user32.dll
    sub rsp, 0x2c                   ; 44 bytes: 32 of shadow space + 12 so RSP is 16-byte aligned at the call
    call [rbp-0x30]                 ; CALL LoadLibraryA
    add rsp, 0x2c                   ; Clean up allocated space
    add rsp, 0x10                   ; Clean up user32.dll on stack

call_getprocaddress_messageboxa:
    mov rcx, rax                    ; RCX = hModule = User32 base address
    mov rax, 0x41786f               ; "oxA" (the upper bytes are zero and terminate the string)
    push rax                        ; Push the tail of the name first
    mov rax, 0x426567617373654d     ; "MessageB"
    push rax                        ; RSP now points at "MessageBoxA"
    mov rdx, rsp                    ; RDX = lpProcName = MessageBoxA
    sub rsp, 0x2c                   ; 44 bytes: 32 of shadow space + 12 so RSP is 16-byte aligned at the call
    call [rbp-0x18]                 ; CALL GetProcAddress
    add rsp, 0x2c                   ; Clean up allocated space
    add rsp, 0x10                   ; Clean up MessageBoxA on stack
    mov r10, rax                    ; R10 = *MessageBoxA

; On entry the address of MessageBoxA is in R10 (set by the preceding GetProcAddress call)
call_messagebox:
    xor  rax, rax                     ; RAX = 0
    mov  rcx, rax                     ; RCX = hWnd = NULL
    mov  r9,  rax                     ; R9 = uType = 0 (MB_OK, the default)
    mov  rax, 0x6e6f697461            ; "ation" (the upper bytes are zero and terminate the string)
    push rax                          ; .
    mov  rax, 0x74696f6c70784520      ; " Exploit"
    push rax                          ; .
    mov  rax, 0x73776f646e695720      ; " Windows"
    push rax                          ; .
    mov  rax, 0x6465636e61766441      ; "Advanced"
    push rax                          ; RSP now points at "Advanced Windows Exploitation"
    mov  rdx, rsp                     ; RDX = lpText
    mov  r8,  rdx                     ; R8 = lpCaption (same string as lpText)
    sub  rsp, 0x2c                    ; 32 bytes of shadow space + 12 bytes so RSP is 16-byte aligned for the call
    int3                              ; our breakpoint
    call r10                          ; CALL R10 (MessageBoxA). NOTE(review): nothing follows this call in the listing - confirm how the program is meant to exit

This code looks intimidating, but it isn't; all will be explained soon. For now, notice that there is a breakpoint instruction (int3) on line 125 of the listing, immediately before the call to MessageBoxA. Use the following batch file to assemble and link the shellcode:

del x64.obj
del x64.exe
copy .\messagebox.asm .\x64.asm
nasm -f win64 x64.asm -o x64.obj
link /ENTRY:main /MACHINE:X64 /NODEFAULTLIB /SUBSYSTEM:CONSOLE x64.obj

Once the shellcode has been built into a PE file, we can run it in WinDbg.

Once you have opened the x64.exe binary, WinDbg will break; it does this whenever we launch an executable. Enter the g command to 'go'.

Windbg should break out of your executable again, but this time it will be on the breakpoint we placed in the shellcode.

For the purposes of this exercise we can check that the registers are set up correctly for the call to MessageBoxA and that the stack is aligned correctly.

In WinDbg, use the u rip L2 command to disassemble two instructions starting at rip: the breakpoint we have just hit, and the instruction that follows it:

0:000> u rip L2
x64+0x1166:
00007ff7`67e31166 cc              int     3
00007ff7`67e31167 41ffd2          call    r10

Line 3 shows that we have hit our breakpoint just before we call MessageBoxA.

Next we can examine the registers, using the r command:

0:000> r
rax=0000000261eff90c rbx=00007ffcc5f90000 rcx=0000000000000000
rdx=0000000261eff90c rsi=00007ffcc602a398 rdi=00007ffcc5fab650
rip=00007ff767e31166 rsp=0000000261eff8e0 rbp=0000000261eff990
 r8=0000000261eff90c  r9=0000000000000000 r10=00007ffcc54490d0
r11=0000000261eff5c0 r12=0000000000000000 r13=0000000000000000
r14=0000000000000000 r15=0000000000000000
iopl=0         nv up ei pl nz na pe nc
cs=0033  ss=002b  ds=002b  es=002b  fs=0053  gs=002b             efl=00000202

Keep in mind that, according to the calling convention, we are required to use rcx, rdx, r8, and r9 for our first four arguments. Below is the syntax for MessageBoxA:

MessageBoxA syntax
int MessageBoxA(
  [in, optional] HWND   hWnd,
  [in, optional] LPCSTR lpText,
  [in, optional] LPCSTR lpCaption,
  [in]           UINT   uType
);

Let's examine each argument. The simplest are hWnd (rcx) and uType (r9), both of which we have set to zero:

0:000> r rcx, r9
rcx=0000000000000000 r9=0000000000000000

Perfect! In our example lpText (rdx), and lpCaption (r8) are pointing to the same string that was pushed on to the stack:

0:000> r rdx, r8
rdx=0000000261eff90c r8=0000000261eff90c

We can examine these registers to see if our string resides in the address they reference:

0:000> da rdx
00000002`61eff90c  "Advanced Windows Exploitation"
0:000> da r8
00000002`61eff90c  "Advanced Windows Exploitation"

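Finally, we can check that the stack is correctly aligned. WinDbg evaluates numbers in hexadecimal by default, so the 10 below means 16, and a result of 0 means rsp is a multiple of 16 bytes: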

0:000> ?rsp%10
Evaluate expression: 0 = 00000000`00000000

Everything is looking good and we can continue execution with the g command. We should be presented with a message box:

This is my preferred workflow, but you can choose your own; if you want to write shellcode efficiently you will need a good workflow that works for you.

Exercises

  1. Install a Windows VM that you can use as a debugging and development machine. Ensure that you have a workflow that you are comfortable with.

  2. Work through the workflow, using the presented MessageBox shellcode.

Last updated