; Copyright (C) 2025 Aiden Gall
;
; This program is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation, either version 3 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program. If not, see <https://www.gnu.org/licenses/>.
format ELF64
public tco_go
public tco_args
public tco_yield
; assembly-time configuration options (must define STACK_CAPACITY and MALLOC)
include 'config.inc'
; coroutine stacks must keep the 16-byte alignment required by the SysV ABI
assert (STACK_CAPACITY mod 16) = 0
; MALLOC selects the allocator used for coroutine stacks
assert MALLOC in <posix_memalign, aligned_alloc, mmap>
; circular singly-linked list containing callee-saved registers and instruction
; pointer to resume execution of coroutine when yielded to
struc ctx_node next {
label .
.next dq next
; "." names the instance itself, so this emits <instance>.rsp ... <instance>.rip
irps reg, rsp rbp rbx r12 r13 r14 r15 rip \{
.\#\.\#reg dq ?
\}
}
; measure the size of one ctx_node without emitting any data
virtual
@@ ctx_node ?
CTX_NODE_SIZEOF = $-$$
end virtual
if MALLOC in <posix_memalign, aligned_alloc>
; libc allocators: import the chosen allocator plus free for deinit
extrn MALLOC
extrn free
else if MALLOC eq mmap
; raw Linux syscall numbers and flags (see mmap(2)/munmap(2))
SYS_MMAP = 9
SYS_MUNMAP = 11
PROT_READ = 1 shl 0
PROT_WRITE = 1 shl 1
MAP_PRIVATE = 1 shl 1
MAP_ANONYMOUS = 1 shl 5
MAP_GROWSDOWN = 1 shl 8
end if
section '.text' executable
; int tco_go(void (*f)(...))
; spawns a coroutine
; allocates STACK_CAPACITY bytes of stack with a ctx_node at its top, links
; the new context right after the current one, switches onto the new stack
; and jumps into f with the arguments previously staged by tco_args
; returns in rax: 0 once another context yields back here, or ENOMEM (12)
; when the stack allocation fails
tco_go:
call stash
; pushing rdi also aligns stack to 16 byte boundary for function call
push rdi
if MALLOC eq posix_memalign
; int posix_memalign(void **memptr, size_t alignment, size_t size)
sub rsp, 16 ; memptr slot (16 keeps rsp aligned at the call)
mov rdi, rsp
mov esi, 16
mov edx, STACK_CAPACITY + CTX_NODE_SIZEOF
call plt posix_memalign
test eax, eax
jnz .oom ; eax already holds the error number
mov rax, [rsp] ; allocation base
mov rcx, [rsp+16] ; reload saved f
add rsp, 24 ; drop memptr slot and the pushed rdi
else if MALLOC eq aligned_alloc
; void *aligned_alloc(size_t alignment, size_t size)
mov edi, 16
mov esi, STACK_CAPACITY + CTX_NODE_SIZEOF
call plt aligned_alloc
test rax, rax
pop rcx ; reload saved f (pop leaves the flags intact)
jz .oom
else if MALLOC eq mmap
; anonymous private read/write mapping, fd = -1, offset = 0
mov eax, SYS_MMAP
xor edi, edi
mov esi, STACK_CAPACITY + CTX_NODE_SIZEOF
mov edx, PROT_READ or PROT_WRITE
mov r10d, MAP_PRIVATE or MAP_ANONYMOUS or MAP_GROWSDOWN
mov r8d, -1
xor r9d, r9d
syscall
pop rcx ; reload saved f
cmp rax, -1
je .oom
end if
; the ctx_node lives just above the coroutine's stack area
virtual at rax+STACK_CAPACITY
.new_ctx ctx_node ?
end virtual
lea rdi, [.new_ctx]
mov rsp, rdi ; coroutine stack grows down from its ctx_node
mov rsi, [current_ctx_ptr]
virtual at rsi
.current_ctx ctx_node ?
end virtual
; insert the new node into the ring right after the current one
mov rdx, [.current_ctx.next]
mov [.new_ctx.next], rdx
mov [.current_ctx.next], rdi
mov [current_ctx_ptr], rdi
mov [prev_ctx_ptr], rsi
; push deinit pointer to the stack, coroutine uses it as return address
lea rdx, [deinit]
push rdx
; restore coroutine arguments
irps reg, rax rdi rsi rdx r10 r8 r9 {
mov reg, [coroutine_args.#reg]
}
; rax mirrors the variadic-call convention (al = number of vector registers
; used); skip the xmm reload when no vector arguments were staged
test rax, rax
jz @f
rept 8 n:0 {
movaps xmm#n, [coroutine_args.xmm#n]
}
@@:
jmp rcx ; enter the coroutine body
.oom:
if MALLOC eq posix_memalign
add rsp, 24 ; drop memptr slot and the pushed rdi; eax = error number
else if MALLOC in <aligned_alloc, mmap>
mov eax, 12 ; ENOMEM
end if
ret
; void tco_args(...)
; sets the arguments of the next coroutine
; spills the integer argument registers into the coroutine_args staging
; area; when rax (al, the variadic vector-register count) is non-zero the
; xmm argument registers are captured as well
tco_args:
mov [coroutine_args.rax], rax
mov [coroutine_args.rdi], rdi
mov [coroutine_args.rsi], rsi
mov [coroutine_args.rdx], rdx
mov [coroutine_args.r10], r10
mov [coroutine_args.r8], r8
mov [coroutine_args.r9], r9
test rax, rax
jz .done
movaps [coroutine_args.xmm0], xmm0
movaps [coroutine_args.xmm1], xmm1
movaps [coroutine_args.xmm2], xmm2
movaps [coroutine_args.xmm3], xmm3
movaps [coroutine_args.xmm4], xmm4
movaps [coroutine_args.xmm5], xmm5
movaps [coroutine_args.xmm6], xmm6
movaps [coroutine_args.xmm7], xmm7
.done:
ret
; void tco_yield(void)
; yield to next coroutine
; stashes this caller's resume state in the current ctx_node, then transfers
; control to the next context in the ring with rax = 0
tco_yield:
call stash
xor eax, eax ; value observed in rax by the context being resumed
jmp switch ; tail jump; switch never returns here directly
; stashes callee-saved registers
; implementation must not modify rdi as tco_go saves its argument there
; called only by tco_go/tco_yield; records into the current ctx_node the
; state needed to resume THEIR caller later: the stack pointer as it will be
; after the public entry point returns, the callee-saved GPRs, and the
; caller's return address as the resume rip
; clobbers: rax, rdx (flags untouched)
stash:
mov rax, [current_ctx_ptr]
virtual at rax
.current_ctx ctx_node ?
end virtual
; stack layout here: [rsp] = return into tco_go/tco_yield,
; [rsp+8] = their caller's return address, rsp+16 = that caller's rsp
virtual at rsp+8
.return_address dq ?
.ctx_stack_pointer dq ?
end virtual
lea rdx, [.ctx_stack_pointer] ; rdx = rsp+16, stack pointer at resume
mov [.current_ctx.rsp], rdx
irps reg, rbp rbx r12 r13 r14 r15 {
mov [.current_ctx.#reg], reg
}
mov rdx, [.return_address]
mov [.current_ctx.rip], rdx ; resume right after the public call site
ret
; switches to next context
; implementation must not modify rax as tco_yield saves its return value there
; restores the stack pointer and callee-saved registers of the next ctx_node
; in the ring, updates current/prev bookkeeping and jumps to its saved rip;
; never returns to its caller
switch:
mov rsi, [current_ctx_ptr]
virtual at rsi
.current_ctx ctx_node ?
end virtual
mov rdi, [.current_ctx.next] ; rdi = node to resume
virtual at rdi
.next_ctx ctx_node ?
end virtual
; note rsp is restored first in this list; remaining loads use the new stack
; pointer only as register contents, not as memory accesses through rsp
irps reg, rsp rbp rbx r12 r13 r14 r15 {
mov reg, [.next_ctx.#reg]
}
mov [current_ctx_ptr], rdi
mov [prev_ctx_ptr], rsi
jmp [.next_ctx.rip]
; removes coroutine context from linked list and frees stack/context
; reached when a coroutine body returns: tco_go pushed deinit as the
; coroutine's return address; unlinks the finished context, releases its
; allocation and resumes the next context in the ring
deinit:
mov rdi, [prev_ctx_ptr]
virtual at rdi
.prev_ctx ctx_node ?
end virtual
mov rsi, [current_ctx_ptr]
; the ctx_node sits at the top of the allocation, so the allocation base is
; STACK_CAPACITY bytes below it
virtual at rsi-STACK_CAPACITY
.current_stack rb STACK_CAPACITY
.current_ctx ctx_node ?
end virtual
; unlink the dying node: prev->next = current->next
mov rdx, [.current_ctx.next]
mov [.prev_ctx.next], rdx
mov [current_ctx_ptr], rdi
if MALLOC in <posix_memalign, aligned_alloc>
; cannot call free while running on the stack being freed; borrow the
; previous context's saved (16-byte aligned) stack pointer for the call
mov rsp, [.prev_ctx.rsp]
lea rdi, [.current_stack]
call plt free
else if MALLOC eq mmap
; a raw syscall touches no user stack, so unmapping our own stack is safe;
; nothing below reads through rsp before switch reloads it
mov eax, SYS_MUNMAP
lea rdi, [.current_stack]
mov esi, STACK_CAPACITY + CTX_NODE_SIZEOF
syscall
end if
jmp switch
section '.data' writeable
; bootstrap context for the initial thread of execution; the ring starts out
; containing just this one node pointing at itself
root_ctx ctx_node root_ctx
; node whose registers are live right now
current_ctx_ptr dq root_ctx
; node that was live before the most recent switch; deinit frees relative to it
prev_ctx_ptr dq root_ctx
section '.bss' writeable align 16
; staging area written by tco_args and drained by tco_go:
; eight 16-byte xmm slots (16-byte aligned for movaps) followed by one qword
; per staged integer argument register
coroutine_args:
rept 8 n:0 {
label .xmm#n dqword
rq 2
}
irps reg, rax rdi rsi rdx r10 r8 r9 {
.#reg dq ?
}