; Copyright (C) 2025 Aiden Gall
;
; This program is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation, either version 3 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program. If not, see <http://www.gnu.org/licenses/>.
format ELF64
; assembly-time configuration options
include 'config.inc'
; the stack size must be a multiple of 16 so that rsp stays 16-byte
; aligned when it is placed at the top of a freshly allocated stack
assert (STACK_CAPACITY mod 16) = 0
; the allocator backend is chosen at assembly time from these three
assert MALLOC in <posix_memalign,aligned_alloc,mmap>
; fn lbl: defines an exported function entry point.
; Emits an endbr64 landing pad when IBT (Intel CET indirect branch
; tracking) is enabled in the configuration, since these entry points
; may be reached through indirect calls.
macro fn lbl {
label lbl
public lbl
if IBT_ENABLE
endbr64
end if
}
; circular singly-linked list containing callee-saved registers and instruction
; pointer to resume execution of coroutine when yielded to
; layout: .next (link to the following context), then one qword slot per
; saved register: rsp, rbp, rbx, r12-r15, and finally rip (resume point)
struc ctx_node next {
label .
.next dq next
irps reg, rsp rbp rbx r12 r13 r14 r15 rip \{
.\#\.\#reg dq ?
\}
}
; measure the size of one node at assembly time without emitting data
virtual
@@ ctx_node ?
CTX_NODE_SIZEOF = $-$$
end virtual
if MALLOC in <posix_memalign,aligned_alloc>
; libc backend: import the selected aligned allocator and free
extrn MALLOC
extrn free
else if MALLOC eq mmap
; raw Linux syscall backend: numbers and flags from the x86-64 kernel ABI
SYS_MMAP = 9
SYS_MUNMAP = 11
PROT_READ = 1 shl 0
PROT_WRITE = 1 shl 1
MAP_PRIVATE = 1 shl 1
MAP_ANONYMOUS = 1 shl 5
MAP_GROWSDOWN = 1 shl 8
end if
section '.text' executable
; int tco_go(void (*f)(...))
; spawns a coroutine: allocates a stack with a context node embedded at
; its top, links the node after the current context, and jumps into f
; with the arguments previously staged by tco_args.
; Returns 0 on success (delivered in rax when control first comes back
; to this context) or a nonzero errno-style code on allocation failure.
fn tco_go
call stash
; pushing rdi also aligns stack to 16 byte boundary for function call
push rdi
if MALLOC eq posix_memalign
; int posix_memalign(void **memptr, size_t alignment, size_t size)
sub rsp, 16
mov rdi, rsp
mov esi, 16
mov edx, STACK_CAPACITY + CTX_NODE_SIZEOF
call plt posix_memalign
test eax, eax
jnz .oom
mov rax, [rsp]                  ; rax = allocated block
mov rcx, [rsp+16]               ; rcx = f (pushed above)
add rsp, 24
else if MALLOC eq aligned_alloc
; void *aligned_alloc(size_t alignment, size_t size)
mov edi, 16
mov esi, STACK_CAPACITY + CTX_NODE_SIZEOF
call plt aligned_alloc
test rax, rax
pop rcx                         ; rcx = f; pop does not affect flags
jz .oom
else if MALLOC eq mmap
; mmap(NULL, size, PROT_READ|PROT_WRITE, flags, -1, 0)
mov eax, SYS_MMAP
xor edi, edi
mov esi, STACK_CAPACITY + CTX_NODE_SIZEOF
mov edx, PROT_READ or PROT_WRITE
mov r10d, MAP_PRIVATE or MAP_ANONYMOUS or MAP_GROWSDOWN
mov r8d, -1
xor r9d, r9d
syscall
pop rcx                         ; rcx = f; pop does not affect rax
; raw syscalls report failure as -errno in rax, anywhere in the range
; [-4095, -1] -- not as -1 like the libc wrapper -- so an unsigned
; compare against -4095 is required to catch every error code
cmp rax, -4095
jae .oom
end if
; the context node lives just above the coroutine's stack area
virtual at rax+STACK_CAPACITY
.new_ctx ctx_node ?
end virtual
lea rdi, [.new_ctx]
; stack grows down from the node; rax is 16-aligned and STACK_CAPACITY
; is a multiple of 16, so rsp starts 16-byte aligned
mov rsp, rdi
; insert the new node right after the current one in the circular list
mov rsi, [current_ctx_ptr]
virtual at rsi
.current_ctx ctx_node ?
end virtual
mov rdx, [.current_ctx.next]
mov [.new_ctx.next], rdx
mov [.current_ctx.next], rdi
mov [current_ctx_ptr], rdi
mov [prev_ctx_ptr], rsi
; push deinit pointer to the stack, coroutine uses it as return address
lea rdx, [deinit]
push rdx
; restore coroutine arguments
irps reg, rax rdi rsi rdx r10 r8 r9 {
mov reg, [coroutine_args.#reg]
}
; rax nonzero signals that vector registers carry arguments
; (mirrors the SysV variadic al convention)
test rax, rax
jz @f
rept 8 n:0 {
movaps xmm#n, [coroutine_args.xmm#n]
}
@@:
jmp rcx
.oom:
if MALLOC eq posix_memalign
; eax already holds the error code returned by posix_memalign
add rsp, 24
else if MALLOC in <aligned_alloc,mmap>
mov eax, 12 ; ENOMEM
end if
ret
; void tco_args(...)
; sets the arguments of the next coroutine
; Captures every integer argument register into the coroutine_args
; staging area; r10 stands in for rcx (tco_go needs rcx for the entry
; jump). rax doubles as a flag: when nonzero, xmm0-xmm7 are captured
; too, mirroring the SysV variadic convention (al = vector arg count).
; No scratch register is available here -- all argument registers are
; live on entry -- so the stores must come first.
fn tco_args
irps reg, rax rdi rsi rdx r10 r8 r9 {
mov [coroutine_args.#reg], reg
}
test rax, rax
jz @f
rept 8 n:0 {
movaps [coroutine_args.xmm#n], xmm#n
}
@@:
ret
; void tco_yield(void)
; yield to next coroutine
; Saves this context via stash, then switches. The zeroed rax is what
; the context resumed by switch observes as its return value, so a
; parent blocked in tco_go sees 0 (success).
fn tco_yield
call stash
xor eax, eax
jmp switch
; stashes callee-saved registers
; implementation must not modify rdi as tco_go saves its argument there
; Stack on entry:
;   [rsp]    return address into stash's caller (tco_go / tco_yield)
;   [rsp+8]  return address of that caller itself -- the point where
;            this context resumes when switched back to
; The saved rsp is rsp+16: the stack pointer as it will be once both
; frames have unwound.
stash:
mov rax, [current_ctx_ptr]
virtual at rax
.current_ctx ctx_node ?
end virtual
virtual at rsp+8
.return_address dq ?
.ctx_stack_pointer dq ?
end virtual
lea rdx, [.ctx_stack_pointer]
mov [.current_ctx.rsp], rdx
irps reg, rbp rbx r12 r13 r14 r15 {
mov [.current_ctx.#reg], reg
}
; resume point = the outer function's return address
mov rdx, [.return_address]
mov [.current_ctx.rip], rdx
ret
; switches to next context
; implementation must not modify rax as tco_yield saves its return value there
; Restores all callee-saved registers (including rsp) from the next
; node in the circular list, updates the current/prev pointers, and
; jumps to the saved resume address. Performs no stack accesses of its
; own, so it is safe to enter with an invalid rsp (see deinit).
switch:
mov rsi, [current_ctx_ptr]
virtual at rsi
.current_ctx ctx_node ?
end virtual
mov rdi, [.current_ctx.next]
virtual at rdi
.next_ctx ctx_node ?
end virtual
irps reg, rsp rbp rbx r12 r13 r14 r15 {
mov reg, [.next_ctx.#reg]
}
mov [current_ctx_ptr], rdi
mov [prev_ctx_ptr], rsi
jmp [.next_ctx.rip]
; removes coroutine context from linked list and frees stack/context
; Reached as the coroutine's return address (pushed by tco_go).
; Unlinks the finished context, releases its stack+node allocation,
; and switches to the next context in the ring.
deinit:
mov rdi, [prev_ctx_ptr]
virtual at rdi
.prev_ctx ctx_node ?
end virtual
mov rsi, [current_ctx_ptr]
; the allocation starts STACK_CAPACITY bytes below the context node
virtual at rsi-STACK_CAPACITY
.current_stack rb STACK_CAPACITY
.current_ctx ctx_node ?
end virtual
mov rdx, [.current_ctx.next]
mov [.prev_ctx.next], rdx
mov [current_ctx_ptr], rdi
if MALLOC in <posix_memalign,aligned_alloc>
; free needs a usable stack, so borrow the previous context's first
mov rsp, [.prev_ctx.rsp]
lea rdi, [.current_stack]
call plt free
else if MALLOC eq mmap
; NOTE(review): this unmaps the stack we are currently running on;
; switch performs no stack accesses before installing the next rsp,
; but a signal delivered in this window would fault -- confirm signals
; are masked or delivered on an alternate stack
mov eax, SYS_MUNMAP
lea rdi, [.current_stack]
mov esi, STACK_CAPACITY + CTX_NODE_SIZEOF
syscall
end if
; switch hands rax to the resumed context as the apparent return value
; of tco_go/tco_yield; rax is undefined after the free call (and holds
; the munmap status otherwise), so report success explicitly
xor eax, eax
jmp switch
section '.data' writeable
; bootstrap node for the main thread: a ring of one, linked to itself
root_ctx ctx_node root_ctx
; context currently executing
current_ctx_ptr dq root_ctx
; context that was executing before the last switch (used by deinit)
prev_ctx_ptr dq root_ctx
section '.bss' writeable align 16
; staging area for the next coroutine's arguments, written by tco_args
; and consumed by tco_go
coroutine_args:
; xmm0-xmm7 slots first: 16 bytes each, 16-byte aligned for movaps
rept 8 n:0 {
label .xmm#n dqword
rq 2
}
; integer argument registers; r10 replaces rcx because tco_go uses rcx
; to hold the coroutine entry point
irps reg, rax rdi rsi rdx r10 r8 r9 {
.#reg dq ?
}