summaryrefslogtreecommitdiff
path: root/tco.asm
blob: a26de6287681925f0647bbd9a326b5ca6eae8028 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
; Copyright (C) 2025 Aiden Gall
;
; This program is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation, either version 3 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program.  If not, see <http://www.gnu.org/licenses/>.

; flat assembler output format: 64-bit ELF object file
format ELF64

; entry points exported to the linker
public tco_go
public tco_args
public tco_yield

; assembly-time configuration options
; config.inc is expected to define STACK_CAPACITY and MALLOC, which are
; validated immediately below
include 'config.inc'
; tco_go places the context node (and the initial stack pointer) at
; allocation base + STACK_CAPACITY, so the capacity must keep that address
; 16-byte aligned
assert (STACK_CAPACITY mod 16) = 0
; only these three allocation back ends are implemented in this file
assert MALLOC in <posix_memalign,aligned_alloc,mmap>

; circular singly-linked list containing callee-saved registers and instruction
; pointer to resume execution of coroutine when yielded to
;
; layout of one node, nine quadwords in total:
;   .next                  pointer to the following node, initialised from
;                          the struc argument
;   .rsp .rbp .rbx .r12
;   .r13 .r14 .r15 .rip    saved register values, left uninitialised
struc ctx_node next {
	label .

	.next dq next

	irps reg, rsp rbp rbx r12 r13 r14 r15 rip \{
		.\#\.\#reg dq ?
	\}
}

; instantiate one node in a virtual block, which emits no output, purely to
; measure its size in bytes
virtual
	@@ ctx_node ?
	CTX_NODE_SIZEOF = $-$$
end virtual

; allocator back end selected by MALLOC:
;   mmap                           raw Linux system calls, so only syscall
;                                  numbers and flag values are needed
;   posix_memalign/aligned_alloc   libc allocator plus free, imported as
;                                  external symbols
if MALLOC eq mmap
	; x86-64 Linux system call numbers
	SYS_MMAP      = 9
	SYS_MUNMAP    = 11

	; mmap protection bits
	PROT_READ     = 0x1
	PROT_WRITE    = 0x2

	; mmap flag bits
	MAP_PRIVATE   = 0x02
	MAP_ANONYMOUS = 0x20
	MAP_GROWSDOWN = 0x100

else if MALLOC in <posix_memalign,aligned_alloc>
	extrn MALLOC
	extrn free
end if

section '.text' executable
; int tco_go(void (*f)(...))
; spawns a coroutine
;
; ABI:  System V AMD64
; In:   rdi = f, entry point of the new coroutine
; Out:  on allocation failure returns a nonzero error code straight away
;       (the posix_memalign result, or ENOMEM for aligned_alloc/mmap).
;       on success control transfers directly to f on a new stack; the
;       caller's context is stashed and resumes at its return address once
;       another context switches back to it, with rax as left by that switch
;       (tco_yield zeroes it).
;
; f is entered with the arguments last recorded by tco_args and with deinit
; as its return address, so returning from f tears the coroutine down.
;
; register roles: r11 carries f from the allocation step to the final jump.
; it is neither an argument register nor callee-saved, which leaves rcx free
; to carry the fourth integer argument.
tco_go:
	call stash

	; pushing rdi also aligns stack to 16 byte boundary for function call
	push rdi

if MALLOC eq posix_memalign
	; 16 bytes of scratch: [rsp] receives the allocated pointer, and the
	; adjustment keeps rsp 16-byte aligned for the call
	sub rsp, 16

	mov rdi, rsp
	mov esi, 16
	mov edx, STACK_CAPACITY + CTX_NODE_SIZEOF
	call plt posix_memalign

	; posix_memalign reports failure through its return value
	test eax, eax
	jnz .oom

	mov rax, [rsp]      ; rax = base of the new allocation
	mov r11, [rsp+16]   ; r11 = f, pushed above
	add rsp, 24

else if MALLOC eq aligned_alloc
	mov edi, 16
	mov esi, STACK_CAPACITY + CTX_NODE_SIZEOF
	call plt aligned_alloc

	; pop leaves the flags from test untouched
	test rax, rax
	pop r11             ; r11 = f
	jz .oom

else if MALLOC eq mmap
	mov eax, SYS_MMAP
	xor edi, edi
	mov esi, STACK_CAPACITY + CTX_NODE_SIZEOF
	mov edx, PROT_READ or PROT_WRITE
	mov r10d, MAP_PRIVATE or MAP_ANONYMOUS or MAP_GROWSDOWN
	mov r8d, -1
	xor r9d, r9d
	syscall

	; the syscall instruction clobbers rcx and r11, so f is only reloaded now
	pop r11             ; r11 = f

	; a raw system call reports failure as -errno, i.e. an unsigned value in
	; [-4095, -1]; comparing against -1 alone would miss ENOMEM (-12)
	cmp rax, -4095
	jae .oom
end if

	; the context node sits directly above the stack area, so its address is
	; also the initial (16-byte aligned) top of the new stack
	virtual at rax+STACK_CAPACITY
		.new_ctx ctx_node ?
	end virtual

	lea rdi, [.new_ctx]
	mov rsp, rdi

	mov rsi, [current_ctx_ptr]
	virtual at rsi
		.current_ctx ctx_node ?
	end virtual

	; link the new node into the ring right after the current one
	mov rdx, [.current_ctx.next]
	mov [.new_ctx.next], rdx
	mov [.current_ctx.next], rdi

	; the new coroutine becomes current, the spawning context its predecessor
	mov [current_ctx_ptr], rdi
	mov [prev_ctx_ptr], rsi

	; push deinit pointer to the stack, coroutine uses it as return address
	lea rdx, [deinit]
	push rdx

	; restore coroutine arguments
	irps reg, rax rdi rsi rdx r8 r9 {
		mov reg, [coroutine_args.#reg]
	}
	; fourth integer argument; tco_args keeps rcx in the slot named .r10
	mov rcx, [coroutine_args.r10]

	; for variadic calls only al is defined: it bounds the number of vector
	; registers in use, so skip the xmm reload when it is zero
	test al, al
	jz @f

	rept 8 n:0 {
		movaps xmm#n, [coroutine_args.xmm#n]
	}
@@:
	; tail-jump into the coroutine; its eventual ret lands in deinit
	jmp r11

.oom:
if MALLOC eq posix_memalign
	; drop the scratch area and the saved f; eax already holds the error code
	add rsp, 24

else if MALLOC in <aligned_alloc,mmap>
	mov eax, 12 ; ENOMEM

end if
	ret

; void tco_args(...)
; sets the arguments of the next coroutine
;
; ABI:  System V AMD64, variadic
; In:   rdi, rsi, rdx, rcx, r8, r9 = integer arguments
;       al = upper bound on the number of vector registers used
;       xmm0-xmm7 = vector arguments, recorded only when al is nonzero
; Out:  nothing; the values are kept in coroutine_args until tco_go loads them
tco_args:
	irps reg, rax rdi rsi rdx r8 r9 {
		mov [coroutine_args.#reg], reg
	}
	; fourth integer argument; stored in the slot named .r10
	mov [coroutine_args.r10], rcx

	; the upper bits of rax are unspecified for variadic calls, test al only
	test al, al
	jz @f

	rept 8 n:0 {
		movaps [coroutine_args.xmm#n], xmm#n
	}
@@:
	ret

; void tco_yield(void)
; yield to next coroutine
;
; saves the caller's callee-saved registers, stack pointer and return address
; into the current context node, then hands control to the next node in the
; ring; execution continues after this call once the context is switched
; back to
tco_yield:
	call stash
	; switch passes rax through unchanged, and the resumed context may be
	; returning from tco_go (which returns int and uses nonzero values for
	; allocation errors), so hand it a zero
	xor eax, eax
	jmp switch

; stashes callee-saved registers
; implementation must not modify rdi as tco_go saves its argument there
;
; called as the very first instruction of tco_go and tco_yield, so that the
; stack still looks exactly as their caller left it; records in the current
; context node everything needed to resume that caller later
; clobbers: rax, rdx
stash:
	mov rax, [current_ctx_ptr]
	virtual at rax
		.ctx ctx_node ?
	end virtual

	; stack as seen here:
	;   [rsp]    return address into tco_go/tco_yield
	;   [rsp+8]  return address into their caller
	;   rsp+16   caller's stack pointer once that return has happened
	virtual at rsp
		.own_return dq ?
		.resume_rip dq ?
		.resume_rsp dq ?
	end virtual

	; stack pointer the caller will have after being resumed
	lea rdx, [.resume_rsp]
	mov [.ctx.rsp], rdx

	; remaining callee-saved registers
	irps reg, r15 r14 r13 r12 rbx rbp {
		mov [.ctx.#reg], reg
	}

	; instruction at which the caller resumes
	mov rdx, [.resume_rip]
	mov [.ctx.rip], rdx

	ret

; switches to next context
; implementation must not modify rax as tco_yield saves its return value there
;
; advances current_ctx_ptr to the following node of the ring, remembers the
; node being left in prev_ctx_ptr, reloads the saved registers and resumes at
; the saved instruction pointer; reached by jmp and never returns to its
; origin
; clobbers: rsi, rdi
switch:
	mov rsi, [current_ctx_ptr]
	virtual at rsi
		.leaving ctx_node ?
	end virtual

	mov rdi, [.leaving.next]
	virtual at rdi
		.entering ctx_node ?
	end virtual

	; update the bookkeeping pointers
	mov [prev_ctx_ptr], rsi
	mov [current_ctx_ptr], rdi

	; reload stack pointer and callee-saved registers of the entered context
	irps reg, r15 r14 r13 r12 rbx rbp rsp {
		mov reg, [.entering.#reg]
	}

	jmp [.entering.rip]

; removes coroutine context from linked list and frees stack/context
;
; reached when a coroutine function returns: tco_go pushes this address as the
; return address of every coroutine it starts. never returns; finishes by
; switching to the next context in the ring
deinit:
	mov rdi, [prev_ctx_ptr]
	virtual at rdi
		.prev_ctx ctx_node ?
	end virtual

	; the context node sits directly above the stack area, so the allocation
	; base is STACK_CAPACITY bytes below the node
	mov rsi, [current_ctx_ptr]
	virtual at rsi-STACK_CAPACITY
		.current_stack rb STACK_CAPACITY
		.current_ctx ctx_node ?
	end virtual

	; unlink the finished node: predecessor now points past it
	mov rdx, [.current_ctx.next]
	mov [.prev_ctx.next], rdx

	; make the predecessor current so that switch advances to its new successor
	mov [current_ctx_ptr], rdi

if MALLOC in <posix_memalign,aligned_alloc>
	; free must not run on the stack it is releasing, so borrow the
	; predecessor's saved stack pointer for the duration of the call
	mov rsp, [.prev_ctx.rsp]

	lea rdi, [.current_stack]
	call plt free

else if MALLOC eq mmap
	mov eax, SYS_MUNMAP
	lea rdi, [.current_stack]
	mov esi, STACK_CAPACITY + CTX_NODE_SIZEOF
	syscall
end if
	; switch hands rax unchanged to the resumed context, which may be
	; returning from tco_go; free leaves rax undefined and a failed munmap
	; leaves -errno there, so report success explicitly
	xor eax, eax
	jmp switch

section '.data' writeable
	; statically allocated context node whose next pointer initially refers
	; to itself, i.e. a ring of one element
	root_ctx ctx_node root_ctx

	; node of the context that is currently executing; starts at root_ctx
	current_ctx_ptr dq root_ctx
	; node that was current before the most recent switch or spawn; deinit
	; uses it as the predecessor when unlinking a finished coroutine
	prev_ctx_ptr dq root_ctx

; staging area written by tco_args and read by tco_go
; the section is 16-byte aligned and the vector slots come first, so each
; 16-byte xmm slot is suitably aligned for movaps
section '.bss' writeable align 16
coroutine_args:
	; eight 16-byte slots labelled .xmm0 to .xmm7
	rept 8 n:0 {
		label .xmm#n dqword
		rq 2
	}

	; one quadword slot per listed register name
	irps reg, rax rdi rsi rdx r10 r8 r9 {
		.#reg dq ?
	}