-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathkernel.mu4
440 lines (332 loc) · 12.8 KB
/
kernel.mu4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
| This file is part of muforth: https://muforth.dev/
|
| Copyright 2002-2025 David Frech. (Read the LICENSE for details.)
loading MSP430 Forth kernel
| Yes, you guessed it: The guts of Forth for the MSP430! This chip is
| awesomely well-suited to running Forth, and in fact, it will even run an
| ITC - indirect-threaded code - Forth with almost no overhead.
|
| Since this is the purest and most elegant expression of Forth, it's
| pretty great that it will fit nicely onto the chip. With 16-bit addresses
| and ITC, a ton of code will fit into the MSP430G2553's 16 KiB of flash.
| -------------------------------------------------------------------------
| Macros defining register conventions
| -------------------------------------------------------------------------
| Lower registers are callee-saved. Higher registers are caller-saved - ie,
| scratch.
assembler
| 0 pc
| 1 sp
| 2 sr
| 3 r3 )
4 reg cp ( cp - context pointer, from chat's perspective)
4 reg ip ( ip - forth instruction pointer)
5 reg rp ( rp - return stack pointer)
6 reg top ( cached top of data stack)
.ifndef loop-on-stack
7 reg ix ( current for/next/do/loop index)
.then
12 reg w ( w - "working" register: holds cfa during next; scratch)
13 reg x ( scratch x)
14 reg y ( scratch y)
15 reg z ( scratch z)
forth
( Tell the disassembler about these register names.)
-: ( reg)
2* 2*
z" pc sp sr r3 ip rp top ix r8 r9 r10 r11 w x y z " +
( 0000111122223333444455556666777788889999aaaabbbbccccddddeeeeffff)
4 -trailing type ; is .regname
| -------------------------------------------------------------------------
| Macros defining common VM operations
| -------------------------------------------------------------------------
assembler
| NOTE: I have chosen to use auto-increment to load the code pointer into
| pc. After next, w points to the parameter field rather than the code field.
|
| This adds one cycle to next - 5 instead of 4 - but simplifies all the
| Forth runtimes - for create, constant, :, etc - and saves an instruction
| and a cycle in each of them. The only execution path that slows down is
| calling native - code - words. I hope it's worth it! I like the simplicity
| and elegance of doing it this way.
: next asm{ ip )+ w mov w )+ pc mov } ;
: rpush ( ea) asm{ 2 # rp sub ( ea) rp ) mov } ;
: rpop ( ea) asm{ rp )+ \f swap ( ea) mov } ;
: nest asm{ ip rpush } ;
: unnest asm{ ip rpop } ;
forth
| -------------------------------------------------------------------------
| The kernel begins here!
| -------------------------------------------------------------------------
__meta
hex
label dodoes nest ip pop ( fall thru to dovar/docreate) ;c
( Note the structural similarities!)
meta: create new ;code label wpush
implements target-do-var
top push w top mov next ;c
meta: constant new , ;code
label doconst
implements target-do-const
top push w ) top mov next ;c
meta: : new ] ;code
label docolon
implements target-do-colon
nest w ip mov next ;c
definer: does> <;code> asm{ dodoes # call } ] ;
code* ^ [r] unnest begin next ;c
codes nope [r]
| Allocate buffer space _before_ defining the constant that pushes the
| buffer's address. This way we can define buffers in ram as well as in
| flash!
meta: buffer ( #bytes) h @ push ram here swap allot pop h ! constant ;
meta: variable cell buffer ; ( A variable is just a cell-sized buffer!)
meta: 2variable 2 cells buffer ; ( A 2variable is just a buffer of 2 cells!)
code* (lit) [r] ip )+ w mov wpush jmp ;c
code* (branch) [r]
label branch ip ) ip mov next ;c
code* (0branch) [r] top tst top pop branch 0!= until ( fall thru) ;c
label skip 2 # ip add next ;c
code* (?0branch) [r] top tst skip 0= until top pop branch j ;c
code* (=0branch) [r] top tst skip 0= until branch j ;c
code execute ( cfa) top w mov begin top pop w )+ pc mov ;c
code @execute ( 'cfa) top ) w mov again ;c
.ifndef loop-on-stack
( Fast version, using ix register)
code* (for) [r] ( u)
ix rpush top ix mov top pop next ;c
code* (next) [r]
1 # ix sub branch 0= until ix rpop skip j ;c
| Do-loop frame looks like this:
|
| +------------------+
| | saved ix reg |
| +------------------+
| | limit |<--- rp
| +------------------+
|
| Current index is in ix register; current "user-visible" index is
| calculated as index + limit.
code* (do) [r] ( limit start)
4 # rp sub ( make room on R stack - all in one go)
ix 2 rp +) mov ( save ix reg on R)
x pop x rp ) mov ( save limit to R)
x top sub top ix mov ( index = start - limit)
top pop next ;c
| Increment index. If it overflows to zero, restore ix register, pop
| stack frame, skip backwards jump, and continue. If non-zero, simply take
| the backwards jump.
code* (loop) [r]
1 # ix add branch 0= until
label undo
2 # rp add ix rpop skip j ;c
| Add incr to index. If the sign of index has changed, we've crossed the
| threshold, so restore index, pop frame, and skip jump. Otherwise, take
| the backwards jump.
code* (+loop) [r] ( incr)
ix x mov ( save index value)
top ix add top pop ix x xor undo 0>= until
branch j ;c
( Push current loop index. User-visible index = index + limit)
code i [r] ( - index)
ix w mov rp ) w add wpush jmp ;c
.else ( keeping for/next and do/loop index on R stack, not in a register)
( for is just >r. next is 3 cycles slower per iteration than fast next)
code* (next) [r]
1 # rp ) sub branch 0= until 2 # rp add skip j ;c
| Do-loop frame looks like this:
|
| +---------+
| | limit |
| +---------+
| | index |<--- rp
| +---------+
|
| "User-visible" index is calculated as index + limit.
code* (do) [r] ( limit start)
4 # rp sub ( make room on R stack - all in one go)
x pop x 2 rp +) mov ( save limit to R)
x top sub top rp ) mov ( index = start - limit)
top pop next ;c
| Increment index. If it overflows to zero, pop stack frame, skip backwards
| jump, and continue. If non-zero, simply take the backwards jump.
code* (loop) [r]
1 # rp ) add branch 0= until
label undo
4 # rp add skip j ;c
| Add incr to index. If the sign of index has changed, we've crossed the
| threshold, so pop stack frame, skip backwards jump, and continue.
| Otherwise, take the backwards jump.
code* (+loop) [r] ( incr)
rp ) x mov ( save index value)
top rp ) add top pop rp ) x xor undo 0>= until
branch j ;c
( Push current loop index. User-visible index = index + limit)
code i [r] ( - index)
rp ) w mov 2 rp +) w add wpush jmp ;c
.then
( Basic unary ops.)
code invert top inv next ;c
code negate top neg next ;c
code 2* top top add next ;c
code 2/ top asr next ;c
code u2/ clrc top ror next ;c
( Basic binary ops.)
code + sp )+ top add next ;c
code and sp )+ top and next ;c
code or sp )+ top bis next ;c
code xor sp )+ top xor next ;c
( Stack ops.)
code dup [r] ( t - t t) top push next ;c
code drop [r] ( x t - x) top pop next ;c
code nip [r] ( x t - t) 2 # sp add next ;c
code over [r] ( w t - w t w) sp ) w mov wpush jmp ;c
code swap [r] ( w t - t w) w pop wpush jmp ;c
code rot [r] ( w x t - x t w) x pop w pop x push wpush jmp ;c
code tuck [r] ( x t - t x t) x pop top push x push next ;c
: 2dup over over ; [r]
: -rot rot rot ; [r]
( Return stack ops.)
.ifdef loop-on-stack implements (for) .then
code >r [r] ( w) top rpush top pop next ;c
code r> [r] ( - w) w rpop wpush jmp ;c
code r@ [r] ( - w) rp ) w mov wpush jmp ;c
( Memory access.)
code @ ( a - w) top ) top mov next ;c
code c@ ( a - b) top ) top movb next ;c
code ! ( w a) sp )+ top ) mov begin top pop next ;c
code c! ( b a) sp )+ top ) movb again ;c
code @+ ( a - w a+2) top )+ w mov begin w push next ;c
code c@+ ( a - b a+1) top )+ w movb again ;c
code !+ ( w a - a+2) sp )+ top ) mov 2 # top add next ;c
code c!+ ( b a - a+1) sp )+ top ) movb 1 # top add next ;c
code +! ( w a) sp )+ top ) add begin begin begin begin
top pop next ;c
code set! ( w a) sp )+ top ) bis again ;c
code clr! ( w a) sp )+ top ) bic again ;c
( Byte-wide versions of set! and clr!)
code cset! ( w a) sp )+ top ) bisb again ;c
code cclr! ( w a) sp )+ top ) bicb again ;c
| These are a bit tricky, esp since borrow is ~carry. The idea is: get the
| inverse of the flag value we want into carry, then subtract top from
| itself - yielding zero - minus borrow, or -1 for true, 0 for false. It's
| efficient but non-obvious.
code 0= 1 # top sub ( ~Z -> C) ( fall thru) ;c
label makeflag top top subc next ;c
code 0< 8000 # top xor top top add ( ~N -> C) makeflag j ;c
code u< x pop top x cmp ( ~uless -> C) makeflag j ;c
code < x pop top x cmp clrc makeflag >= until
setc makeflag j ;c
( Putting - here so you can see that it's more like < than +)
code - x pop top x sub x top mov next ;c
( Another useful compare operator - equality!)
: = xor 0= ;
( Small constants.)
-2 constant -2 [r]
-1 constant -1 [r]
0 constant 0 [r]
1 constant 1 [r]
2 constant 2 [r]
| Incrementers by small constants. Shared code means they take up very
| little space!
meta: incr constant ;code w ) top add next ;c
1 incr 1+ [r]
2 incr 2+ [r]
-1 incr 1- [r]
-2 incr 2- [r]
( Words to make code a bit more portable.)
2 constant cell
: cells 2* ;
: cell/ 2/ ;
2 incr cell+
-2 incr cell-
{ h @ } ram
here ( save so we can restore later)
dp0 #24 cells - #64 - ( room for code below) goto
| Host should set PC -> continue-forth
| RP -> top of R stack
| DP -> top of D stack
| The host can push things onto host stack; they get copied to target
| stack, registers popped, words execute, re-push, copy back to host...
| Much easier than stuffing things into register slots on stack frame!
label continue-forth
cp sp mov ( D stack -> SP)
.ifdef loop-on-stack
w pop ( throw away)
.else
ix pop
.then
rp pop ip pop top pop ( restore Forth VM context)
next ;c
code bug [r]
top push
sr push ip push rp push ( capture Forth VM context)
.ifdef loop-on-stack
rp ) push
.else
ix push
.then
sp cp mov ( D stack -> CP)
rp sp mov ( R stack -> SP)
( jump through chat-entry at the start of flash)
.ifdef bsl-ram-chat @ram #ram 3 4 */ +
.else @boot
.then
4 + ( skip commit) & br ;c
label trampoline ( x0 .. xn cfa - y0 .. ym)
( make a fake colon word: no docolon, just a body)
] execute begin bug again [
label trampoline-end
goto ( back to saved)
__host
forth
( Cached local values of target registers.)
variable trp
variable tip
variable tix
: 2sp space space ;
: .b9 binary <# 4# 4# # #> type ;
: .h16 hex <# 4# #> type ;
: .r .h16 2sp ;
: .sr .b9 2sp ;
( If IP != trampoline + 4, a word is still executing.)
: executing? tip @ [ \l trampoline 4 + #] - ;
: .ip
tip @ .h16
executing? if ." * " ^ then ( done) 2sp ;
: .dp
dp@ 4 \m cells + .r ; ( skip SR IX RP IP)
| dp@ .r ;
: .forth-regs ( status)
radix preserve
cr ." V----INZC IP IX DP RP"
( 000100011 0000 0000 0000 0000)
cr .sr .ip tix @ .r
.dp trp @ .r ;
| Host should set PC -> continue-forth
| SP -> top of R stack
| CP -> top of D stack
defer ?copy-trampoline
-: \l continue-forth dup image+ swap \l trampoline-end over - t.WriteChunk
['] nope is ?copy-trampoline ; is ?copy-trampoline
( NOTE: For initial execution of a Forth word, xn is cfa!)
: continue ( x0 .. xn ip rp ix - y0 .. yn status ip rp ix)
?copy-trampoline
stack> \l continue-forth runwait stack<
tix ! trp ! tip ! .forth-regs ;
: cont ( ) ( continue forth execution)
tip @ trp @ tix @ continue ;
forth
-: ( cfa) ( execute target forth word)
\l trampoline rp0 0 ( ip rp ix) continue ; is remote
__meta
| comment ~~examples~~
| variable inc
| : lala do i bug drop inc @ +loop ;
|
| ( to demonstrate scripting target execution from the host)
| meta: grog ( start n) 0 do \t 1+ remote loop ;
| ( try: 44 10 grog)
|
| ~~examples~~
{ h ! }