-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsymbol.inc
658 lines (569 loc) · 14.8 KB
/
symbol.inc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
;BSD 2-Clause License
;
;Copyright (c) 2021-2023, Stefan Jakobsson
;All rights reserved.
;Redistribution and use in source and binary forms, with or without
;modification, are permitted provided that the following conditions are met:
;
;1. Redistributions of source code must retain the above copyright notice, this
; list of conditions and the following disclaimer.
;
;2. Redistributions in binary form must reproduce the above copyright notice,
; this list of conditions and the following disclaimer in the documentation
; and/or other materials provided with the distribution.
;
;THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
;AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
;IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
;DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
;FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
;SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
;OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
; Symbol table layout
; -------------------
; In this code, symbols refer to BASIC commands, labels and variables.
; The symbol table data is stored in RAM banks SYMBOL_FIRST_BANK to
; SYMBOL_LAST_BANK.
;
; The symbols are kept in a hash map, where symbol names are used as keys.
; Pearson hashing is used to create hash values.
;
; As the hash function only produces an 8-bit value, collisions will happen
; frequently. Testing shows that the number of collisions per hash value
; still is reasonable for the purpose of efficient symbol lookup.
;
; Collisions are handled by chaining. Symbols sharing the same hash value
; are arranged in linked lists, which in the source are referred to as
; "buckets".
;
; The arrays "symbol_bucket_bank" and "symbol_bucket_offset" point to the
; head of these linked lists. The arrays are stored in RAM bank BASLOAD_RAM1.
; There is one head pointer for each possible hash value (0..255).
;
; An entry in the symbol table contains the following information:
;
; Offset Size Description
; -----------------------------------------------------------------------------
; 00 03 Next symbol (bank, addrl, addrh), bank=0 => end of chain
; 03 01 Symbol name length
; 04 01 Symbol type
; 05 02 Value
; 07 ?? Symbol name, max 64 bytes
;
; The symbol table entry length varies according to the length of the symbol
; name. If symbol names are 10 characters in avarage, the table fits
; about 3,800 symbols.
; Exports needed for unit testing
.export symbol_init, symbol_add, symbol_find, symbol_buckets_bank, file_buf, line_dstlin, symbol_next_bank, symbol_pointer
; Banked RAM usage
SYMBOL_FIRST_BANK = 2
SYMBOL_LAST_BANK = 9
; Symbol table entry fields
SYMBOL_NEXT_BANK = 0
SYMBOL_NEXT_ADDR = 1
SYMBOL_LEN = 3
SYMBOL_TYPE = 4
SYMBOL_VALUE = 5
SYMBOL_NAME = 7
SYMBOL_MAXLEN = 64
; Symbol entry types
SYMBOLTYPE_LABEL = 0
SYMBOLTYPE_VARIABLE = 1
SYMBOLTYPE_TOKEN = 2
SYMBOLTYPE_RESERVED_VAR = 3
SYMBOLTYPE_CONTROL_CHAR = 4
; Variables
.ZEROPAGE
symbol_pointer: .res 2
.segment "VARS"
symbol_next_var: .res 2
symbol_last_var: .res 2
symbol_next_bank: .res 1
symbol_next_addr: .res 2
.segment "RAM1"
symbol_buckets_bank: .res $100
symbol_buckets_addrl: .res $100
symbol_buckets_addrh: .res $100
.CODE
;******************************************************************************
;Function name: symbol_init
;Purpose......: Initializes symbol functions and data
;Input........: Nothing
;Output.......: Nothing
;Error........: None
.proc symbol_init
; Set all buckets to null
lda #BASLOAD_RAM1
sta RAM_SEL
ldx #0
lda #0
: sta symbol_buckets_bank,x
inx
cpx #0
bne :-
; Set next symbol table entry location in banked RAM
lda #SYMBOL_FIRST_BANK
sta symbol_next_bank
stz symbol_next_addr
lda #$a0
sta symbol_next_addr+1
; Set next available variable name to "A"
lda #'a'
sta symbol_next_var
lda #0
sta symbol_next_var+1
; Set last variable name to NULL
stz symbol_last_var
stz symbol_last_var+1
; Add ? as PRINT statement
lda #'?'
sta file_buf
ldx #0
ldy #0
lda #$99
sta token_next_id
stz token_next_id+1
lda #SYMBOLTYPE_TOKEN
jsr symbol_add
; Add reserved variables to the symbol table
lda #'s'
sta file_buf
lda #'t'
sta file_buf+1
ldx #0
ldy #1
lda #SYMBOLTYPE_RESERVED_VAR
jsr symbol_add
lda #'t'
sta file_buf
lda #'i'
sta file_buf+1
ldx #0
ldy #1
lda #SYMBOLTYPE_RESERVED_VAR
jsr symbol_add
lda #'d'
sta file_buf
lda #'a'
sta file_buf+1
ldx #0
ldy #1
lda #SYMBOLTYPE_RESERVED_VAR
jsr symbol_add
rts
PRINT_TOKEN:
.byt $99, $00
.endproc
;******************************************************************************
;Function name: symbol_add
;Purpose......: Adds an element to the symbol table
;Input........: X Index in file_buf where symbol starts
; Y Index in file_buf where symbol ends
; A Symbol type
; 0: Label
; 1: Variable
; 2: BASIC command/token
; 3: Reserved variable, stored as type 1=variable
; 4: PETSCII control char
; C 1: Skip duplicate check
;Output.......: X/Y Symbol value (low/high byte)
; A 0: OK
;Error........: A 1: Duplicate symbol
; 2: Symbol table full
; 3: No available variable name
; 4: Symbol name too long
.proc symbol_add
; Store input
stx index1
sty index2
sta type
; Check if duplicate
bcs :+ ; Skip duplicate label check if C=1
jsr symbol_find
bcs :+ ; Symbol not found, continue
lda #1
rts
; Calculate len
: sec
lda index2
sbc index1
inc
sta len
cmp #SYMBOL_MAXLEN+1
bcc :+
lda #4
rts
; Check if symbol table entry fits in current RAM bank
: clc
adc #7
adc symbol_next_addr
lda symbol_next_addr+1
adc #0
cmp #$c0
bcc :+
inc symbol_next_bank
stz symbol_next_addr ; Symbol doesn't fit, select next RAM bank
lda #$a0
sta symbol_next_addr+1
; Check if symbol table is full
: lda symbol_next_bank
cmp #SYMBOL_LAST_BANK+1
bcc :+
lda #2
rts
; Check variable name availability
: lda type
cmp #SYMBOLTYPE_LABEL
bne :+
lda symbol_next_var
cmp #'z'+1
bcc :+
lda #3
rts
; Calculate symbol name hash value
: lda #0
ldx index1
: eor file_buf,x
tay
lda pearson_tbl,y
cpx index2
beq :+
inx
bra :-
: sta hash
; Check if bucket is empty
lda #BASLOAD_RAM1
sta RAM_SEL
ldy hash
lda symbol_buckets_bank,y
bne bucket_not_empty
bucket_empty:
lda symbol_next_bank
sta symbol_buckets_bank,y
lda symbol_next_addr
sta symbol_buckets_addrl,y
lda symbol_next_addr+1
sta symbol_buckets_addrh,y
bra set_values
bucket_not_empty:
tax ; Store bank in X
lda symbol_buckets_addrl,y
sta symbol_pointer
lda symbol_buckets_addrh,y
sta symbol_pointer+1
find_tail:
; Select RAN bank where item is stored
stx RAM_SEL
; Check if end of chain (bank=0)
ldy #SYMBOL_NEXT_BANK
lda (symbol_pointer),y
beq tail_found
; There're more items in the list, prepare to look at next item
tax ; Next bank stored in X
ldy #SYMBOL_NEXT_ADDR
lda (symbol_pointer),y
pha
iny
lda (symbol_pointer),y
sta symbol_pointer+1
pla
sta symbol_pointer
bra find_tail
tail_found:
; Store link to the new item we're inserting in the current tail item
lda #SYMBOL_NEXT_BANK
lda symbol_next_bank
sta (symbol_pointer),y
ldy #SYMBOL_NEXT_ADDR
lda symbol_next_addr
sta (symbol_pointer),y
iny
lda symbol_next_addr+1
sta (symbol_pointer),y
set_values:
; Select RAM bank where the new item is stored
lda symbol_next_bank
sta RAM_SEL
; Set address to new item
lda symbol_next_addr
sta symbol_pointer
lda symbol_next_addr+1
sta symbol_pointer+1
; Copy symbol name
ldx index1
ldy #SYMBOL_NAME
: lda file_buf,x
sta (symbol_pointer),y
cpx index2
beq :+
inx
iny
bra :-
; Set symbol name len
: ldy #SYMBOL_LEN
lda len
sta (symbol_pointer),y
; Set next bank to 0 = NULL
ldy #SYMBOL_NEXT_BANK
lda #0
sta (symbol_pointer),y
; Set symbol type
ldy #SYMBOL_TYPE
lda type
sta (symbol_pointer),y
; Check if symbol type
cmp #SYMBOLTYPE_LABEL
bne variable
; New item is a label: Set value to current line number
ldy #SYMBOL_VALUE
lda line_dstlin
sta (symbol_pointer),y
tax
iny
lda line_dstlin+1
sta (symbol_pointer),y
tay
jmp exit
variable:
cmp #SYMBOLTYPE_VARIABLE
bne reserved_var
; New item is a variable: Set value to next available variable name
ldy #SYMBOL_VALUE
lda symbol_next_var
sta (symbol_pointer),y
pha
iny
lda symbol_next_var+1
sta (symbol_pointer),y
tay
; Remember last assigned variable
lda symbol_next_var
sta symbol_last_var
lda symbol_next_var+1
sta symbol_last_var+1
inc_var:
; Advance second character of the next available variable name
inc symbol_next_var+1
lda symbol_next_var+1
; If 1 => The current variable name is one character, next variable ends with '0'
cmp #1
beq :+
; Current variable name ends with a '9' => The next variable will end with 'A'
cmp #'9'+1
beq :++
; The current variable name ends with a 'Z' => The next variable is a single char
cmp #'z'+1
bne chk_var
inc symbol_next_var
stz symbol_next_var+1
bra chk_var
: lda #'0'
sta symbol_next_var+1
bra chk_var
: lda #'a'
sta symbol_next_var+1
chk_var:
; Check if the next variable name is reserved
ldx #0
chk_var_loop:
; Compare first char
lda SYMBOLTYPE_RESERVED_VARs,x
inx
cmp symbol_next_var
bne :+
; Compare second char
lda SYMBOLTYPE_RESERVED_VARs,x
inx
; There's a match in the reserved names table => try again
cmp symbol_next_var+1
beq inc_var
; Are we done?
cpx #SYMBOLTYPE_RESERVED_VARs_end-SYMBOLTYPE_RESERVED_VARs
bne chk_var_loop
bra :++
: ; Are we done?
inx
cpx #SYMBOLTYPE_RESERVED_VARs_end-SYMBOLTYPE_RESERVED_VARs
bne chk_var_loop
: ; Pull symbol low value from stack
plx
bra exit
reserved_var:
cmp #SYMBOLTYPE_RESERVED_VAR
bne control_char
ldy #SYMBOL_TYPE
lda #SYMBOLTYPE_VARIABLE
sta (symbol_pointer),y
ldy #SYMBOL_VALUE
ldx index1
: lda file_buf,x
sta (symbol_pointer),y
cpx index2
beq :+
inx
iny
cpy #SYMBOL_VALUE+2
bne :-
: ldx #0
ldy #0
bra exit
control_char:
cmp #SYMBOLTYPE_CONTROL_CHAR
bne token
ldy #SYMBOL_VALUE
lda controlcode_value
sta (symbol_pointer),y
iny
lda #0
sta (symbol_pointer),y
bra exit
token:
ldy #SYMBOL_VALUE
lda token_next_id
sta (symbol_pointer),y
iny
lda token_next_id+1
sta (symbol_pointer),y
exit:
; Set next item address
clc
lda symbol_next_addr
adc len
sta symbol_next_addr
lda symbol_next_addr+1
adc #0
sta symbol_next_addr+1
clc
lda symbol_next_addr
adc #7
sta symbol_next_addr
lda symbol_next_addr+1
adc #0
sta symbol_next_addr+1
lda #0
rts
.segment "VARS"
index1: .res 1
index2: .res 2
type: .res 1
hash: .res 1
len: .res 1
.CODE
.endproc
;******************************************************************************
;Function name: symbol_find
;Purpose......: Searches symbol table for a given symbol name
;Input........: X Index in file_buf where symbol starts
; Y Index in file_buf where symbol ends
;Output.......: X,Y Value (low, high byte)
; A Symbol type
; 0: Label
; 1: Variable
; 2: BASIC token
; 3: Reserved variable, stored as type 1=variable
; 4: PETSCII control char
;Error........: C=1 if label not found
.proc symbol_find
; Store input params
stx index1
sty index2
; Calculate symbol name length
sec
lda index2
sbc index1
ina
sta len
; Calculate symbol name hash value
lda #0
ldx index1
: eor file_buf,x
tay
lda pearson_tbl,y
cpx index2
beq :+
inx
bra :-
: sta hash
; Check if bucket head is NULL
lda #BASLOAD_RAM1 ; Select RAM bank 1
sta RAM_SEL
ldy hash
lda symbol_buckets_bank,y
beq not_found ; Bucket head was null => symbol doesn't exist
; OK, let's look what we find in the symbol table...
tax ; Store bank in X temporarily
lda symbol_buckets_addrl,y
sta symbol_pointer
lda symbol_buckets_addrh,y
sta symbol_pointer+1
loop:
stx RAM_SEL ; Set RAM bank where item is stored
ldy #SYMBOL_LEN ; Compare lengths
lda (symbol_pointer),y
cmp len
bne next
ldy #SYMBOL_NAME ; Compare names
ldx index1
: lda (symbol_pointer),y
cmp file_buf,x
bne next
cpx index2
beq found
inx
iny
bra :-
next: ; It wasn't a match, look at next item in the bucket
ldy #SYMBOL_NEXT_BANK
lda (symbol_pointer),y
beq not_found ; Next bank = 0 => No more items in this bucket
pha ; Store bank on stack
ldy #SYMBOL_NEXT_ADDR
lda (symbol_pointer),y
pha
iny
lda (symbol_pointer),y
sta symbol_pointer+1
pla
sta symbol_pointer
plx ; Get bank from stack
bra loop
found:
ldy #SYMBOL_TYPE
lda (symbol_pointer),y
pha
ldy #SYMBOL_VALUE
lda (symbol_pointer),y
tax
ldy #SYMBOL_VALUE+1
lda (symbol_pointer),y
tay
pla
clc
rts
not_found:
sec
rts
.segment "VARS"
index1: .res 1
index2: .res 2
hash: .res 1
len: .res 1
.CODE
.endproc
SYMBOLTYPE_RESERVED_VARs:
.byt "da"
.byt "if"
.byt "fn"
.byt "go"
.byt "mx"
.byt "my"
.byt "mb"
.byt "on"
.byt "or"
.byt "st"
.byt "ti"
.byt "to"
SYMBOLTYPE_RESERVED_VARs_end: