summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorkitty <nepeta@canaglie.net>2026-03-15 23:03:21 +1100
committerkitty <nepeta@canaglie.net>2026-03-15 23:03:21 +1100
commit85199c06b8e104dfed0366389e6b95a7757d0510 (patch)
tree41bae529a7c70d89b81387a19db5407689a9c5d0
parent9f311bce41eb76a46213867b8762ee517d33b6ba (diff)
parse and parse-name (untested)
-rw-r--r--readme.md12
-rw-r--r--sanctuary.s112
2 files changed, 124 insertions, 0 deletions
diff --git a/readme.md b/readme.md
index 46112f3..5dd75bc 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ for amd64 linux systems.
- `n`: signed integer
- `u`: unsigned integer
- `?`: boolean flag
+- `""`: string in input buffer
## Glossary
@@ -52,6 +53,17 @@ yields the address of the first available byte in user memory.
a variable containing the execution token of
the most recently created word.
+### `parse ( "<ws>name<ws>" c -- a u )`
+parse one word from the input buffer,
+separated by a newline or the character c,
+and return as a string.
+
+### `parse-name ( "<ws>name<ws>" -- a u )`
+parse one whitespace-separated word from the input buffer,
+and return as a string.
+tabs (ascii 0x09), newlines (ascii 0x0a), and spaces (ascii 0x20)
+are considered whitespace.
+
### `state ( -- a )`
a variable containing a boolean value.
if 0 (false), the system is in interpreting mode,
diff --git a/sanctuary.s b/sanctuary.s
index b511d68..235f384 100644
--- a/sanctuary.s
+++ b/sanctuary.s
@@ -104,6 +104,118 @@ defcode "bye", bye, 0
syscall
ret
+; input parsing {{{
+; parse-name ( "<ws>name<ws>" -- a u )
+; skip leading whitespace in the input buffer, then collect characters up
+; to the next whitespace character or the end of the buffer.
+; whitespace is space (0x20), tab (0x09) and newline (0x0a).
+; on exit `to_in` holds the offset just past the consumed input; the
+; terminating delimiter (if any) is consumed but not counted in u.
+; yields 0 0 when only whitespace is left in the buffer.
+;
+; r11: string character count
+; rsi: input buffer address (next byte to read)
+; al: char being parsed
+; r10: end of input buffer (one past the last valid byte)
+; scratch registers used directly here: rax, rsi, r10, r11
+defcode "parse-name", parse_name, 0
+ mov rsi, qword [to_in]
+ mov r10, qword [tib]
+ add rsi, r10
+ add r10, qword [n_tib]
+ xor rax, rax
+
+.wsloop:
+ cmp rsi, r10
+ jae .empty ; addresses are unsigned
+ lodsb
+ cmp al, 0x20
+ je .wsloop
+ cmp al, 0x09
+ je .wsloop
+ cmp al, 0x0a
+ je .wsloop
+
+ ; al is the first character of the word and rsi points just past it.
+ ; no bounds check here: that character is valid even when it is the
+ ; last byte of the buffer.
+ mov r11, 1
+ dec rsi ; bring down by one to point to the start
+ push rsi ; will become `a`
+ inc rsi
+.wordloop:
+ ; check the bound first so lodsb never reads past the buffer
+ cmp rsi, r10
+ jae .wordloop_e
+ lodsb
+ cmp al, 0x20
+ je .wordloop_e
+ cmp al, 0x09
+ je .wordloop_e
+ cmp al, 0x0a
+ je .wordloop_e
+ inc r11 ; count only non-delimiter characters
+ jmp .wordloop ; lodsb leaves flags alone, so this must be unconditional
+
+.wordloop_e:
+ sub rsi, qword [tib]
+ mov qword [to_in], rsi
+ pop rsi
+ pspush rsi
+ pspush r11
+ ret
+
+.empty:
+ ; only whitespace was left; record that it has been consumed
+ sub rsi, qword [tib]
+ mov qword [to_in], rsi
+ pspush 0
+ pspush 0
+ ret
+
+; parse ( "<ws>name<ws>" c -- a u )
+; skip leading delimiters in the input buffer, then collect characters up
+; to the next delimiter or the end of the buffer.
+; a delimiter is the character c or a newline (0x0a).
+; c is read from r15b while scanning and is dropped with pspop before the
+; results are pushed.
+; on exit `to_in` holds the offset just past the consumed input; the
+; terminating delimiter (if any) is consumed but not counted in u.
+; yields 0 0 when only delimiters are left in the buffer.
+;
+; r11: string character count
+; rsi: input buffer address (next byte to read)
+; al: char being parsed
+; r10: end of input buffer (one past the last valid byte)
+; r8: receives the discarded c
+; scratch registers used directly here: rax, rsi, r8, r10, r11
+defcode "parse", parse, 0
+ mov rsi, qword [to_in]
+ mov r10, qword [tib]
+ add rsi, r10
+ add r10, qword [n_tib]
+ xor rax, rax
+
+.wsloop:
+ cmp rsi, r10
+ jae .empty ; addresses are unsigned
+ lodsb
+ cmp al, r15b
+ je .wsloop
+ cmp al, 0x0a
+ je .wsloop
+
+ ; al is the first character of the word and rsi points just past it.
+ ; no bounds check here: that character is valid even when it is the
+ ; last byte of the buffer.
+ mov r11, 1
+ dec rsi ; bring down by one to point to the start
+ push rsi ; will become `a`
+ inc rsi
+.wordloop:
+ ; check the bound first so lodsb never reads past the buffer
+ cmp rsi, r10
+ jae .wordloop_e
+ lodsb
+ cmp al, r15b
+ je .wordloop_e
+ cmp al, 0x0a
+ je .wordloop_e
+ inc r11 ; count only non-delimiter characters
+ jmp .wordloop ; lodsb leaves flags alone, so this must be unconditional
+
+.wordloop_e:
+ sub rsi, qword [tib]
+ mov qword [to_in], rsi
+ pop rsi
+ pspop r8 ; drop c
+ pspush rsi
+ pspush r11
+ ret
+
+.empty:
+ ; only delimiters were left; record that they have been consumed
+ sub rsi, qword [tib]
+ mov qword [to_in], rsi
+ pspop r8 ; drop c
+ pspush 0
+ pspush 0
+ ret
+; }}}
+
defvar "state", state, 0, INTERPRET
defvar "dp", dp, 0, 0
defvar "dp0", dp0, 0, 0