diff options
| author | kitty <nepeta@canaglie.net> | 2026-03-15 23:03:21 +1100 |
|---|---|---|
| committer | kitty <nepeta@canaglie.net> | 2026-03-15 23:03:21 +1100 |
| commit | 85199c06b8e104dfed0366389e6b95a7757d0510 (patch) | |
| tree | 41bae529a7c70d89b81387a19db5407689a9c5d0 | |
| parent | 9f311bce41eb76a46213867b8762ee517d33b6ba (diff) | |
parse and parse-name (untested)
| -rw-r--r-- | readme.md | 12 | ||||
| -rw-r--r-- | sanctuary.s | 112 |
2 files changed, 124 insertions, 0 deletions
@@ -10,6 +10,7 @@ for amd64 linux systems.
 - `n`: signed integer
 - `u`: unsigned integer
 - `?`: boolean flag
+- `""`: string in input buffer
 
 ## Glossary
 
@@ -52,6 +53,17 @@ yields the address of the first available byte in user memory.
 a variable containing the execution token of the
 most recently created word.
 
+### `parse ( "<ws>name<ws>" c -- a u )`
+parse one word from the input buffer,
+separated by a newline or the character c,
+and return as a string.
+
+### `parse-name ( "<ws>name<ws>" -- a u )`
+parse one whitespace-separated word from the input buffer,
+and return as a string.
+tabs (ascii 0x09), newlines (ascii 0x0a), and spaces (ascii 0x20)
+are considered whitespace.
+
 ### `state ( -- a )`
 a variable containing a boolean value.
 if 0 (false), the system is in interpreting mode,
diff --git a/sanctuary.s b/sanctuary.s
index b511d68..235f384 100644
--- a/sanctuary.s
+++ b/sanctuary.s
@@ -104,6 +104,137 @@ defcode "bye", bye, 0
 	syscall
 	ret
 
+; input parsing {{{
+; parse-name ( "<ws>name<ws>" -- a u )
+; skip leading whitespace (tab, newline, space), then collect characters
+; up to the next whitespace character or the end of the input buffer.
+; >in ends up just past the delimiter, or at the end of the buffer.
+; pushes 0 0 when nothing but whitespace is left.
+;
+; rsi: input buffer read pointer
+; al: char being parsed
+; r10: end of input buffer
+; r11: address of the first character of the word
+defcode "parse-name", parse_name, 0
+	mov rsi, qword [to_in]
+	mov r10, qword [tib]
+	add rsi, r10
+	add r10, qword [n_tib]
+
+.wsloop:
+	cmp rsi, r10
+	jae .empty	; addresses compare unsigned
+	lodsb
+	cmp al, 0x20
+	je .wsloop
+	cmp al, 0x09
+	je .wsloop
+	cmp al, 0x0a
+	je .wsloop
+
+	; al holds the first character of the word, rsi is one past it
+	lea r11, [rsi - 1]
+
+.wordloop:
+	cmp rsi, r10
+	jae .word_eof
+	lodsb
+	cmp al, 0x20
+	je .word_delim
+	cmp al, 0x09
+	je .word_delim
+	cmp al, 0x0a
+	je .word_delim
+	jmp .wordloop	; lodsb sets no flags, so loop unconditionally
+
+.word_delim:
+	lea rax, [rsi - 1]	; delimiter is consumed but not counted
+	jmp .done
+
+.word_eof:
+	mov rax, rsi	; word runs to the end of the buffer
+
+.done:
+	sub rax, r11	; u = end - start
+	sub rsi, qword [tib]
+	mov qword [to_in], rsi
+	mov rsi, r11	; a
+	mov r11, rax	; u
+	pspush rsi
+	pspush r11
+	ret
+
+.empty:
+	mov rax, qword [n_tib]	; mark the whole buffer as consumed
+	mov qword [to_in], rax
+	pspush 0
+	pspush 0
+	ret
+
+; parse ( "<ws>name<ws>" c -- a u )
+; same scan as parse-name, but the delimiters are c and newline.
+; c is compared through r15b and dropped with pspop before a and u
+; are pushed (presumably r15 caches the top of the parameter stack).
+;
+; rsi: input buffer read pointer
+; al: char being parsed
+; r10: end of input buffer
+; r11: address of the first character of the word
+defcode "parse", parse, 0
+	mov rsi, qword [to_in]
+	mov r10, qword [tib]
+	add rsi, r10
+	add r10, qword [n_tib]
+
+.wsloop:
+	cmp rsi, r10
+	jae .empty	; addresses compare unsigned
+	lodsb
+	cmp al, r15b
+	je .wsloop
+	cmp al, 0x0a
+	je .wsloop
+
+	; al holds the first character of the word, rsi is one past it
+	lea r11, [rsi - 1]
+
+.wordloop:
+	cmp rsi, r10
+	jae .word_eof
+	lodsb
+	cmp al, r15b
+	je .word_delim
+	cmp al, 0x0a
+	je .word_delim
+	jmp .wordloop	; lodsb sets no flags, so loop unconditionally
+
+.word_delim:
+	lea rax, [rsi - 1]	; delimiter is consumed but not counted
+	jmp .done
+
+.word_eof:
+	mov rax, rsi	; word runs to the end of the buffer
+
+.done:
+	sub rax, r11	; u = end - start
+	sub rsi, qword [tib]
+	mov qword [to_in], rsi
+	mov rsi, r11	; a
+	mov r11, rax	; u
+	pspop r8	; drop c
+	pspush rsi
+	pspush r11
+	ret
+
+.empty:
+	mov rax, qword [n_tib]	; mark the whole buffer as consumed
+	mov qword [to_in], rax
+	pspop r8	; drop c
+	pspush 0
+	pspush 0
+	ret
+; }}}
+
 defvar "state", state, 0, INTERPRET
 defvar "dp", dp, 0, 0
 defvar "dp0", dp0, 0, 0
