From 85199c06b8e104dfed0366389e6b95a7757d0510 Mon Sep 17 00:00:00 2001 From: kitty Date: Sun, 15 Mar 2026 23:03:21 +1100 Subject: parse and parse-name (untested) --- readme.md | 12 +++++++ sanctuary.s | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/readme.md b/readme.md index 46112f3..5dd75bc 100644 --- a/readme.md +++ b/readme.md @@ -10,6 +10,7 @@ for amd64 linux systems. - `n`: signed integer - `u`: unsigned integer - `?`: boolean flag +- `""`: string in input buffer ## Glossary @@ -52,6 +53,17 @@ yields the address of the first available byte in user memory. a variable containing the execution token of the most recently created word. +### `parse ( "name" c -- a u )` +parse one word from the input buffer, +separated by a newline or the character c, +and return as a string. + +### `parse-name ( "name" -- a u )` +parse one whitespace-separated word from the input buffer, +and return as a string. +tabs (ascii 0x09), newlines (ascii 0x0a), and spaces (ascii 0x20) +are considered whitespace. + ### `state ( -- a )` a variable containing a boolean value. 
if 0 (false), the system is in interpreting mode,
diff --git a/sanctuary.s b/sanctuary.s
index b511d68..235f384 100644
--- a/sanctuary.s
+++ b/sanctuary.s
@@ -104,6 +104,129 @@ defcode "bye", bye, 0
     syscall
     ret
 
+; input parsing {{{
+; parse-name ( "name" -- a u )
+; skip leading whitespace (space 0x20, tab 0x09, newline 0x0a), then return
+; the next whitespace-delimited word of the input buffer as address a and
+; length u. to_in is left just past the delimiter that ended the word, or at
+; the end of the buffer. when no word is left, a and u are both 0.
+;
+; rsi: input buffer read pointer (lodsb advances it; assumes DF is clear)
+; al:  char being parsed
+; r10: end of input buffer (one past the last byte)
+; r11: one past the last character accepted into the word
+defcode "parse-name", parse_name, 0
+    mov     r10, qword [tib]
+    mov     rsi, qword [to_in]
+    add     rsi, r10                ; rsi = tib + to_in
+    add     r10, qword [n_tib]      ; r10 = tib + n_tib
+    xor     eax, eax
+
+.wsloop:
+    cmp     rsi, r10
+    jae     .empty                  ; addresses compare unsigned
+    lodsb
+    cmp     al, 0x20
+    je      .wsloop
+    cmp     al, 0x09
+    je      .wsloop
+    cmp     al, 0x0a
+    je      .wsloop
+
+    ; al holds the first character of the word and rsi is one past it.
+    ; no end-of-buffer check here: a word may sit on the very last byte.
+    lea     r11, [rsi - 1]
+    push    r11                     ; will become a
+.wordloop:
+    mov     r11, rsi                ; every byte before rsi is in the word
+    cmp     rsi, r10
+    jae     .wordloop_e             ; buffer exhausted
+    lodsb                           ; leaves the flags alone
+    cmp     al, 0x20
+    je      .wordloop_e
+    cmp     al, 0x09
+    je      .wordloop_e
+    cmp     al, 0x0a
+    jne     .wordloop
+
+.wordloop_e:
+    sub     rsi, qword [tib]
+    mov     qword [to_in], rsi      ; offset of the next unparsed byte
+    pop     rsi                     ; rsi = a
+    sub     r11, rsi                ; u = word end - word start
+    pspush  rsi
+    pspush  r11
+    ret
+
+.empty:
+    sub     rsi, qword [tib]
+    mov     qword [to_in], rsi      ; skipped whitespace stays consumed
+    pspush  0
+    pspush  0
+    ret
+
+; parse ( "name" c -- a u )
+; skip leading delimiters, then return the next word of the input buffer as
+; address a and length u. the delimiters are the character c and newline
+; (0x0a). to_in is left just past the delimiter that ended the word, or at
+; the end of the buffer. when no word is left, a and u are both 0.
+;
+; r15b: delimiter c (NOTE(review): presumably r15 caches the top of the
+;       parameter stack - confirm against pspush/pspop)
+; rsi:  input buffer read pointer (lodsb advances it; assumes DF is clear)
+; al:   char being parsed
+; r10:  end of input buffer (one past the last byte)
+; r11:  one past the last character accepted into the word
+; r8:   scratch, receives the dropped c
+defcode "parse", parse, 0
+    mov     r10, qword [tib]
+    mov     rsi, qword [to_in]
+    add     rsi, r10                ; rsi = tib + to_in
+    add     r10, qword [n_tib]      ; r10 = tib + n_tib
+    xor     eax, eax
+
+.wsloop:
+    cmp     rsi, r10
+    jae     .empty                  ; addresses compare unsigned
+    lodsb
+    cmp     al, r15b
+    je      .wsloop
+    cmp     al, 0x0a
+    je      .wsloop
+
+    ; al holds the first character of the word and rsi is one past it.
+    ; no end-of-buffer check here: a word may sit on the very last byte.
+    lea     r11, [rsi - 1]
+    push    r11                     ; will become a
+.wordloop:
+    mov     r11, rsi                ; every byte before rsi is in the word
+    cmp     rsi, r10
+    jae     .wordloop_e             ; buffer exhausted
+    lodsb                           ; leaves the flags alone
+    cmp     al, r15b
+    je      .wordloop_e
+    cmp     al, 0x0a
+    jne     .wordloop
+
+.wordloop_e:
+    sub     rsi, qword [tib]
+    mov     qword [to_in], rsi      ; offset of the next unparsed byte
+    pop     rsi                     ; rsi = a
+    sub     r11, rsi                ; u = word end - word start
+    pspop   r8                      ; drop c
+    pspush  rsi
+    pspush  r11
+    ret
+
+.empty:
+    sub     rsi, qword [tib]
+    mov     qword [to_in], rsi      ; skipped delimiters stay consumed
+    pspop   r8                      ; drop c
+    pspush  0
+    pspush  0
+    ret
+; }}}
+
 defvar "state", state, 0, INTERPRET
 defvar "dp", dp, 0, 0
 defvar "dp0", dp0, 0, 0
-- 
cgit v1.2.3