From 85199c06b8e104dfed0366389e6b95a7757d0510 Mon Sep 17 00:00:00 2001
From: kitty <nepeta@canaglie.net>
Date: Sun, 15 Mar 2026 23:03:21 +1100
Subject: parse and parse-name (untested)

---
 readme.md   |  12 +++++++
 sanctuary.s | 112 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 124 insertions(+)
diff --git a/readme.md b/readme.md
index 46112f3..5dd75bc 100644
--- a/readme.md
+++ b/readme.md
@@ -10,6 +10,7 @@ for amd64 linux systems.
 - `n`: signed integer
 - `u`: unsigned integer
 - `?`: boolean flag
+- `""`: string in input buffer
 
 ## Glossary
 
@@ -52,6 +53,17 @@ yields the address of the first available byte in user memory.
 a variable containing the execution token of
 the most recently created word.
 
+### `parse   ( "<ws>name<ws>" c -- a u )`
+parse one word from the input buffer,
+delimited by a newline or the character c,
+and return it as a string.
+
+### `parse-name   ( "<ws>name<ws>" -- a u )`
+parse one whitespace-delimited word from the input buffer,
+and return it as a string.
+tabs (ascii 0x09), newlines (ascii 0x0a), and spaces (ascii 0x20)
+are considered whitespace.
+
 ### `state   ( -- a )`
 a variable containing a boolean value.
 if 0 (false), the system is in interpreting mode,
diff --git a/sanctuary.s b/sanctuary.s
index b511d68..235f384 100644
--- a/sanctuary.s
+++ b/sanctuary.s
@@ -104,6 +104,118 @@ defcode "bye", bye, 0
 	syscall
 	ret
 
+; input parsing {{{
+; skips leading whitespace, then scans one word; pushes 0 0 if none is left.
+; >in ends up just past the delimiter, or at the end of the buffer.
+; r11: string character count
+; rsi: input buffer address
+; al: char being parsed
+; r10: end of input buffer
+defcode "parse-name", parse_name, 0
+	mov rsi, qword [to_in]
+	mov r10, qword [tib]
+	add rsi, r10 ; rsi = tib + >in
+	add r10, qword [n_tib] ; r10 = tib + n_tib
+
+.wsloop:
+	cmp rsi, r10
+	jae .empty ; unsigned: these are addresses
+	lodsb
+	cmp al, 0x20
+	je .wsloop
+	cmp al, 0x09
+	je .wsloop
+	cmp al, 0x0a
+	je .wsloop
+
+	; al is the first char of the word and lodsb already stepped past it.
+	; no end-of-buffer check here: the word may sit on the very last byte.
+	dec rsi ; bring down by one to point to the start
+	push rsi ; will become `a`
+	xor r11, r11 ; the loop re-reads the first char and counts it
+.wordloop:
+	cmp rsi, r10
+	jae .wordloop_e ; word runs up to the end of the buffer
+	lodsb
+	cmp al, 0x20
+	je .wordloop_e
+	cmp al, 0x09
+	je .wordloop_e
+	cmp al, 0x0a
+	je .wordloop_e
+	inc r11 ; count only chars that belong to the word
+	jmp .wordloop ; unconditional: lodsb does not set flags
+
+.wordloop_e:
+	sub rsi, qword [tib]
+	mov qword [to_in], rsi ; >in = offset of the next unparsed char
+	pop rsi
+	pspush rsi
+	pspush r11
+	ret
+
+.empty:
+	sub rsi, qword [tib] ; only whitespace was left: mark it consumed
+	mov qword [to_in], rsi
+	pspush 0
+	pspush 0
+	ret
+
+; pops the delimiter c, skips leading delimiters, then scans one word;
+; pushes 0 0 if none is left. a newline always counts as a delimiter.
+; r11: string character count
+; rsi: input buffer address
+; al: char being parsed
+; r8b: delimiter char c
+; r10: end of input buffer
+defcode "parse", parse, 0
+	pspop r8 ; c, popped up front instead of peeking at a raw register
+	mov rsi, qword [to_in]
+	mov r10, qword [tib]
+	add rsi, r10 ; rsi = tib + >in
+	add r10, qword [n_tib] ; r10 = tib + n_tib
+
+.wsloop:
+	cmp rsi, r10
+	jae .empty ; unsigned: these are addresses
+	lodsb
+	cmp al, r8b
+	je .wsloop
+	cmp al, 0x0a
+	je .wsloop
+
+	; al is the first char of the word and lodsb already stepped past it.
+	; no end-of-buffer check here: the word may sit on the very last byte.
+	dec rsi ; bring down by one to point to the start
+	push rsi ; will become `a`
+	xor r11, r11 ; the loop re-reads the first char and counts it
+.wordloop:
+	cmp rsi, r10
+	jae .wordloop_e ; word runs up to the end of the buffer
+	lodsb
+	cmp al, r8b
+	je .wordloop_e
+	cmp al, 0x0a
+	je .wordloop_e
+	inc r11 ; count only chars that belong to the word
+	jmp .wordloop ; unconditional: lodsb does not set flags
+
+.wordloop_e:
+	sub rsi, qword [tib]
+	mov qword [to_in], rsi ; >in = offset of the next unparsed char
+	pop rsi
+	pspush rsi
+	pspush r11
+	ret
+
+.empty:
+	sub rsi, qword [tib] ; only delimiters were left: mark them consumed
+	mov qword [to_in], rsi
+	pspush 0
+	pspush 0
+	ret
+; }}}
+
 defvar "state", state, 0, INTERPRET
 defvar "dp", dp, 0, 0
 defvar "dp0", dp0, 0, 0
-- 
cgit v1.2.3