wip: more const parsing

This commit is contained in:
🪞👃🪞 2025-01-17 22:26:49 +01:00
parent ff31957fed
commit 297f9b30df
4 changed files with 141 additions and 114 deletions

View file

@ -5,7 +5,7 @@ version = "0.1.0"
[dependencies]
clojure-reader = "0.3.0"
konst = "0.3.16"
konst = { version = "0.3.16", features = [ "rust_1_83" ] }
itertools = "0.14.0"
[features]

View file

@ -1,16 +1,8 @@
use crate::*;
use konst::iter::{ConstIntoIter, IsIteratorKind};
use konst::string::{split_at, str_range, char_indices};
use self::ParseError::*;
use self::TokenKind::*;
/// Drives a by-value iterator to exhaustion.
///
/// `$expr` must produce a value whose `next()` consumes it and returns
/// `Some((item, rest))`; `$arg` is the pattern bound to each item and `$body`
/// runs once per item. Iteration stops at the first result that does not match.
macro_rules! iterate {
    ($expr:expr => $arg: pat => $body:expr) => {
        let mut cursor = $expr;
        loop {
            match cursor.next() {
                Some(($arg, rest)) => {
                    $body;
                    // Continue from the remainder handed back by `next()`.
                    cursor = rest;
                }
                _ => break,
            }
        }
    }
}
#[derive(Debug)] pub enum ParseError { Unimplemented, Empty, Incomplete, Unexpected(char), Code(u8), }
impl std::fmt::Display for ParseError {
fn fmt (&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
@ -24,6 +16,88 @@ impl std::fmt::Display for ParseError {
}
}
impl std::error::Error for ParseError {}
/// Loop helper for by-value iterators usable in `const fn`.
///
/// `$expr` is evaluated once; its `next()` must consume the iterator and return
/// `Some((item, rest))`. Each item is bound to the pattern `$arg`, `$body` is
/// evaluated, and the loop continues with `rest` until `next()` stops matching.
///
/// NOTE(review): `konst::iter::for_each!` looks like the canonical equivalent in
/// [konst] — confirm before replacing this macro.
macro_rules! iterate {
    ($expr:expr => $arg: pat => $body:expr) => {
        let mut cursor = $expr;
        loop {
            let Some(($arg, rest)) = cursor.next() else { break };
            $body;
            cursor = rest;
        }
    }
}
pub struct TokenIterator<'a>(&'a str);
impl<'a> ConstIntoIter for TokenIterator<'a> {
type Kind = IsIteratorKind;
type Item = Token<'a>;
type IntoIter = Self;
}
impl<'a> TokenIterator<'a> {
pub const fn new (source: &'a str) -> Self { Self(source) }
pub const fn split (self, index: usize) -> Self { Self(split_at(self.0, index).1) }
pub const fn next (self) -> Option<(Result<Token<'a>, ParseError>, Self)> {
let src = self.0;
let mut token: Token<'a> = Token::new(src, Nil, 0, 0, 0);
iterate!(char_indices(src) => (index, c) => token = match token.kind() {
Nil => match c {
'(' => Token::new(src, Exp, index, 1, 1),
':'|'@' => Token::new(src, Sym, index, 1, 0),
'0'..='9' => Token::new(src, Num, index, 1, 0),
'/'|'a'..='z' => Token::new(src, Key, index, 1, 0),
' '|'\n'|'\r'|'\t' => token.grow(),
_ => return Some((Err(Unexpected(c)), self.split(token.end())))
},
Num => match c {
'0'..='9' => token.grow(),
' '|'\n'|'\r'|'\t' => return Some((Ok(token), self.split(token.end()))),
_ => return Some((Err(Unexpected(c)), self.split(token.end())))
},
Sym => match c {
'a'..='z'|'0'..='9'|'-' => token.grow(),
' '|'\n'|'\r'|'\t' => return Some((Ok(token), self.split(token.end()))),
_ => return Some((Err(Unexpected(c)), self.split(token.end())))
},
Key => match c {
'a'..='z'|'0'..='9'|'-'|'/' => token.grow(),
' '|'\n'|'\r'|'\t' => return Some((Ok(token), self.split(token.end()))),
_ => return Some((Err(Unexpected(c)), self.split(token.end())))
},
Exp => match token.depth {
0 => return Some((Ok(token), Self(split_at(src, token.end()).1))),
_ => match c {
')' => match token.grow_out() {
Ok(token) => token,
Err(e) => return Some((Err(e), self.split(token.end())))
},
'(' => token.grow_in(),
_ => token.grow(),
}
},
});
match token.kind() { Nil => None, _ => Some((Err(ParseError::Incomplete), self.split(token.end()))) }
}
}
/// Adapter that converts each token of a [`TokenIterator`] into an [`Atom`]
/// borrowing from the source string.
pub struct AtomIterator<'a>(TokenIterator<'a>);
impl<'a> ConstIntoIter for AtomIterator<'a> {
    type Kind = IsIteratorKind;
    type Item = Atom<&'a str>;
    type IntoIter = Self;
}
impl<'a> AtomIterator<'a> {
    /// Wraps an existing token iterator.
    pub const fn new (tokens: TokenIterator<'a>) -> Self { Self(tokens) }
    /// Pulls the next token and converts it with `Token::to_ref_atom`.
    ///
    /// Returns `None` when the token iterator is exhausted; otherwise the atom
    /// (or the tokenizing/conversion error) plus the iterator to continue from.
    ///
    /// NOTE(review): `to_ref_atom` takes `&'a self`, while `token` here is a
    /// local value — confirm the borrow checker accepts this borrow.
    pub const fn next (self) -> Option<(Result<Atom<&'a str>, ParseError>, Self)> {
        match self.0.next() {
            None => None,
            // Tokenizer errors are forwarded unchanged.
            Some((Err(e), rest)) => Some((Err(e), Self(rest))),
            // `to_ref_atom` already yields the `Result` we return, so pass it through.
            Some((Ok(token), rest)) => Some((token.to_ref_atom(), Self(rest))),
        }
    }
}
/// Kind of a [`Token`], chosen by the tokenizer from the token's first character.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
pub enum TokenKind {
    /// No token has been started yet; this is the tokenizer's initial state.
    #[default]
    Nil,
    /// Starts with an ASCII digit `0`-`9`.
    Num,
    /// Starts with `:` or `@`.
    Sym,
    /// Starts with `/` or a lowercase ASCII letter.
    Key,
    /// Starts with `(`; nesting is tracked through the token's depth.
    Exp,
}
#[derive(Debug, Copy, Clone, Default, PartialEq)]
@ -55,74 +129,10 @@ impl<'a> Token<'a> {
d => Ok(Self { length: self.length + 1, depth: d - 1, ..self })
}
}
/// Returns only the first token that [`Self::chomp`] finds in `source`,
/// discarding the remainder of the source.
///
/// # Errors
///
/// Forwards any error from `chomp`, and returns `Empty` if the token that
/// comes back is of kind `Nil`.
pub const fn chomp_first (source: &'a str) -> Result<Self, ParseError> {
    let first = match Self::chomp(source) {
        Err(error) => return Err(error),
        Ok((token, _rest)) => token,
    };
    if let Nil = first.kind() { Err(Empty) } else { Ok(first) }
}
pub const fn chomp (src: &'a str) -> Result<(Self, &'a str), ParseError> {
let mut token: Token<'a> = Token::new(src, Nil, 0, 0, 0);
iterate!(char_indices(src) => (index, c) => token = match token.kind() {
Nil => match c {
'(' => Self::new(src, Exp, index, 1, 1),
':'|'@' => Self::new(src, Sym, index, 1, 0),
'0'..='9' => Self::new(src, Num, index, 1, 0),
'/'|'a'..='z' => Self::new(src, Key, index, 1, 0),
' '|'\n'|'\r'|'\t' => token.grow(),
_ => return Err(Unexpected(c))
},
Num => match c {
'0'..='9' => token.grow(),
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
_ => return Err(Unexpected(c))
},
Sym => match c {
'a'..='z'|'0'..='9'|'-' => token.grow(),
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
_ => return Err(Unexpected(c)),
},
Key => match c {
'a'..='z'|'0'..='9'|'-'|'/' => token.grow(),
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
_ => return Err(Unexpected(c))
},
Exp => match token.depth {
0 => match c {
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
_ => return Err(Unexpected(c))
},
_ => match c {
')' => match token.grow_out() {
Ok(token) => token,
Err(e) => return Err(e)
},
'(' => token.grow_in(),
_ => token.grow(),
}
},
});
Err(Empty)
}
/// Parses a string of ASCII decimal digits into a `usize`.
///
/// An empty string yields `Ok(0)`.
///
/// # Errors
///
/// Returns `Unexpected(c)` for the first character that is not a decimal
/// digit, or for the digit at which the value would no longer fit in `usize`.
pub const fn number (digits: &str) -> Result<usize, ParseError> {
    let mut value: usize = 0;
    iterate!(char_indices(digits) => (_, c) => match Self::digit(c) {
        // Checked arithmetic: a long digit string must produce an error rather
        // than panic (overflow checks on) or silently wrap (overflow checks off).
        Ok(digit) => value = match value.checked_mul(10) {
            Some(scaled) => match scaled.checked_add(digit) {
                Some(sum) => sum,
                None => return Err(Unexpected(c)),
            },
            None => return Err(Unexpected(c)),
        },
        Err(e) => return Err(e)
    });
    Ok(value)
}
/// Converts one ASCII decimal digit character to its numeric value.
///
/// # Errors
///
/// Returns `Unexpected(c)` for any character outside `'0'..='9'`.
pub const fn digit (c: char) -> Result<usize, ParseError> {
    match c {
        // The ASCII digits are contiguous, so the offset from '0' is the value.
        '0'..='9' => Ok(c as usize - '0' as usize),
        _ => Err(Unexpected(c)),
    }
}
pub const fn to_ref_atom (&'a self) -> Result<Atom<&'a str>, ParseError> {
Ok(match self.kind {
Nil => return Err(ParseError::Empty),
Num => match Self::number(self.slice()) {
Num => match to_number(self.slice()) {
Ok(n) => Atom::Num(n),
Err(e) => return Err(e)
},
@ -134,7 +144,7 @@ impl<'a> Token<'a> {
pub fn to_arc_atom (&self) -> Result<Atom<Arc<str>>, ParseError> {
Ok(match self.kind {
Nil => return Err(ParseError::Empty),
Num => match Self::number(self.slice()) {
Num => match to_number(self.slice()) {
Ok(n) => Atom::Num(n),
Err(e) => return Err(e)
},
@ -144,6 +154,21 @@ impl<'a> Token<'a> {
})
}
}
/// Parses a string of ASCII decimal digits into a `usize`.
///
/// An empty string yields `Ok(0)`.
///
/// # Errors
///
/// Returns `Unexpected(c)` for the first character that is not a decimal
/// digit, or for the digit at which the value would no longer fit in `usize`.
const fn to_number (digits: &str) -> Result<usize, ParseError> {
    let mut value: usize = 0;
    iterate!(char_indices(digits) => (_, c) => match to_digit(c) {
        // Checked arithmetic: a long digit string must produce an error rather
        // than panic (overflow checks on) or silently wrap (overflow checks off).
        Ok(digit) => value = match value.checked_mul(10) {
            Some(scaled) => match scaled.checked_add(digit) {
                Some(sum) => sum,
                None => return Err(Unexpected(c)),
            },
            None => return Err(Unexpected(c)),
        },
        Err(e) => return Err(e)
    });
    Ok(value)
}
/// Maps a single ASCII decimal digit character to the number it denotes.
///
/// # Errors
///
/// Returns `Unexpected(c)` when `c` is not in `'0'..='9'`.
const fn to_digit (c: char) -> Result<usize, ParseError> {
    let value = match c {
        // Inside this range `c` is a single ASCII byte, so the narrowing cast is lossless.
        '0'..='9' => (c as u8 - b'0') as usize,
        _ => return Err(Unexpected(c)),
    };
    Ok(value)
}
/// Parsed value, generic over the string type `T` that carries its text.
#[derive(Clone, PartialEq)]
pub enum Atom<T> {
    /// Unsigned integer value.
    Num(usize),
    /// Symbol, carrying its text.
    Sym(T),
    /// Key, carrying its text.
    Key(T),
    /// Expression: an ordered list of nested atoms.
    Exp(Vec<Atom<T>>),
}
impl<'a, T: 'a> Atom<T> {
pub fn transform <U: 'a, F: Fn(&'a T)->U + Clone> (&'a self, f: F) -> Atom<U> {