wip: EdnItem -> Atom, rewrite tokenizer

This commit is contained in:
🪞👃🪞 2025-01-17 21:47:34 +01:00
parent 143cd24e09
commit ff31957fed
39 changed files with 477 additions and 376 deletions

299
edn/src/token.rs Normal file
View file

@ -0,0 +1,299 @@
use crate::*;
use konst::string::{split_at, str_range, char_indices};
use self::ParseError::*;
use self::TokenKind::*;
/// Drives a "functional" iterator whose `next()` returns `Option<(item, rest)>`,
/// i.e. the item together with the iterator state to continue from.
///
/// `iterate!(iterator => pattern => body)` binds each item to `pattern` and
/// evaluates `body` once per item. `body` may `return` from the enclosing
/// function. The expansion uses only `loop`/`match`, so it is usable inside
/// the `const fn`s of this file where `for` loops are not.
macro_rules! iterate {
    ($source:expr => $item:pat => $action:expr) => {
        let mut cursor = $source;
        loop {
            match cursor.next() {
                Some(($item, rest)) => {
                    $action;
                    // Continue from the state handed back by `next()`.
                    cursor = rest;
                }
                _ => break,
            }
        }
    }
}
/// Errors reported while tokenizing or converting EDN source text.
///
/// Like the token types in this file, the error is a small plain value, so it
/// derives the comparison and copy traits; this lets callers and tests compare
/// results directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseError {
    /// The requested operation is not implemented yet.
    Unimplemented,
    /// No token was found.
    Empty,
    /// The input is incomplete (displayed as `incomplete`).
    Incomplete,
    /// A character that is not valid at its position.
    Unexpected(char),
    /// A numeric error code (displayed as `error #N`).
    Code(u8),
}
impl std::fmt::Display for ParseError {
fn fmt (&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Unimplemented => write!(f, "unimplemented"),
Empty => write!(f, "empty"),
Incomplete => write!(f, "incomplete"),
Unexpected(c) => write!(f, "unexpected '{c}'"),
Code(i) => write!(f, "error #{i}"),
}
}
}
// Marker impl with no overridden methods: makes `ParseError` usable as a
// `std::error::Error`, e.g. with `?` in functions returning `Box<dyn std::error::Error>`.
impl std::error::Error for ParseError {}
/// Lexical category of a [`Token`]; also the tokenizer's current state.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
pub enum TokenKind {
    /// Nothing recognized yet (the default).
    #[default]
    Nil,
    /// Number: begins with a digit `0`-`9`.
    Num,
    /// Symbol: begins with `:` or `@`.
    Sym,
    /// Key: begins with `/` or a lowercase letter `a`-`z`.
    Key,
    /// Expression: begins with `(`.
    Exp,
}
/// A span of a source string together with its lexical [`TokenKind`].
///
/// The token does not own any text: it records where it lies inside `source`.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
pub struct Token<'a> {
/// The string the token was read from.
source: &'a str,
/// Lexical category of the span.
kind: TokenKind,
/// Offset into `source` where the span begins.
start: usize,
/// Extent of the span; `start + length` is the exclusive end used for slicing.
length: usize,
/// Parenthesis nesting depth: raised by `grow_in`, lowered by `grow_out`.
depth: usize,
}
impl<'a> Token<'a> {
pub const fn new (
source: &'a str, kind: TokenKind, start: usize, length: usize, depth: usize
) -> Self {
Self { source, kind, start, length, depth }
}
pub const fn end (&self) -> usize { self.start + self.length }
pub const fn slice (&self) -> &str { str_range(self.source, self.start, self.end()) }
pub const fn kind (&self) -> TokenKind { Nil }
pub const fn grow (self) -> Self {
Self { length: self.length + 1, ..self }
}
pub const fn grow_in (self) -> Self {
Self { length: self.length + 1, depth: self.depth + 1, ..self }
}
pub const fn grow_out (self) -> Result<Self, ParseError> {
match self.depth {
0 => Err(Unexpected(')')),
d => Ok(Self { length: self.length + 1, depth: d - 1, ..self })
}
}
pub const fn chomp_first (source: &'a str) -> Result<Self, ParseError> {
match Self::chomp(source) {
Ok((token, _)) => match token.kind() { Nil => Err(Empty), _ => Ok(token) },
Err(e) => Err(e),
}
}
pub const fn chomp (src: &'a str) -> Result<(Self, &'a str), ParseError> {
let mut token: Token<'a> = Token::new(src, Nil, 0, 0, 0);
iterate!(char_indices(src) => (index, c) => token = match token.kind() {
Nil => match c {
'(' => Self::new(src, Exp, index, 1, 1),
':'|'@' => Self::new(src, Sym, index, 1, 0),
'0'..='9' => Self::new(src, Num, index, 1, 0),
'/'|'a'..='z' => Self::new(src, Key, index, 1, 0),
' '|'\n'|'\r'|'\t' => token.grow(),
_ => return Err(Unexpected(c))
},
Num => match c {
'0'..='9' => token.grow(),
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
_ => return Err(Unexpected(c))
},
Sym => match c {
'a'..='z'|'0'..='9'|'-' => token.grow(),
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
_ => return Err(Unexpected(c)),
},
Key => match c {
'a'..='z'|'0'..='9'|'-'|'/' => token.grow(),
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
_ => return Err(Unexpected(c))
},
Exp => match token.depth {
0 => match c {
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
_ => return Err(Unexpected(c))
},
_ => match c {
')' => match token.grow_out() {
Ok(token) => token,
Err(e) => return Err(e)
},
'(' => token.grow_in(),
_ => token.grow(),
}
},
});
Err(Empty)
}
pub const fn number (digits: &str) -> Result<usize, ParseError> {
let mut value = 0;
iterate!(char_indices(digits) => (_, c) => match Self::digit(c) {
Ok(digit) => value = 10 * value + digit,
Err(e) => return Err(e)
});
Ok(value)
}
pub const fn digit (c: char) -> Result<usize, ParseError> {
Ok(match c {
'0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4,
'5' => 5, '6' => 6, '7' => 7, '8' => 8, '9' => 9,
_ => return Err(Unexpected(c))
})
}
pub const fn to_ref_atom (&'a self) -> Result<Atom<&'a str>, ParseError> {
Ok(match self.kind {
Nil => return Err(ParseError::Empty),
Num => match Self::number(self.slice()) {
Ok(n) => Atom::Num(n),
Err(e) => return Err(e)
},
Sym => Atom::Sym(self.slice()),
Key => Atom::Key(self.slice()),
Exp => todo!()
})
}
pub fn to_arc_atom (&self) -> Result<Atom<Arc<str>>, ParseError> {
Ok(match self.kind {
Nil => return Err(ParseError::Empty),
Num => match Self::number(self.slice()) {
Ok(n) => Atom::Num(n),
Err(e) => return Err(e)
},
Sym => Atom::Sym(self.slice().into()),
Key => Atom::Key(self.slice().into()),
Exp => todo!()
})
}
}
/// A parsed EDN value, generic over the string type `T` used for its text.
#[derive(Clone, PartialEq)]
pub enum Atom<T> {
    /// An unsigned number.
    Num(usize),
    /// A symbol, holding its text.
    Sym(T),
    /// A key, holding its text.
    Key(T),
    /// An expression: a list of nested atoms.
    Exp(Vec<Atom<T>>),
}
impl<'a, T: 'a> Atom<T> {
pub fn transform <U: 'a, F: Fn(&'a T)->U + Clone> (&'a self, f: F) -> Atom<U> {
use Atom::*;
match self {
Num(n) => Num(*n),
Sym(t) => Sym(f(t)),
Key(t) => Key(f(t)),
Exp(e) => Exp(e.iter().map(|i|i.transform(f.clone())).collect())
}
}
}
impl<'a, T: AsRef<str>> Atom<T> {
    /// Returns an atom of the same shape whose text borrows from `self`.
    pub fn to_ref (&'a self) -> Atom<&'a str> {
        self.transform(<T as AsRef<str>>::as_ref)
    }
    /// Returns an atom of the same shape whose text is copied into `Arc<str>`.
    pub fn to_arc (&'a self) -> Atom<Arc<str>> {
        self.transform(|text| Arc::from(text.as_ref()))
    }
}
impl<'a> Atom<&'a str> {
pub const fn read_all_ref (_: &'a str) -> Result<Vec<Self>, ParseError> {
Err(Unimplemented)
}
}
impl<T: Debug> Debug for Atom<T> {
fn fmt (&self, f: &mut Formatter<'_>) -> Result<(), FormatError> {
use Atom::*;
match self {
Num(u) => write!(f, "(num {u})"),
Sym(u) => write!(f, "(sym {u:?})"),
Key(u) => write!(f, "(key {u:?})"),
Exp(e) => write!(f, "(exp {})",
itertools::join(e.iter().map(|i|format!("{:?}", i)), ","))
}
}
}
impl<T: Display> Display for Atom<T> {
fn fmt (&self, f: &mut Formatter<'_>) -> Result<(), FormatError> {
use Atom::*;
use itertools::join;
match self {
Num(u) => write!(f, "{u}"),
Sym(u) => write!(f, "{u}"),
Key(u) => write!(f, "{u}"),
Exp(e) => write!(f, "({})", join(e.iter().map(|i|format!("{}", i)), " "))
}
}
}
//impl<'a> Token<'a> {
//pub const fn chomp_one (source: &'a str) -> Result<Token<'a>, ParseError> {
//match Self::chomp(source) {
//Ok((_, token)) => Ok(token),
//Err(e) => Err(e)
//}
//}
//pub const fn from_nil (c: char) -> Result<(&'a str, Token<'a>), ParseError> {
//match c {
//' '|'\n'|'\r'|'\t' => Nil,
//'(' => Exp(source, index, 1, 1),
//':'|'@' => Sym(source, index, 1),
//'0'..='9' => Num(source, index, 1),
//'a'..='z' => Key(source, index, 1),
//_ => return Err(Unexpected(c))
//}
//}
//pub const fn chomp (source: &'a str) -> Result<(&'a str, Token<'a>), ParseError> {
//let mut state = Self::Nil;
//let mut chars = char_indices(source);
//while let Some(((index, c), next)) = chars.next() {
//state = match state {
//// must begin expression
//Nil => Self::from_nil(c),
//Num(_, _, 0) => unreachable!(),
//Sym(_, _, 0) => unreachable!(),
//Key(_, _, 0) => unreachable!(),
//Num(src, idx, len) => match c {
//'0'..='9' => Num(src, idx, len + 1),
//' '|'\n'|'\r'|'\t' => return Ok((split(src, idx+len).1, Num(src, idx, len))),
//_ => return Err(Unexpected(c))
//},
//Sym(src, idx, len) => match c {
//'a'..='z'|'0'..='9'|'-' => Sym(src, idx, len + 1),
//' '|'\n'|'\r'|'\t' => return Ok((split(src, idx+len).1, Sym(src, idx, len))),
//_ => return Err(Unexpected(c))
//},
//Key(src, idx, len) => match c {
//'a'..='z'|'0'..='9'|'-'|'/' => Key(src, idx, len + 1),
//' '|'\n'|'\r'|'\t' => return Ok((split(src, idx+len).1, Key(src, idx, len))),
//_ => return Err(Unexpected(c))
//},
//Exp(src, idx, len, 0) => match c {
//' '|'\n'|'\r'|'\t' => return Ok((split(src, idx+len).1, Exp(src, idx, len, 0))),
//_ => return Err(Unexpected(c))
//},
//Exp(src, idx, len, balance) => match c {
//')' => Exp(src, idx, len + 1, balance - 1),
//'(' => Exp(src, idx, len + 1, balance + 1),
//_ => Exp(src, idx, len + 1, balance)
//},
//};
//chars = next
//}
//Ok(("", state))
//}
//pub fn src (&self) -> &str {
//match self {
//Self::Nil => "",
//Self::Num(src, _, _) => src,
//Self::Sym(src, _, _) => src,
//Self::Key(src, _, _) => src,
//Self::Exp(src, _, _, _) => src,
//}
//}
//pub fn str (&self) -> &str {
//match self {
//Self::Nil => "",
//Self::Num(src, start, len) => &src[*start..start+len],
//Self::Sym(src, start, len) => &src[*start..start+len],
//Self::Key(src, start, len) => &src[*start..start+len],
//Self::Exp(src, start, len, 0) => &src[*start..(start+len)],
//Self::Exp(src, start, len, d) => panic!(
//"unclosed delimiter with depth {d} in:\n{}",
//&src[*start..(start+len)]
//)
//}
//}
//pub fn to_atom_ref (&'a self) -> Result<Atom<&'a str>, ParseError> {
//use Atom::*;
//Ok(match self {
//Token::Nil => Nil,
//Token::Num(_, _, _) => Num(Token::number(self.str())),
//Token::Sym(_, _, _) => Sym(self.str().into()),
//Token::Key(_, _, _) => Key(self.str().into()),
//Token::Exp(_, _, _, _) => Exp(match Atom::read_all_ref(self.str()) {
//Ok(exp) => exp,
//Err(e) => return Err(e)
//}),
//})
//}
//}
/// Exercises the struct-based `Token` API: span arithmetic, span growth,
/// digit parsing, conversion to atoms, and the inputs `chomp` rejects.
#[cfg(test)] #[test] fn test_edn_token () -> Result<(), Box<dyn std::error::Error>> {
    // Sources that hold no token at all.
    assert!(matches!(Token::chomp(""), Err(ParseError::Empty)));
    assert!(matches!(Token::chomp(" \n \r \t "), Err(ParseError::Empty)));
    // A character that cannot begin a token is rejected.
    assert!(matches!(Token::chomp("?"), Err(ParseError::Unexpected('?'))));
    assert!(matches!(Token::chomp(")"), Err(ParseError::Unexpected(')'))));
    // Span arithmetic and conversion of a number surrounded by whitespace.
    let num = Token::new("  8 ", TokenKind::Num, 2, 1, 0);
    assert_eq!(num.end(), 3);
    assert_eq!(num.slice(), "8");
    assert_eq!(num.to_ref_atom()?, Atom::Num(8));
    // Symbols and keys keep their full text.
    let foo = Token::new(":foo", TokenKind::Sym, 0, 4, 0);
    assert_eq!(foo.to_ref_atom()?, Atom::Sym(":foo"));
    let bar = Token::new("@bar", TokenKind::Sym, 0, 4, 0);
    assert_eq!(bar.to_ref_atom()?, Atom::Sym("@bar"));
    let key = Token::new("foo/bar", TokenKind::Key, 0, 7, 0);
    assert_eq!(key.to_ref_atom()?, Atom::Key("foo/bar"));
    assert_eq!(key.to_arc_atom()?, Atom::Key(Arc::from("foo/bar")));
    // A token without a kind cannot become an atom.
    assert!(matches!(Token::default().to_ref_atom(), Err(ParseError::Empty)));
    // Growing a span, with and without parenthesis tracking.
    assert_eq!(
        Token::new("12", TokenKind::Num, 0, 1, 0).grow(),
        Token::new("12", TokenKind::Num, 0, 2, 0)
    );
    assert_eq!(
        Token::new("((", TokenKind::Exp, 0, 1, 1).grow_in(),
        Token::new("((", TokenKind::Exp, 0, 2, 2)
    );
    assert_eq!(
        Token::new("()", TokenKind::Exp, 0, 1, 1).grow_out()?,
        Token::new("()", TokenKind::Exp, 0, 2, 0)
    );
    assert!(matches!(
        Token::new("())", TokenKind::Exp, 0, 2, 0).grow_out(),
        Err(ParseError::Unexpected(')'))
    ));
    // Digit parsing.
    assert_eq!(Token::number("1024")?, 1024);
    assert_eq!(Token::number("")?, 0);
    assert!(matches!(Token::number("12a"), Err(ParseError::Unexpected('a'))));
    assert_eq!(Token::digit('7')?, 7);
    assert!(matches!(Token::digit('x'), Err(ParseError::Unexpected('x'))));
    Ok(())
}