wip: try to get a simplified parser going

This commit is contained in:
🪞👃🪞 2025-01-04 08:49:38 +01:00
parent fc82d6ff9b
commit 600d0b3aca
17 changed files with 676 additions and 133 deletions

296
edn/src/edn_lib.rs Normal file
View file

@ -0,0 +1,296 @@
use std::sync::{Arc, RwLock};
use std::collections::BTreeMap;
pub use clojure_reader::edn::Edn;
/// Checks [`Item::read_all`] against empty input, bare atoms, mixed atom
/// sequences and one parenthesised expression.
#[cfg(test)] #[test] fn test_edn () -> Result<(), ParseError> {
    use Item::*;
    // Each case pairs a source text with the items it is expected to yield.
    let cases: Vec<(&str, Vec<Item>)> = vec![
        ("", vec![]),
        (" ", vec![]),
        ("1234", vec![Num(1234)]),
        ("1234 5 67", vec![Num(1234), Num(5), Num(67)]),
        ("foo/bar", vec![Key("foo/bar".into())]),
        (":symbol", vec![Sym(":symbol".into())]),
        (
            " foo/bar :baz 456",
            vec![Key("foo/bar".into()), Sym(":baz".into()), Num(456)],
        ),
        (
            " (foo/bar :baz 456) ",
            vec![Exp(vec![Key("foo/bar".into()), Sym(":baz".into()), Num(456)])],
        ),
    ];
    for (source, expected) in cases {
        assert_eq!(Item::read_all(source)?, expected);
    }
    Ok(())
}
/// Converts a run of decimal digit characters into its numeric value.
///
/// Characters are folded left to right, most significant first, each one
/// being converted by `digit`. An empty string yields `0`. Overflow is not
/// checked beyond what plain `usize` arithmetic does.
fn number (digits: &str) -> usize {
    digits.chars().fold(0, |total, c| 10 * total + digit(c))
}
/// Maps a single decimal digit character (`'0'`..=`'9'`) to its value.
///
/// # Panics
///
/// Reaches `unreachable!()` for any other character; callers must only
/// pass characters already known to be decimal digits.
const fn digit (c: char) -> usize {
    match c {
        // The ten digit characters are contiguous, so the distance from '0'
        // is the digit's value.
        '0'..='9' => (c as u32 - '0' as u32) as usize,
        _ => unreachable!(),
    }
}
/// Errors reported while tokenizing or reading source text.
///
/// Implements [`std::error::Error`], so it converts into
/// `Box<dyn std::error::Error>` with `?`, and `PartialEq`, so tests can
/// compare error values directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// An expression had no content.
    // NOTE(review): only the commented-out draft parser in this file returns
    // this (for `)` directly after `(`); confirm the intended meaning.
    Empty,
    /// A character that is not valid at the current position.
    Unexpected(char),
    /// Presumably: the input ended before a construct was finished.
    Incomplete
}
impl std::fmt::Display for ParseError {
    fn fmt (&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Empty => write!(f, "empty expression"),
            Self::Unexpected(c) => write!(f, "unexpected character {:?}", c),
            Self::Incomplete => write!(f, "incomplete input"),
        }
    }
}
impl std::error::Error for ParseError {}
/// A parsed value that owns its data.
#[derive(Clone, Debug, Default, PartialEq)]
pub enum Item {
    /// No value; this is the default.
    #[default]
    Nil,
    /// An unsigned integer.
    Num(usize),
    /// A symbol's text, stored as a `String`.
    Sym(String),
    /// A key's text, stored as a `String`.
    Key(String),
    /// An expression: an ordered list of nested items.
    Exp(Vec<Item>),
}
impl Item {
    /// Parses every item found in `source`, in order.
    ///
    /// Tokens are taken off the front of `source` one at a time with
    /// `Token::chomp` and converted with [`Item::read`]. `Nil` results
    /// (nothing but whitespace was left) are skipped, so empty or
    /// all-whitespace input yields an empty vector.
    ///
    /// # Errors
    ///
    /// Returns whatever error `Token::chomp` or [`Item::read`] reports.
    pub fn read_all <'a> (mut source: &'a str) -> Result<Vec<Self>, ParseError> {
        let mut items = vec![];
        while !source.is_empty() {
            let (remaining, token) = Token::chomp(source)?;
            match Item::read(token)? {
                Item::Nil => {},
                item => items.push(item),
            }
            source = remaining;
        }
        Ok(items)
    }
    /// Converts a single token into an owned item.
    ///
    /// Number, symbol and key tokens are sliced out of their source text.
    /// A closed expression token (depth `0`) has its outer delimiters
    /// stripped and its interior parsed recursively with
    /// [`Item::read_all`].
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::Incomplete`] for an expression token whose
    /// depth is not `0`, i.e. the input ended before the closing delimiter.
    /// This used to panic. Errors from parsing the interior of an
    /// expression are passed through.
    pub fn read <'a> (token: Token<'a>) -> Result<Self, ParseError> {
        use Token::*;
        Ok(match token {
            Nil => Item::Nil,
            Num(chars, index, length) =>
                Self::Num(number(&chars[index..index+length])),
            Sym(chars, index, length) =>
                Self::Sym(chars[index..index+length].to_string()),
            Key(chars, index, length) =>
                Self::Key(chars[index..index+length].to_string()),
            // Skip the opening delimiter and stop before the closing one.
            Exp(chars, index, length, 0) =>
                Self::Exp(Self::read_all(&chars[index+1..(index+length).saturating_sub(1)])?),
            // Non-zero depth: unterminated input is an error, not a bug.
            Exp(..) => return Err(ParseError::Incomplete),
        })
    }
}
/// A lexical token that borrows from the source text it was found in.
///
/// Every non-`Nil` variant carries the source text, the offset at which the
/// token starts, and the token's length; consumers slice the source as
/// `source[start..start + length]`.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub enum Token<'a> {
    /// No token; this is the default.
    #[default]
    Nil,
    /// A number: `(source, start, length)`.
    Num(&'a str, usize, usize),
    /// A symbol: `(source, start, length)`.
    Sym(&'a str, usize, usize),
    /// A key: `(source, start, length)`.
    Key(&'a str, usize, usize),
    /// An expression: `(source, start, length, depth)`, where `depth` is the
    /// number of delimiters still open (`0` once the expression is closed).
    Exp(&'a str, usize, usize, usize),
}
impl<'a> Token<'a> {
    /// Takes one token off the front of `source`.
    ///
    /// Leading whitespace is skipped. Returns the text that follows the
    /// token together with the token itself; the whitespace character that
    /// ended the token is left in the returned remainder. When the input
    /// runs out, the remainder is `""` and the token is whatever state was
    /// reached: `Nil` if only whitespace was seen, or an `Exp` with non-zero
    /// depth if a delimiter was never closed.
    ///
    /// Token lengths are byte counts, matching the byte offsets produced by
    /// `char_indices`, so slicing stays on character boundaries even when
    /// an expression contains multi-byte characters.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::Unexpected`] for a character that cannot start
    /// or continue the current token.
    fn chomp (source: &'a str) -> Result<(&'a str, Self), ParseError> {
        use Token::*;
        let mut state = Self::default();
        for (index, c) in source.char_indices() {
            state = match state {
                // must begin expression
                Nil => match c {
                    ' '|'\n'|'\r'|'\t' => Nil,
                    '(' => Exp(source, index, 1, 1),
                    ':' => Sym(source, index, 1),
                    '0'..='9' => Num(source, index, 1),
                    'a'..='z' => Key(source, index, 1),
                    _ => return Err(ParseError::Unexpected(c))
                },
                // Atom tokens are created with length 1 and only ever grow.
                Num(_, _, 0) | Sym(_, _, 0) | Key(_, _, 0) => unreachable!(),
                // Atoms accept ASCII only, so one char is always one byte.
                Num(text, start, length) => match c {
                    '0'..='9' => Num(text, start, length + 1),
                    ' '|'\n'|'\r'|'\t' => return Ok((&text[start+length..], state)),
                    _ => return Err(ParseError::Unexpected(c))
                },
                Sym(text, start, length) => match c {
                    'a'..='z'|'0'..='9'|'-' => Sym(text, start, length + 1),
                    ' '|'\n'|'\r'|'\t' => return Ok((&text[start+length..], state)),
                    _ => return Err(ParseError::Unexpected(c))
                },
                Key(text, start, length) => match c {
                    'a'..='z'|'0'..='9'|'-'|'/' => Key(text, start, length + 1),
                    ' '|'\n'|'\r'|'\t' => return Ok((&text[start+length..], state)),
                    _ => return Err(ParseError::Unexpected(c))
                },
                // A closed expression must be followed by whitespace.
                Exp(text, start, length, 0) => match c {
                    ' '|'\n'|'\r'|'\t' => return Ok((&text[start+length..], state)),
                    _ => return Err(ParseError::Unexpected(c))
                },
                // Inside an open expression any character is accepted, so
                // advance by its encoded width rather than by 1.
                Exp(text, start, length, depth) => match c {
                    ')' => Exp(text, start, length + 1, depth - 1),
                    '(' => Exp(text, start, length + 1, depth + 1),
                    _ => Exp(text, start, length + c.len_utf8(), depth)
                },
            }
        }
        Ok(("", state))
    }
}
//#[derive(Debug, Copy, Clone, Default, PartialEq)]
//pub struct Items<'a>(&'a [Item<'a>]);
//impl<'a> Items<'a> {
//fn iter (&'a self) -> ItemsIterator<'a> {
//ItemsIterator(0, self.0)
//}
//}
//pub struct ItemsIterator<'a>(usize, &'a [Item<'a>]);
//impl<'a> Iterator for ItemsIterator<'a> {
//type Item = &'a Item<'a>;
//fn next (&mut self) -> Option<Self::Item> {
//let item = self.1.get(self.0);
//self.0 += 1;
//item
//}
//}
/*
Nice, but doesn't work without compile-time slice concatenation
(which I guess could be implemented using an unsafe linked list?).
Never done that one before in my life; might try.
use konst::slice_concat;
const fn read <'a> (
chars: impl Iterator<Item = char>
) -> Result<Range<'a>, ParseError> {
use Range::*;
let mut state = Range::Nil;
let mut tokens: &[Range<'a>] = &[];
while let Some(c) = chars.next() {
state = match state {
// must begin expression
Nil => match c {
' ' => Nil,
'(' => Exp(&[]),
':' => Sym(&[]),
'1'..'9' => Num(digit(c)),
'a'..'z' => Key(&[&[c]]),
_ => return Err(ParseError::Unexpected(c))
},
Num(b) => match c {
' ' => return Ok(Num(digit(c))),
'1'..'9' => Num(b*10+digit(c)),
_ => return Err(ParseError::Unexpected(c))
}
Sym([]) => match c {
'a'..'z' => Sym(&[c]),
_ => return Err(ParseError::Unexpected(c))
},
Sym([b @ ..]) => match c {
' ' => return Ok(Sym(&b)),
'a'..'z' | '0'..'9' | '-' => Sym(&[..b, c]),
_ => return Err(ParseError::Unexpected(c))
}
Key([[b @ ..]]) => match c {
' ' => return Ok(Key(&[&b])),
'/' => Key(&[&b, &[]]),
'a'..'z' | '0'..'9' | '-' => Key(&[&[..b, c], &[]]),
_ => return Err(ParseError::Unexpected(c))
}
Key([s @ .., []]) => match c {
'a'..'z' => Key(&[..s, &[c]]),
_ => return Err(ParseError::Unexpected(c))
}
Key([s @ .., [b @ ..]]) => match c {
'/' => Key([..s, &b, &[]]),
'a'..'z' | '0'..'9' | '-' => Key(&[..s, &[..b, c]]),
_ => return Err(ParseError::Unexpected(c))
}
// expression must begin with key or symbol
Exp([]) => match c {
' ' => Exp(&[]),
')' => return Err(ParseError::Empty),
':' => Exp(&[Sym(&[':'])]),
c => Exp(&[Key(&[&[c]])]),
},
// expression can't begin with number
Exp([Num(num)]) => return Err(ParseError::Unexpected(c)),
// symbol begins with : and lowercase a-z
Exp([Sym([':'])]) => match c {
'a'..'z' => Exp(&[Sym(&[':', c])]),
_ => return Err(ParseError::Unexpected(c)),
},
// any other char is part of symbol until space or )
Exp([Sym([':', b @ ..])]) => match c {
')' => { tokens = &[..tokens, Exp(&[Sym(&[":", ..b])])]; Nil },
' ' => Exp(&[Sym(&[':', ..b]), Nil]),
c => Exp(&[Sym(&[':', ..b, c])]),
},
// key begins with lowercase a-z
Exp([Key([])]) => match c {
'a'..'z' => Exp([Key([[c]])]),
_ => return Err(ParseError::Unexpected(c)),
},
// any other char is part of key until slash space or )
Exp([Key([[b @ ..]])]) => match c {
'/' => Exp(&[Key(&[[..b], []])]),
' ' => Exp(&[Key(&[[..b]]), Nil]),
')' => { tokens = &[..tokens, Exp(&[Sym(&[":", ..b])])]; Nil },
c => Exp(&[Key(&[[..b, c]])])
}
// slash adds new section to key
Exp([Key([b @ .., []])]) => match c {
'/' => Exp(&[Key(&[[..b], []])]),
' ' => Exp(&[Key(&[[..b]]), Nil]),
')' => { tokens = &[..tokens, Exp(&[Sym(&[":", ..b])])]; Nil },
c => Exp(&[Key(&[[..b, c]])])
}
}
}
Ok(state)
}
*/
/// EDN parsing helper.
///
/// Two forms are accepted:
///
/// - `edn!(value { pat => expr, ... })` expands to `match value { ... }`.
/// - `edn!(item in items { pat => expr, ... })` expands to a `for` loop over
///   `items` that runs the same `match` on each `item`.
///
/// Both `value`/`item` and `items` must be plain identifiers. A trailing
/// comma after the last arm is allowed.
#[macro_export] macro_rules! edn {
    ($edn:ident { $($pat:pat => $expr:expr),* $(,)? }) => {
        match $edn { $($pat => $expr),* }
    };
    ($edn:ident in $args:ident { $($pat:pat => $expr:expr),* $(,)? }) => {
        for $edn in $args {
            // `$crate::` keeps the recursive call resolvable when this
            // exported macro is invoked by path from another crate.
            $crate::edn!($edn { $($pat => $expr),* })
        }
    };
}
/// Builds a value of the implementing type from a slice of EDN values.
///
/// `C` is a caller-supplied context type that is passed by value to
/// [`FromEdn::from_edn`].
pub trait FromEdn<C>: Sized {
/// Identifier string associated with the implementing type.
// NOTE(review): presumably the EDN name that selects this type; confirm against callers.
const ID: &'static str;
/// Constructs `Self` from `context` and the EDN values in `expr`,
/// or returns a boxed error.
fn from_edn (context: C, expr: &[Edn<'_>]) ->
std::result::Result<Self, Box<dyn std::error::Error>>;
}
/// Implements the [FromEdn] trait.
///
/// Invocation shape: `from_edn!(ID => |context: Context, args| -> Type { body })`.
/// The expansion is an `impl FromEdn<Context> for Type` whose `ID` constant is
/// the given expression and whose `from_edn` body is the given block.
///
/// The expansion names `FromEdn`, `Edn` and `Usually` without a path, so all
/// three must be in scope wherever the macro is invoked.
// NOTE(review): `Usually` is not defined in the visible part of this file;
// presumably an alias for `Result<T, Box<dyn std::error::Error>>`, which is
// what the trait method returns. Confirm.
#[macro_export] macro_rules! from_edn {
($id:expr => |$context:tt:$Context:ty, $args:ident| -> $T:ty $body:block) => {
impl FromEdn<$Context> for $T {
const ID: &'static str = $id;
// `$context` and `$args` become the parameter names visible inside `$body`.
fn from_edn <'e> ($context: $Context, $args: &[Edn<'e>]) -> Usually<Self> {
$body
}
}
}
}