mirror of
https://codeberg.org/unspeaker/tek.git
synced 2025-12-06 11:46:41 +01:00
wip: more const parsing
This commit is contained in:
parent
ff31957fed
commit
297f9b30df
4 changed files with 141 additions and 114 deletions
|
|
@ -5,7 +5,7 @@ version = "0.1.0"
|
|||
|
||||
[dependencies]
|
||||
clojure-reader = "0.3.0"
|
||||
konst = "0.3.16"
|
||||
konst = { version = "0.3.16", features = [ "rust_1_83" ] }
|
||||
itertools = "0.14.0"
|
||||
|
||||
[features]
|
||||
|
|
|
|||
175
edn/src/token.rs
175
edn/src/token.rs
|
|
@ -1,16 +1,8 @@
|
|||
use crate::*;
|
||||
use konst::iter::{ConstIntoIter, IsIteratorKind};
|
||||
use konst::string::{split_at, str_range, char_indices};
|
||||
use self::ParseError::*;
|
||||
use self::TokenKind::*;
|
||||
/// Runs `$body` once for every item of a by-value iterator.
///
/// `$expr` must evaluate to a value whose `next()` consumes the iterator and
/// returns `Option<(item, remaining_iterator)>`. Each item is bound with the
/// pattern `$arg`. The loop stops as soon as `next()` yields anything that
/// does not match `Some(($arg, _))`. `$body` may `return` from the
/// enclosing function.
macro_rules! iterate {
    ($expr:expr => $arg: pat => $body:expr) => {
        let mut cursor = $expr;
        loop {
            match cursor.next() {
                Some(($arg, rest)) => {
                    $body;
                    // Continue from the iterator state returned alongside the item.
                    cursor = rest;
                },
                _ => break,
            }
        }
    }
}
|
||||
/// Errors reported by the tokenizer and the token-to-atom conversions.
#[derive(Debug)]
pub enum ParseError {
    /// NOTE(review): no producer of this variant is visible in this section.
    Unimplemented,
    /// There was nothing to parse, e.g. a token whose kind is `Nil`.
    Empty,
    /// The input ended while a token was still being scanned.
    Incomplete,
    /// A character that is not accepted at the current position.
    Unexpected(char),
    /// NOTE(review): meaning of the code is not visible in this section.
    Code(u8),
}
|
||||
impl std::fmt::Display for ParseError {
|
||||
fn fmt (&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
|
|
@ -24,6 +16,88 @@ impl std::fmt::Display for ParseError {
|
|||
}
|
||||
}
|
||||
// Empty impl: `ParseError` uses the default `Error` methods and relies on its
// `Debug` derive and `Display` impl to satisfy the trait's requirements.
impl std::error::Error for ParseError {}
|
||||
/// Loop helper for [konst]-style iterators, whose `next()` takes the iterator
/// by value and returns `Option<(item, remaining_iterator)>`.
///
/// `iterate!(iter_expr => pattern => body)` binds each item to `pattern` and
/// evaluates `body`. It stops as soon as `next()` yields anything that does
/// not match `Some((pattern, _))`. `body` may `return` from the enclosing
/// function.
macro_rules! iterate {
    ($expr:expr => $arg: pat => $body:expr) => {
        let mut cursor = $expr;
        loop {
            match cursor.next() {
                Some(($arg, rest)) => {
                    $body;
                    // Continue from the iterator state returned alongside the item.
                    cursor = rest;
                },
                _ => break,
            }
        }
    }
}
|
||||
/// By-value tokenizer state: wraps the part of the source string that has not
/// been tokenized yet. `next` consumes the iterator and hands back a new one
/// for the remaining text.
pub struct TokenIterator<'a>(&'a str);
|
||||
impl<'a> ConstIntoIter for TokenIterator<'a> {
|
||||
type Kind = IsIteratorKind;
|
||||
type Item = Token<'a>;
|
||||
type IntoIter = Self;
|
||||
}
|
||||
impl<'a> TokenIterator<'a> {
|
||||
pub const fn new (source: &'a str) -> Self { Self(source) }
|
||||
pub const fn split (self, index: usize) -> Self { Self(split_at(self.0, index).1) }
|
||||
pub const fn next (self) -> Option<(Result<Token<'a>, ParseError>, Self)> {
|
||||
let src = self.0;
|
||||
let mut token: Token<'a> = Token::new(src, Nil, 0, 0, 0);
|
||||
iterate!(char_indices(src) => (index, c) => token = match token.kind() {
|
||||
Nil => match c {
|
||||
'(' => Token::new(src, Exp, index, 1, 1),
|
||||
':'|'@' => Token::new(src, Sym, index, 1, 0),
|
||||
'0'..='9' => Token::new(src, Num, index, 1, 0),
|
||||
'/'|'a'..='z' => Token::new(src, Key, index, 1, 0),
|
||||
' '|'\n'|'\r'|'\t' => token.grow(),
|
||||
_ => return Some((Err(Unexpected(c)), self.split(token.end())))
|
||||
},
|
||||
Num => match c {
|
||||
'0'..='9' => token.grow(),
|
||||
' '|'\n'|'\r'|'\t' => return Some((Ok(token), self.split(token.end()))),
|
||||
_ => return Some((Err(Unexpected(c)), self.split(token.end())))
|
||||
},
|
||||
Sym => match c {
|
||||
'a'..='z'|'0'..='9'|'-' => token.grow(),
|
||||
' '|'\n'|'\r'|'\t' => return Some((Ok(token), self.split(token.end()))),
|
||||
_ => return Some((Err(Unexpected(c)), self.split(token.end())))
|
||||
},
|
||||
Key => match c {
|
||||
'a'..='z'|'0'..='9'|'-'|'/' => token.grow(),
|
||||
' '|'\n'|'\r'|'\t' => return Some((Ok(token), self.split(token.end()))),
|
||||
_ => return Some((Err(Unexpected(c)), self.split(token.end())))
|
||||
},
|
||||
Exp => match token.depth {
|
||||
0 => return Some((Ok(token), Self(split_at(src, token.end()).1))),
|
||||
_ => match c {
|
||||
')' => match token.grow_out() {
|
||||
Ok(token) => token,
|
||||
Err(e) => return Some((Err(e), self.split(token.end())))
|
||||
},
|
||||
'(' => token.grow_in(),
|
||||
_ => token.grow(),
|
||||
}
|
||||
},
|
||||
});
|
||||
match token.kind() { Nil => None, _ => Some((Err(ParseError::Incomplete), self.split(token.end()))) }
|
||||
}
|
||||
}
|
||||
/// By-value iterator that wraps a [`TokenIterator`] and converts each scanned
/// token into an atom.
pub struct AtomIterator<'a>(TokenIterator<'a>);
|
||||
impl<'a> ConstIntoIter for AtomIterator<'a> {
|
||||
type Kind = IsIteratorKind;
|
||||
type Item = Atom<&'a str>;
|
||||
type IntoIter = Self;
|
||||
}
|
||||
impl<'a> AtomIterator<'a> {
|
||||
pub const fn new (tokens: TokenIterator<'a>) -> Self { Self(tokens) }
|
||||
pub const fn next (mut self) -> Option<(Result<Atom<&'a str>, ParseError>, Self)> {
|
||||
match self.0.next() {
|
||||
None => None,
|
||||
Some((result, next)) => match result {
|
||||
Err(e) => Some((Err(e), Self(next))),
|
||||
Ok(token) => match token.to_ref_atom() {
|
||||
Err(e) => Some((Err(e), Self(next))),
|
||||
Ok(atom) => Some((Ok(atom), Self(next))),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Lexical category of a token.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
pub enum TokenKind {
    /// No token has been started yet; this is the default kind.
    #[default]
    Nil,
    /// Token started by a decimal digit.
    Num,
    /// Token started by `:` or `@`.
    Sym,
    /// Token started by `/` or a lowercase ASCII letter.
    Key,
    /// Token started by `(`.
    Exp,
}
|
||||
#[derive(Debug, Copy, Clone, Default, PartialEq)]
|
||||
|
|
@ -55,74 +129,10 @@ impl<'a> Token<'a> {
|
|||
d => Ok(Self { length: self.length + 1, depth: d - 1, ..self })
|
||||
}
|
||||
}
|
||||
pub const fn chomp_first (source: &'a str) -> Result<Self, ParseError> {
|
||||
match Self::chomp(source) {
|
||||
Ok((token, _)) => match token.kind() { Nil => Err(Empty), _ => Ok(token) },
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
pub const fn chomp (src: &'a str) -> Result<(Self, &'a str), ParseError> {
|
||||
let mut token: Token<'a> = Token::new(src, Nil, 0, 0, 0);
|
||||
iterate!(char_indices(src) => (index, c) => token = match token.kind() {
|
||||
Nil => match c {
|
||||
'(' => Self::new(src, Exp, index, 1, 1),
|
||||
':'|'@' => Self::new(src, Sym, index, 1, 0),
|
||||
'0'..='9' => Self::new(src, Num, index, 1, 0),
|
||||
'/'|'a'..='z' => Self::new(src, Key, index, 1, 0),
|
||||
' '|'\n'|'\r'|'\t' => token.grow(),
|
||||
_ => return Err(Unexpected(c))
|
||||
},
|
||||
Num => match c {
|
||||
'0'..='9' => token.grow(),
|
||||
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
|
||||
_ => return Err(Unexpected(c))
|
||||
},
|
||||
Sym => match c {
|
||||
'a'..='z'|'0'..='9'|'-' => token.grow(),
|
||||
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
|
||||
_ => return Err(Unexpected(c)),
|
||||
},
|
||||
Key => match c {
|
||||
'a'..='z'|'0'..='9'|'-'|'/' => token.grow(),
|
||||
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
|
||||
_ => return Err(Unexpected(c))
|
||||
},
|
||||
Exp => match token.depth {
|
||||
0 => match c {
|
||||
' '|'\n'|'\r'|'\t' => return Ok((token, split_at(src, token.end()).1)),
|
||||
_ => return Err(Unexpected(c))
|
||||
},
|
||||
_ => match c {
|
||||
')' => match token.grow_out() {
|
||||
Ok(token) => token,
|
||||
Err(e) => return Err(e)
|
||||
},
|
||||
'(' => token.grow_in(),
|
||||
_ => token.grow(),
|
||||
}
|
||||
},
|
||||
});
|
||||
Err(Empty)
|
||||
}
|
||||
pub const fn number (digits: &str) -> Result<usize, ParseError> {
|
||||
let mut value = 0;
|
||||
iterate!(char_indices(digits) => (_, c) => match Self::digit(c) {
|
||||
Ok(digit) => value = 10 * value + digit,
|
||||
Err(e) => return Err(e)
|
||||
});
|
||||
Ok(value)
|
||||
}
|
||||
pub const fn digit (c: char) -> Result<usize, ParseError> {
|
||||
Ok(match c {
|
||||
'0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4,
|
||||
'5' => 5, '6' => 6, '7' => 7, '8' => 8, '9' => 9,
|
||||
_ => return Err(Unexpected(c))
|
||||
})
|
||||
}
|
||||
pub const fn to_ref_atom (&'a self) -> Result<Atom<&'a str>, ParseError> {
|
||||
Ok(match self.kind {
|
||||
Nil => return Err(ParseError::Empty),
|
||||
Num => match Self::number(self.slice()) {
|
||||
Num => match to_number(self.slice()) {
|
||||
Ok(n) => Atom::Num(n),
|
||||
Err(e) => return Err(e)
|
||||
},
|
||||
|
|
@ -134,7 +144,7 @@ impl<'a> Token<'a> {
|
|||
pub fn to_arc_atom (&self) -> Result<Atom<Arc<str>>, ParseError> {
|
||||
Ok(match self.kind {
|
||||
Nil => return Err(ParseError::Empty),
|
||||
Num => match Self::number(self.slice()) {
|
||||
Num => match to_number(self.slice()) {
|
||||
Ok(n) => Atom::Num(n),
|
||||
Err(e) => return Err(e)
|
||||
},
|
||||
|
|
@ -144,6 +154,21 @@ impl<'a> Token<'a> {
|
|||
})
|
||||
}
|
||||
}
|
||||
const fn to_number (digits: &str) -> Result<usize, ParseError> {
|
||||
let mut value = 0;
|
||||
iterate!(char_indices(digits) => (_, c) => match to_digit(c) {
|
||||
Ok(digit) => value = 10 * value + digit,
|
||||
Err(e) => return Err(e)
|
||||
});
|
||||
Ok(value)
|
||||
}
|
||||
const fn to_digit (c: char) -> Result<usize, ParseError> {
|
||||
Ok(match c {
|
||||
'0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4,
|
||||
'5' => 5, '6' => 6, '7' => 7, '8' => 8, '9' => 9,
|
||||
_ => return Err(Unexpected(c))
|
||||
})
|
||||
}
|
||||
/// A parsed value, generic over the string representation `T` used for
/// textual atoms (the visible code instantiates it with `&str` and `Arc<str>`).
#[derive(Clone, PartialEq)]
pub enum Atom<T> {
    /// Unsigned integer, as produced from a `Num` token.
    Num(usize),
    /// Symbol text — presumably from a `Sym` token; confirm against the conversions.
    Sym(T),
    /// Key text — presumably from a `Key` token; confirm against the conversions.
    Key(T),
    /// Nested list of atoms.
    Exp(Vec<Atom<T>>),
}
|
||||
impl<'a, T: 'a> Atom<T> {
|
||||
pub fn transform <U: 'a, F: Fn(&'a T)->U + Clone> (&'a self, f: F) -> Atom<U> {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue