tengri/dsl/src/lib.rs

423 lines
17 KiB
Rust

#![feature(adt_const_params)]
#![feature(type_alias_impl_trait)]
#![feature(impl_trait_in_fn_trait_return)]
#![feature(const_precise_live_drops)]
extern crate const_panic;
use const_panic::{concat_panic, PanicFmt};
pub(crate) use ::tengri_core::*;
pub(crate) use std::error::Error;
pub(crate) use std::fmt::Debug;
pub(crate) use std::sync::Arc;
pub(crate) use std::collections::VecDeque;
pub(crate) use konst::iter::{ConstIntoIter, IsIteratorKind};
pub(crate) use konst::string::{split_at, str_range, char_indices};
pub(crate) use thiserror::Error;
pub(crate) use self::DslError::*;
#[cfg(test)] mod test;
/// Enumeration of values that may figure in an expression.
/// Generic over string storage (`Str`) and expression storage (`Exp`).
#[derive(Clone, Debug, PartialEq, Default)]
pub enum Val<Str, Exp> {
/// Empty expression. This is the [Default] value.
#[default] Nil,
/// Unsigned integer literal
Num(usize),
/// An identifier that starts with `:` or `@`
Sym(Str),
/// An identifier that starts with `/` or a lowercase ASCII letter
Key(Str),
/// A quoted string literal
Str(Str),
/// A DSL expression.
Exp(
/// Number of unclosed parentheses. Must be 0 to be valid.
isize,
/// Expression content.
Exp
),
/// An error.
Error(DslError),
}
/// A [Val] is [Copy] whenever both of its storage types are.
impl<Str: Copy, Exp: Copy> Copy for Val<Str, Exp> {}
impl<Str: DslStr, Exp: DslExp> Val<Str, Exp> {
pub fn convert <T: Dsl> (&self) -> Val<T::Str, T::Exp> where
T::Str: for<'a> From<&'a Str>,
T::Exp: for<'a> From<&'a Exp>
{
match self {
Val::Nil => Val::Nil,
Val::Num(u) => Val::Num(*u),
Val::Sym(s) => Val::Sym(s.into()),
Val::Key(s) => Val::Key(s.into()),
Val::Str(s) => Val::Str(s.into()),
Val::Exp(d, x) => Val::Exp(*d, x.into()),
Val::Error(e) => Val::Error(*e)
}
}
}
/// The expression representation for a [Dsl] implementation.
/// [Cst] wraps a [CstIter]. [Ast] wraps a shared [VecDeque] of tokens.
///
/// This trait has no items of its own; it only names a set of bounds.
pub trait DslExp: PartialEq + Clone + Default + Debug + Dsl {}
// Blanket impl: every type that satisfies the bounds is a `DslExp`.
impl<T: PartialEq + Clone + Default + Debug + Dsl> DslExp for T {}
/// The string representation for a [Dsl] implementation.
/// [Cst] uses `&'s str`. [Ast] uses `Arc<str>`.
///
/// This trait has no items of its own; it only names a set of bounds.
pub trait DslStr: PartialEq + Clone + Default + Debug + AsRef<str> + std::ops::Deref<Target = str> {}
// Blanket impl: every type that satisfies the bounds is a `DslStr`.
impl<T: PartialEq + Clone + Default + Debug + AsRef<str> + std::ops::Deref<Target = str>> DslStr for T {}
impl<Str: DslStr, Exp: DslExp + Dsl<Str=Str, Exp=Exp>> Val<Str, Exp> {
pub const fn err (&self) -> Option<DslError> {match self{Val::Error(e)=>Some(*e), _=>None}}
pub const fn nil (&self) -> bool {match self{Val::Nil=>true, _=>false}}
pub const fn num (&self) -> Option<usize> {match self{Val::Num(n)=>Some(*n), _=>None}}
pub const fn sym (&self) -> Option<&Str> {match self{Val::Sym(s)=>Some(s), _=>None}}
pub const fn key (&self) -> Option<&Str> {match self{Val::Key(k)=>Some(k), _=>None}}
pub const fn str (&self) -> Option<&Str> {match self{Val::Str(s)=>Some(s), _=>None}}
pub const fn exp (&self) -> Option<&Exp> {match self{Val::Exp(_, x)=>Some(x),_=>None}}
pub const fn exp_depth (&self) -> Option<isize> {match self{Val::Exp(d, _)=>Some(*d), _=>None}}
}
/// Parsed substring with range and value.
/// Generic over the same string (`Str`) and expression (`Exp`) storage as [Val].
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Token<Str, Exp> {
/// Meaning of token.
pub value: Val<Str, Exp>,
/// Reference to source text.
pub source: Str,
/// Index of 1st character of span.
pub start: usize,
/// Length of span. Added to `start` to obtain the end of the span.
pub length: usize,
}
/// A [Token] is [Copy] whenever both of its storage types are.
impl<Str: Copy, Exp: Copy> Copy for Token<Str, Exp> {}
impl<Str, Exp> Token<Str, Exp> {
pub const fn end (&self) -> usize {
self.start.saturating_add(self.length) }
pub const fn value (&self)
-> &Val<Str, Exp> { &self.value }
pub const fn err (&self)
-> Option<DslError> { if let Val::Error(e) = self.value { Some(e) } else { None } }
pub const fn new (source: Str, start: usize, length: usize, value: Val<Str, Exp>)
-> Self { Self { value, start, length, source } }
pub const fn copy (&self) -> Self where Val<Str, Exp>: Copy, Str: Copy, Exp: Copy {
Self { value: self.value, ..*self }
}
}
/// As far as [Dsl] is concerned, a token stands for its `value` field.
impl<Str: DslStr, Exp: DslExp> Dsl for Token<Str, Exp> {
    type Str = Str;
    type Exp = Exp;
    /// Hand out a clone of the token's value.
    fn dsl(&self) -> Val<Str, Exp> {
        self.value().clone()
    }
}
/// Coerce to [Val] for predefined [Self::Str] and [Self::Exp].
pub trait Dsl: Clone + Debug {
/// The string representation for a dizzle.
type Str: DslStr;
/// The expression representation for a dizzle.
type Exp: DslExp;
/// Request the top-level DSL [Val]ue.
/// May perform cloning or parsing.
fn dsl (&self) -> Val<Self::Str, Self::Exp>;
fn err (&self) -> Option<DslError> {self.dsl().err()}
fn nil (&self) -> bool {self.dsl().nil()}
fn num (&self) -> Option<usize> {self.dsl().num()}
fn sym (&self) -> Option<Self::Str> {self.dsl().sym()}
fn key (&self) -> Option<Self::Str> {self.dsl().key()}
fn str (&self) -> Option<Self::Str> {self.dsl().str()}
fn exp (&self) -> Option<Self::Exp> {self.dsl().exp()}
fn exp_depth (&self) -> Option<isize> {self.dsl().exp_depth()}
fn exp_head (&self) -> Val<Self::Str, Self::Exp> {self.dsl().exp_head()}
fn exp_tail (&self) -> Self::Exp {self.dsl().exp_tail()}
fn exp_each (&self, f: impl Fn(&Self) -> Usually<()>) -> Usually<()> { todo!() }
}
/// The simplest implementor of [Dsl]: a [Val] is its own DSL value.
impl<Str: DslStr, Exp: DslExp> Dsl for Val<Str, Exp> {
    type Str = Str;
    type Exp = Exp;
    /// Hand out a clone of `self`.
    fn dsl(&self) -> Val<Str, Exp> {
        Clone::clone(self)
    }
}
/// The abstract syntax tree (AST) can be produced from the CST
/// by cloning source slices into owned ([Arc]) string slices.
///
/// Holds a shared ([Arc]) queue of shared tokens, so cloning an [Ast]
/// clones the outer [Arc] rather than the tokens.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Ast(Arc<VecDeque<Arc<Token<Arc<str>, Ast>>>>);
/// A [Val] using the AST storage types: `Arc<str>` strings and [Ast] expressions.
pub type AstVal = Val<Arc<str>, Ast>;
/// A [Token] using the AST storage types: `Arc<str>` strings and [Ast] expressions.
pub type AstToken = Token<Arc<str>, Ast>;
/// An [Ast] presents itself as a closed (depth 0) expression holding a clone of itself.
impl Dsl for Ast {
    type Str = Arc<str>;
    type Exp = Ast;
    fn dsl(&self) -> Val<Arc<str>, Ast> {
        // The derived `Clone` clones the inner `Arc`, so the tokens stay shared.
        let content: Ast = self.clone();
        Val::Exp(0, content)
    }
}
impl<'s> From<&'s str> for Ast {
fn from (source: &'s str) -> Self {
let source: Arc<str> = source.into();
Self(CstIter(CstConstIter(source.as_ref()))
.map(|token|Arc::new(Token {
source: source.clone(),
start: token.start,
length: token.length,
value: match token.value {
Val::Nil => Val::Nil,
Val::Num(u) => Val::Num(u),
Val::Sym(s) => Val::Sym(s.into()),
Val::Key(s) => Val::Key(s.into()),
Val::Str(s) => Val::Str(s.into()),
Val::Exp(d, x) => Val::Exp(d, x.into()),
Val::Error(e) => Val::Error(e.into())
},
}))
.collect::<VecDeque<_>>()
.into())
}
}
impl<'s> From<Cst<'s>> for Ast {
fn from (cst: Cst<'s>) -> Self {
let mut tokens: VecDeque<_> = Default::default();
Self(tokens.into())
}
}
/// The concrete syntax tree (CST) implements zero-copy
/// parsing of the DSL from a string reference. CST items
/// preserve info about their location in the source.
/// CST stores strings as source references and expressions as [CstIter] instances.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
pub struct Cst<'s>(pub CstIter<'s>);
/// A [Val] using the CST storage types: `&'s str` strings and [Cst] expressions.
pub type CstVal<'s> = Val<&'s str, Cst<'s>>;
/// A [Token] using the CST storage types: `&'s str` strings and [Cst] expressions.
pub type CstToken<'s> = Token<&'s str, Cst<'s>>;
impl<'s> CstToken<'s> {
pub const fn slice (&self) -> &str {
str_range(self.source, self.start, self.end()) }
pub const fn slice_exp (&self) -> &str {
str_range(self.source, self.start.saturating_add(1), self.end()) }
pub const fn grow (&mut self) -> &mut Self {
let max_length = self.source.len().saturating_sub(self.start);
self.length = self.length + 1;
if self.length > max_length { self.length = max_length }
self
}
pub const fn grow_exp (&'s mut self, depth: isize, source: &'s str) -> &mut Self {
self.value = Val::Exp(depth, Cst(CstIter(CstConstIter(source))));
self
}
}
/// A [Cst] presents itself as a closed (depth 0) expression holding a copy of itself.
impl<'s> Dsl for Cst<'s> {
    type Str = &'s str;
    type Exp = Cst<'s>;
    fn dsl(&self) -> Val<Self::Str, Self::Exp> {
        // `Cst` is `Copy`, so dereferencing duplicates the wrapped iterator.
        let content: Cst<'s> = *self;
        Val::Exp(0, content)
    }
}
impl<'s> From<&'s str> for Cst<'s> {
fn from (source: &'s str) -> Self {
Self(CstIter(CstConstIter(source)))
}
}
/// DSL-specific error codes.
///
/// The `#[error]` attribute on each variant supplies its display message.
#[derive(Error, Debug, Copy, Clone, PartialEq, PanicFmt)] pub enum DslError {
// Functionality that has not been written yet.
#[error("parse failed: not implemented")]
Unimplemented,
// Nothing to parse.
#[error("parse failed: empty")]
Empty,
// Input stopped before the construct was complete.
#[error("parse failed: incomplete")]
Incomplete,
// A character that is not allowed where it was found; carries that character.
#[error("parse failed: unexpected character '{0}'")]
Unexpected(char),
// An error identified only by a numeric code.
#[error("parse failed: error #{0}")]
Code(u8),
// The end was reached.
#[error("end reached")]
End
}
/// Provides native [Iterator] API over [CstConstIter], emitting [CstToken] items.
///
/// [CstIter::next] returns just the token and mutates `self`,
/// instead of returning the remaining iterator alongside the token
/// as [CstConstIter::advance] does.
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct CstIter<'s>(pub CstConstIter<'s>);
impl<'s> CstIter<'s> {
pub const fn new (source: &'s str) -> Self { Self(CstConstIter::new(source)) }
}
impl<'s> Iterator for CstIter<'s> {
    type Item = CstToken<'s>;
    /// Yield the next token and keep the rest of the source for the following call.
    fn next(&mut self) -> Option<Self::Item> {
        let (token, rest) = self.0.advance()?;
        self.0 = rest;
        Some(token)
    }
}
/// Holds a reference to the source text.
/// [CstConstIter::advance] emits subsequent pairs of:
/// * a [CstToken] and
/// * a [CstConstIter] over the source text remaining after that token
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct CstConstIter<'s>(pub &'s str);
impl<'s> From <&'s str> for CstConstIter<'s> {
fn from (src: &'s str) -> Self { Self(src) }
}
impl<'s> Iterator for CstConstIter<'s> {
    type Item = CstToken<'s>;
    /// Yield the next token and move on to the text after it.
    ///
    /// [CstConstIter::advance] returns the remaining text instead of storing
    /// it, so it has to be written back here. Otherwise every call would
    /// produce the same first token and the iterator would never end.
    fn next (&mut self) -> Option<CstToken<'s>> {
        let (token, rest) = self.advance()?;
        *self = rest;
        Some(token)
    }
}
/// Marks [CstConstIter] as an iterator for the `konst` iteration machinery.
// NOTE(review): `Item` is `Cst` here, while the `Iterator` impl and
// `advance` produce `CstToken` — confirm which is intended.
impl<'s> ConstIntoIter for CstConstIter<'s> {
type Kind = IsIteratorKind;
type Item = Cst<'s>;
type IntoIter = Self;
}
impl<'s> CstConstIter<'s> {
    /// Start iterating over the tokens of `source`.
    pub const fn new(source: &'s str) -> Self {
        Self(source)
    }
    /// An iterator over the text that follows position `index`.
    pub const fn chomp(&self, index: usize) -> Self {
        let (_, rest) = split_at(self.0, index);
        Self(rest)
    }
    /// Scan the next token.
    ///
    /// Returns the token together with an iterator over the text after it,
    /// or `None` when the scan yields [Val::Nil]. `self` is left unchanged;
    /// the caller decides whether to continue from the returned iterator.
    pub const fn advance(&mut self) -> Option<(CstToken<'s>, Self)> {
        let token = peek(Val::Nil, self.0);
        if matches!(token.value, Val::Nil) {
            return None
        }
        let rest = self.chomp(token.end());
        Some((token, rest))
    }
}
/// Scan the first token out of `source`, continuing from parser state `value`.
///
/// Pass [Val::Nil] as `value` to start a fresh scan: leading whitespace is
/// skipped and the first other character decides which kind of token is read.
/// Scanning stops at the character that terminates the token (which is left
/// out of the span), after an error, or at the end of `source`.
///
/// The returned token refers to `source`; its `start` and `length` are byte
/// offsets into it. If nothing but whitespace was found, the returned value
/// is still [Val::Nil]. An expression whose parentheses were not all closed
/// comes back with a non-zero depth.
pub const fn peek <'s> (mut value: CstVal<'s>, source: &'s str) -> CstToken<'s> {
    use Val::*;
    let mut start = 0;
    let mut length = 0;
    let mut chars = char_indices(source);
    while let Some(((i, c), rest)) = chars.next() {
        // Step the iterator first, so that every path through the body
        // (including `continue`) moves on to the following character.
        chars = rest;
        // Spans are measured in bytes, because `start` is a byte offset
        // and the span is later used to slice the source.
        let width = c.len_utf8();
        if matches!(value, Error(_)) {
            break
        } else if matches!(value, Nil) {
            if is_whitespace(c) {
                length += width;
                continue
            }
            // First character of the token: the span restarts here.
            start = i;
            length = width;
            let head = str_range(source, start, start + length);
            if is_exp_start(c) {
                value = Exp(1, Cst(CstIter(CstConstIter(head))));
            } else if is_str_start(c) {
                value = Str(head);
            } else if is_sym_start(c) {
                value = Sym(head);
            } else if is_key_start(c) {
                value = Key(head);
            } else if is_digit(c) {
                value = match to_digit(c) { Ok(n) => Num(n), Err(e) => Error(e) };
            } else {
                value = Error(Unexpected(c));
                break
            }
        } else if matches!(value, Str(_)) {
            if is_str_end(c) {
                break
            }
            length += width;
            value = Str(str_range(source, start, start + length));
        } else if matches!(value, Sym(_)) {
            if is_sym_end(c) {
                break
            }
            // As for keys and numbers, an offending character is counted
            // into the span of the resulting error token.
            length += width;
            value = if is_sym_char(c) {
                Sym(str_range(source, start, start + length))
            } else {
                Error(Unexpected(c))
            };
        } else if matches!(value, Key(_)) {
            if is_key_end(c) {
                break
            }
            length += width;
            value = if is_key_char(c) {
                Key(str_range(source, start, start + length))
            } else {
                Error(Unexpected(c))
            };
        } else if let Exp(depth, _) = value {
            if depth == 0 {
                // Every parenthesis has been closed: the expression is complete.
                value = Exp(0, Cst(CstIter(CstConstIter(str_range(source, start, start + length)))));
                break
            }
            length += width;
            value = Exp(
                if c == ')' { depth - 1 } else if c == '(' { depth + 1 } else { depth },
                Cst(CstIter(CstConstIter(str_range(source, start, start + length))))
            );
        } else if let Num(m) = value {
            if is_num_end(c) {
                break
            }
            length += width;
            // NOTE: a literal too large for `usize` overflows this arithmetic.
            value = match to_digit(c) { Ok(n) => Num(n + 10 * m), Err(e) => Error(e) };
        }
    }
    Token { value, source, start, length }
}
/// Whether `c` is a space, line feed, carriage return or tab.
const fn is_whitespace(c: char) -> bool {
    c == ' ' || c == '\n' || c == '\r' || c == '\t'
}
/// Whether `c` is an ASCII decimal digit.
const fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
/// Whether `c` terminates a number: whitespace or a closing parenthesis.
const fn is_num_end(c: char) -> bool {
    match c {
        ' ' | '\n' | '\r' | '\t' | ')' => true,
        _ => false,
    }
}
/// Whether `c` may begin a key: `/` or a lowercase ASCII letter.
const fn is_key_start(c: char) -> bool {
    c == '/' || c.is_ascii_lowercase()
}
/// Whether `c` may continue a key: lowercase ASCII letter, ASCII digit, `-` or `/`.
const fn is_key_char(c: char) -> bool {
    c.is_ascii_lowercase() || c.is_ascii_digit() || c == '-' || c == '/'
}
/// Whether `c` terminates a key: whitespace or a closing parenthesis.
const fn is_key_end(c: char) -> bool {
    match c {
        ' ' | '\n' | '\r' | '\t' | ')' => true,
        _ => false,
    }
}
/// Whether `c` may begin a symbol: `:` or `@`.
const fn is_sym_start(c: char) -> bool {
    c == ':' || c == '@'
}
/// Whether `c` may continue a symbol: ASCII letter, ASCII digit or `-`.
const fn is_sym_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || c == '-'
}
/// Whether `c` terminates a symbol: whitespace or a closing parenthesis.
const fn is_sym_end(c: char) -> bool {
    match c {
        ' ' | '\n' | '\r' | '\t' | ')' => true,
        _ => false,
    }
}
/// Whether `c` opens a string literal: a double quote.
const fn is_str_start(c: char) -> bool {
    c == '"'
}
/// Whether `c` closes a string literal: a double quote.
const fn is_str_end(c: char) -> bool {
    c == '"'
}
/// Whether `c` opens an expression: an opening parenthesis.
const fn is_exp_start(c: char) -> bool {
    c == '('
}
/// Read `digits` as an unsigned decimal number.
///
/// Returns the error reported by [to_digit] for the first character that
/// is not a decimal digit. An empty string yields `Ok(0)`.
pub const fn to_number<D: Dsl>(digits: &str) -> Result<usize, DslError> {
    let mut total = 0;
    let mut cursor = char_indices(digits);
    loop {
        match cursor.next() {
            // Ran out of characters: everything was a digit.
            None => return Ok(total),
            Some(((_, c), rest)) => {
                match to_digit(c) {
                    Ok(digit) => total = 10 * total + digit,
                    Err(e) => return Err(e),
                }
                cursor = rest;
            }
        }
    }
}
/// The numeric value of an ASCII decimal digit.
///
/// Any other character is reported as [DslError::Unexpected].
pub const fn to_digit(c: char) -> Result<usize, DslError> {
    match c.to_digit(10) {
        // The value is at most 9, so widening it to `usize` is lossless.
        Some(digit) => Ok(digit as usize),
        None => Err(Unexpected(c)),
    }
}
/// `State` + [Dsl] -> `Self`.
pub trait FromDsl<State>: Sized {
fn try_from_dsl (state: &State, dsl: &impl Dsl) -> Perhaps<Self>;
fn from_dsl (state: &State, dsl: &impl Dsl, err: impl Fn()->Box<dyn Error>) -> Usually<Self> {
match Self::try_from_dsl(state, dsl)? { Some(dsl) => Ok(dsl), _ => Err(err()) } }
}
/// `self` + `Options` -> [Dsl]
///
/// Placeholder: the trait has no items yet.
pub trait IntoDsl { /*TODO*/ }
/// `self` + [Dsl] -> `Item`
pub trait DslInto<Item> {
fn try_dsl_into (&self, dsl: &impl Dsl) -> Perhaps<Item>;
fn dsl_into (&self, dsl: &impl Dsl, err: impl Fn()->Box<dyn Error>) -> Usually<Item> {
match Self::try_dsl_into(self, dsl)? { Some(dsl) => Ok(dsl), _ => Err(err()) } }
}
/// `self` + `Item` -> [Dsl]
///
/// Placeholder: the trait has no items yet.
pub trait DslFrom { /*TODO*/ }
/// Implement type conversions.
///
/// Takes any number of `Target { ... }` groups. Each group holds one or more
/// `;`-separated entries of the form
/// `<generics>? (binding: SourceType) expression`,
/// and every entry expands to `impl<generics> From<SourceType> for Target`
/// whose `from` takes `binding` and evaluates `expression`.
macro_rules! from(($($Struct:ty { $(
$(<$($l:lifetime),* $($T:ident$(:$U:ident)?),*>)? ($source:ident: $From:ty) $expr:expr
);+ $(;)? })*) => { $(
$(impl $(<$($l),* $($T$(:$U)?),*>)? From<$From> for $Struct {
fn from ($source: $From) -> Self { $expr }
})+
)* });
//from! {
////Vec<Token<'s>> { <'s> (val: CstIter<'s>) val.collect(); }
//CstConstIter<'s> {
//<'s> (src: &'s str) Self(src);
//<'s> (iter: CstIter<'s>) iter.0; }
//CstIter<'s> {
//<'s> (src: &'s str) Self(CstConstIter(src));
//<'s> (iter: CstConstIter<'s>) Self(iter); }
//Cst<'s> { <'s> (src: &'s str) Self(CstIter(CstConstIter(src))); }
//Vec<Ast> { <'s> (val: CstIter<'s>) val.map(Into::into).collect(); }
//Token<Cst<'s>> { <'s> (token: Token<Cst<'s>>) Self { value: token.value.into(), span: token.span.into() } }
//Ast {
//<'s> (src: &'s str) Ast::from(CstIter(CstConstIter(src)));
//<'s> (cst: Cst<'s>) Ast(VecDeque::from([dsl_val(cst.val())]).into());
//<'s> (iter: CstIter<'s>) Ast(iter.map(|x|x.value.into()).collect::<VecDeque<_>>().into());
//<D: Dsl> (token: Token<D>) Ast(VecDeque::from([dsl_val(token.val())]).into()); }
//}