wip: fix(dsl): maybe getting somewhere?
Some checks are pending
/ build (push) Waiting to run

This commit is contained in:
🪞👃🪞 2025-06-21 13:48:45 +03:00
parent 91dc77cfea
commit 11f686650f
19 changed files with 964 additions and 918 deletions

View file

@ -1,9 +1,219 @@
//! The concrete syntax tree (CST) implements zero-copy
//! parsing of the DSL from a string reference. CST items
//! preserve info about their location in the source.
use crate::*;
/// Concrete syntax tree: a newtype around the [CstIter] that walks its tokens.
///
/// CST stores strings as source references and expressions as [CstIter] instances.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct Cst<'src>(pub CstIter<'src>);
impl<'src> Dsl for Cst<'src> {
type Str = &'src str;
type Exp = CstIter<'src>;
fn nth (&self, index: usize) -> Option<DslVal<Self::Str, Self::Exp>> {
self.0.nth(index)
}
}
/// Parsed substring with range and value.
///
/// A token does not own any text: it keeps a reference to the source it was
/// read from plus a `start`/`length` window into it.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
pub struct CstVal<'src> {
/// Meaning of token.
pub value: DslVal<&'src str, CstIter<'src>>,
/// Reference to source text.
pub source: &'src str,
/// Index of 1st character of token.
// NOTE(review): presumably a byte offset as produced by `char_indices` — confirm.
pub start: usize,
/// Length of token.
// NOTE(review): grown by 1 per consumed character (see `grow`); whether that
// matches byte offsets for non-ASCII input should be confirmed.
pub length: usize,
}
/// [Dsl] access for a single token.
///
/// NOTE(review): `nth` is still a stub; calling it panics through `todo!()`.
impl<'src> Dsl for CstVal<'src> {
type Str = &'src str;
type Exp = CstIter<'src>;
// Not implemented yet; `index` is currently unused.
fn nth (&self, index: usize) -> Option<DslVal<Self::Str, Self::Exp>> {
todo!()
}
}
impl<'src> CstVal<'src> {
pub const fn new (
source: &'src str,
start: usize,
length: usize,
value: DslVal<&'src str, CstIter<'src>>
) -> Self {
Self { source, start, length, value }
}
pub const fn end (&self) -> usize {
self.start.saturating_add(self.length)
}
pub const fn slice (&'src self) -> &'src str {
self.slice_source(self.source)
}
pub const fn slice_source <'range> (&'src self, source: &'range str) -> &'range str {
str_range(source, self.start, self.end())
}
pub const fn slice_source_exp <'range> (&'src self, source: &'range str) -> &'range str {
str_range(source, self.start.saturating_add(1), self.end())
}
pub const fn with_value (self, value: DslVal<&'src str, CstIter<'src>>) -> Self {
Self { value, ..self }
}
pub const fn value (&self) -> DslVal<&'src str, CstIter<'src>> {
self.value
}
pub const fn error (self, error: DslErr) -> Self {
Self { value: DslVal::Err(error), ..self }
}
pub const fn grow (self) -> Self {
Self { length: self.length.saturating_add(1), ..self }
}
pub const fn grow_num (self, m: usize, c: char) -> Self {
match to_digit(c) {
Result::Ok(n) => Self { value: DslVal::Num(10*m+n), ..self.grow() },
Result::Err(e) => Self { value: DslVal::Err(e), ..self.grow() },
}
}
pub const fn grow_key (self) -> Self {
let token = self.grow();
token.with_value(DslVal::Key(token.slice_source(self.source)))
}
pub const fn grow_sym (self) -> Self {
let token = self.grow();
token.with_value(DslVal::Sym(token.slice_source(self.source)))
}
pub const fn grow_str (self) -> Self {
let token = self.grow();
token.with_value(DslVal::Str(token.slice_source(self.source)))
}
pub const fn grow_exp (self) -> Self {
let token = self.grow();
if let DslVal::Exp(depth, _) = token.value() {
token.with_value(DslVal::Exp(depth, CstIter::new(token.slice_source_exp(self.source))))
} else {
unreachable!()
}
}
pub const fn grow_in (self) -> Self {
let token = self.grow_exp();
if let DslVal::Exp(depth, source) = token.value() {
token.with_value(DslVal::Exp(depth.saturating_add(1), source))
} else {
unreachable!()
}
}
pub const fn grow_out (self) -> Self {
let token = self.grow_exp();
if let DslVal::Exp(depth, source) = token.value() {
if depth > 0 {
token.with_value(DslVal::Exp(depth - 1, source))
} else {
return self.error(Unexpected(')'))
}
} else {
unreachable!()
}
}
}
/// Provides a native [Iterator] API over [CstConstIter],
/// emitting [CstVal] items.
///
/// [Iterator::next] on this type returns just the [CstVal] and stores the
/// remaining iterator back into `self`, instead of returning an updated version
/// of the struct alongside the token as [CstConstIter::next] does.
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct CstIter<'src>(pub CstConstIter<'src>);
/// [Dsl] access for [CstIter]: lookups are answered by the wrapped
/// [CstConstIter] and re-typed so that nested expressions are [CstIter]s.
impl<'src> Dsl for CstIter<'src> {
    type Str = &'src str;
    type Exp = Self;
    /// Returns the value at position `index`, or `None` when the inner
    /// iterator has no such value.
    fn nth (&self, index: usize) -> Option<DslVal<Self::Str, Self::Exp>> {
        use DslVal::*;
        self.0.nth(index).map(|value| match value {
            Nil => Nil,
            Err(e) => Err(e),
            Num(u) => Num(u),
            Sym(s) => Sym(s),
            // Keys and strings keep their own variant; only `Exp` needs
            // converting, so nothing should be folded into `Sym` here.
            Key(s) => Key(s),
            Str(s) => Str(s),
            // Wrap the nested const iterator to match `Self::Exp`.
            Exp(d, x) => Exp(d, CstIter(x)),
        })
    }
}
/// Const helpers that forward to the wrapped [CstConstIter].
impl<'src> CstIter<'src> {
    /// Starts iterating over `source`.
    pub const fn new (source: &'src str) -> Self {
        let inner = CstConstIter::new(source);
        Self(inner)
    }
    /// Returns the next token without advancing the iterator.
    pub const fn peek (&self) -> Option<CstVal<'src>> {
        CstConstIter::peek(&self.0)
    }
}
/// Standard iteration: yields tokens and advances `self` in place.
impl<'src> Iterator for CstIter<'src> {
    type Item = CstVal<'src>;
    /// Takes the next token from the inner const iterator and replaces the
    /// inner iterator with the remainder it returned.
    fn next (&mut self) -> Option<CstVal<'src>> {
        let (token, remainder) = self.0.next()?;
        self.0 = remainder;
        Some(token)
    }
}
/// Collects every remaining token of the iterator into a vector.
///
/// Written as [From] rather than a hand-rolled [Into]: the standard blanket
/// impl still provides `Into<Vec<CstVal>>`, so existing `.into()` callers are
/// unaffected and `Vec::from(iter)` becomes available as well.
impl<'src> From<CstIter<'src>> for Vec<CstVal<'src>> {
    fn from (iter: CstIter<'src>) -> Self {
        iter.collect()
    }
}
/// Converts every remaining token into an [Ast] node and collects the results.
///
/// Written as [From] rather than a hand-rolled [Into]: the standard blanket
/// impl still provides `Into<Vec<Ast>>`, so existing `.into()` callers are
/// unaffected.
impl<'src> From<CstIter<'src>> for Vec<Ast> {
    fn from (iter: CstIter<'src>) -> Self {
        // Each token is converted through its own `Into<Ast>` implementation.
        iter.map(Into::into).collect()
    }
}
/// Owns a reference to the source text.
/// [CstConstIter::next] emits subsequent pairs of:
/// * a [CstVal] and
/// * the source text remaining
/// * [x] [CstConstIter::next] wraps the remaining source in `Self`.
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct CstConstIter<'src>(pub &'src str);
/// [Dsl] access for [CstConstIter]: positions are resolved by re-scanning the
/// source from the iterator's current position.
impl<'src> Dsl for CstConstIter<'src> {
    type Str = &'src str;
    type Exp = Self;
    /// Returns the value of the token at position `index`, or `None` when the
    /// source runs out of tokens first.
    ///
    /// `self` is not advanced; a copy of the iterator does the walking.
    fn nth (&self, index: usize) -> Option<DslVal<Self::Str, Self::Exp>> {
        use DslVal::*;
        // The iterator is `Copy`, so a plain dereference gives a scratch cursor.
        let mut iter = *self;
        // Skip the first `index` tokens, bailing out if the source ends early.
        for _ in 0..index {
            iter = iter.next()?.1;
        }
        iter.next().map(|(token, _)| match token.value {
            Nil => Nil,
            Err(e) => Err(e),
            Num(u) => Num(u),
            Sym(s) => Sym(s),
            // Keys and strings keep their own variant; only `Exp` needs
            // converting, so nothing should be folded into `Sym` here.
            Key(s) => Key(s),
            Str(s) => Str(s),
            // Unwrap the nested `CstIter` to match `Self::Exp`.
            Exp(d, x) => Exp(d, x.0),
        })
    }
}
/// Const-evaluable iteration primitives over the remaining source text.
impl<'src> CstConstIter<'src> {
    /// Starts iterating over `source`.
    pub const fn new (source: &'src str) -> Self {
        Self(source)
    }
    /// Returns an iterator over what follows position `index` of the
    /// remaining source, as split off by `split_at`.
    pub const fn chomp (&self, index: usize) -> Self {
        let (_, rest) = split_at(self.0, index);
        Self(rest)
    }
    /// By-value variant of [Self::next_mut].
    pub const fn next (mut self) -> Option<(CstVal<'src>, Self)> {
        self.next_mut()
    }
    /// Scans the next token without consuming anything.
    pub const fn peek (&self) -> Option<CstVal<'src>> {
        peek_src(self.0)
    }
    /// Returns the next token together with an iterator positioned after the
    /// token's end, or `None` when no token can be peeked.
    ///
    /// Despite the `&mut` receiver, `self` itself is left untouched here.
    pub const fn next_mut (&mut self) -> Option<(CstVal<'src>, Self)> {
        if let Some(token) = self.peek() {
            let remainder = self.chomp(token.end());
            Some((token, remainder))
        } else {
            None
        }
    }
}
/// Implement the const iterator pattern.
macro_rules! const_iter {
($(<$l:lifetime>)?|$self:ident: $Struct:ty| => $Item:ty => $expr:expr) => {
@ -19,6 +229,10 @@ macro_rules! const_iter {
}
}
const_iter!(<'src>|self: CstConstIter<'src>|
=> CstVal<'src>
=> self.next_mut().map(|(result, _)|result));
/// Static iteration helper used by [cst].
macro_rules! iterate {
($expr:expr => $arg: pat => $body:expr) => {
@ -30,140 +244,26 @@ macro_rules! iterate {
}
}
/// CST stores strings as source references and expressions as [SourceIter] instances.
pub type CstValue<'source> = DslValue<&'source str, SourceIter<'source>>;
/// Token sharing memory with source reference.
#[derive(Debug, Copy, Clone, Default, PartialEq)]
pub struct Cst<'src> {
/// Reference to source text.
pub source: &'src str,
/// Index of 1st character of token.
pub start: usize,
/// Length of token.
pub length: usize,
/// Meaning of token.
pub value: CstValue<'src>,
}
impl<'src> Cst<'src> {
pub const fn new (
source: &'src str,
start: usize,
length: usize,
value: CstValue<'src>
) -> Self {
Self { source, start, length, value }
}
pub const fn end (&self) -> usize {
self.start.saturating_add(self.length)
}
pub const fn slice (&'src self) -> &'src str {
self.slice_source(self.source)
}
pub const fn slice_source <'range> (&'src self, source: &'range str) -> &'range str {
str_range(source, self.start, self.end())
}
pub const fn slice_source_exp <'range> (&'src self, source: &'range str) -> &'range str {
str_range(source, self.start.saturating_add(1), self.end())
}
pub const fn with_value (self, value: CstValue<'src>) -> Self {
Self { value, ..self }
}
pub const fn value (&self) -> CstValue<'src> {
self.value
}
pub const fn error (self, error: DslError) -> Self {
Self { value: DslValue::Err(error), ..self }
}
pub const fn grow (self) -> Self {
Self { length: self.length.saturating_add(1), ..self }
}
pub const fn grow_num (self, m: usize, c: char) -> Self {
match to_digit(c) {
Result::Ok(n) => Self { value: DslValue::Num(10*m+n), ..self.grow() },
Result::Err(e) => Self { value: DslValue::Err(e), ..self.grow() },
}
}
pub const fn grow_key (self) -> Self {
let token = self.grow();
token.with_value(DslValue::Key(token.slice_source(self.source)))
}
pub const fn grow_sym (self) -> Self {
let token = self.grow();
token.with_value(DslValue::Sym(token.slice_source(self.source)))
}
pub const fn grow_str (self) -> Self {
let token = self.grow();
token.with_value(DslValue::Str(token.slice_source(self.source)))
}
pub const fn grow_exp (self) -> Self {
let token = self.grow();
if let DslValue::Exp(depth, _) = token.value() {
token.with_value(DslValue::Exp(depth, SourceIter::new(token.slice_source_exp(self.source))))
} else {
unreachable!()
}
}
pub const fn grow_in (self) -> Self {
let token = self.grow_exp();
if let DslValue::Exp(depth, source) = token.value() {
token.with_value(DslValue::Exp(depth.saturating_add(1), source))
} else {
unreachable!()
}
}
pub const fn grow_out (self) -> Self {
let token = self.grow_exp();
if let DslValue::Exp(depth, source) = token.value() {
if depth > 0 {
token.with_value(DslValue::Exp(depth - 1, source))
} else {
return self.error(Unexpected(')'))
}
} else {
unreachable!()
}
}
}
pub const fn to_number (digits: &str) -> DslResult<usize> {
let mut value = 0;
iterate!(char_indices(digits) => (_, c) => match to_digit(c) {
Ok(digit) => value = 10 * value + digit,
Result::Err(e) => return Result::Err(e)
});
Ok(value)
}
pub const fn to_digit (c: char) -> DslResult<usize> {
Ok(match c {
'0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4,
'5' => 5, '6' => 6, '7' => 7, '8' => 8, '9' => 9,
_ => return Result::Err(Unexpected(c))
})
}
pub const fn peek_src <'src> (source: &'src str) -> Option<Cst<'src>> {
use DslValue::*;
let mut token: Cst<'src> = Cst::new(source, 0, 0, Nil);
pub const fn peek_src <'src> (source: &'src str) -> Option<CstVal<'src>> {
use DslVal::*;
let mut token: CstVal<'src> = CstVal::new(source, 0, 0, Nil);
iterate!(char_indices(source) => (start, c) => token = match token.value() {
Err(_) => return Some(token),
Nil => match c {
' '|'\n'|'\r'|'\t' =>
token.grow(),
'(' =>
Cst::new(source, start, 1, Exp(1, SourceIter::new(str_range(source, start, start + 1)))),
CstVal::new(source, start, 1, Exp(1, CstIter::new(str_range(source, start, start + 1)))),
'"' =>
Cst::new(source, start, 1, Str(str_range(source, start, start + 1))),
CstVal::new(source, start, 1, Str(str_range(source, start, start + 1))),
':'|'@' =>
Cst::new(source, start, 1, Sym(str_range(source, start, start + 1))),
CstVal::new(source, start, 1, Sym(str_range(source, start, start + 1))),
'/'|'a'..='z' =>
Cst::new(source, start, 1, Key(str_range(source, start, start + 1))),
CstVal::new(source, start, 1, Key(str_range(source, start, start + 1))),
'0'..='9' =>
Cst::new(source, start, 1, match to_digit(c) {
Ok(c) => DslValue::Num(c),
Result::Err(e) => DslValue::Err(e)
CstVal::new(source, start, 1, match to_digit(c) {
Ok(c) => DslVal::Num(c),
Result::Err(e) => DslVal::Err(e)
}),
_ => token.error(Unexpected(c))
},
@ -201,90 +301,19 @@ pub const fn peek_src <'src> (source: &'src str) -> Option<Cst<'src>> {
}
}
/// Owns a reference to the source text.
/// [SourceConstIter::next] emits subsequent pairs of:
/// * a [Cst] and
/// * the source text remaining
/// * [ ] TODO: maybe [SourceConstIter::next] should wrap the remaining source in `Self` ?
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct SourceConstIter<'src>(pub &'src str);
impl<'src> From<SourceConstIter<'src>> for SourceIter<'src> {
fn from (source: SourceConstIter<'src>) -> Self{
Self(source)
}
/// Parses a string of decimal digits into a number.
///
/// Each character is converted with [to_digit] and folded into the running
/// value as `10 * value + digit`. The first character that [to_digit] rejects
/// ends the function early with that error.
// NOTE(review): the accumulation uses plain arithmetic, so a sufficiently long
// digit string can overflow `usize` — confirm whether that is acceptable.
pub const fn to_number (digits: &str) -> DslResult<usize> {
let mut value = 0;
// `iterate!` is the file's const-friendly loop helper; the character index is not needed.
iterate!(char_indices(digits) => (_, c) => match to_digit(c) {
Ok(digit) => value = 10 * value + digit,
Result::Err(e) => return Result::Err(e)
});
Ok(value)
}
impl<'src> From<&'src str> for SourceConstIter<'src> {
fn from (source: &'src str) -> Self{
Self::new(source)
}
}
impl<'src> SourceConstIter<'src> {
pub const fn new (source: &'src str) -> Self {
Self(source)
}
pub const fn chomp (&self, index: usize) -> Self {
Self(split_at(self.0, index).1)
}
pub const fn next (mut self) -> Option<(Cst<'src>, Self)> {
Self::next_mut(&mut self)
}
pub const fn peek (&self) -> Option<Cst<'src>> {
peek_src(self.0)
}
pub const fn next_mut (&mut self) -> Option<(Cst<'src>, Self)> {
match self.peek() {
Some(token) => Some((token, self.chomp(token.end()))),
None => None
}
}
}
const_iter!(<'src>|self: SourceConstIter<'src>| => Cst<'src> => self.next_mut().map(|(result, _)|result));
/// Provides a native [Iterator] API over [SourceConstIter],
/// emitting [Cst] items.
///
/// [Cst::next] returns just the [Cst] and mutates `self`,
/// instead of returning an updated version of the struct as [SourceConstIter::next] does.
#[derive(Copy, Clone, Debug, Default, PartialEq)]
pub struct SourceIter<'src>(pub SourceConstIter<'src>);
impl<'src> SourceIter<'src> {
pub const fn new (source: &'src str) -> Self {
Self(SourceConstIter::new(source))
}
pub const fn peek (&self) -> Option<Cst<'src>> {
self.0.peek()
}
}
impl<'src> Iterator for SourceIter<'src> {
type Item = Cst<'src>;
fn next (&mut self) -> Option<Cst<'src>> {
self.0.next().map(|(item, rest)|{
self.0 = rest;
item
})
}
}
impl<'src> From<&'src str> for SourceIter<'src> {
fn from (source: &'src str) -> Self{
Self(SourceConstIter(source))
}
}
impl<'src> Into<Vec<Cst<'src>>> for SourceIter<'src> {
fn into (self) -> Vec<Cst<'src>> {
self.collect()
}
}
impl<'src> Into<Vec<Ast>> for SourceIter<'src> {
fn into (self) -> Vec<Ast> {
self.map(Into::into).collect()
}
/// Converts one ASCII decimal digit (`'0'..='9'`) into its numeric value.
///
/// Any other character yields an `Unexpected` error carrying that character.
pub const fn to_digit (c: char) -> DslResult<usize> {
    match c {
        // The ten ASCII digits are contiguous, so the value is the distance from '0'.
        '0'..='9' => Ok((c as u32 - '0' as u32) as usize),
        _ => Result::Err(Unexpected(c)),
    }
}