//! The concrete syntax tree (CST) implements zero-copy //! parsing of the DSL from a string reference. CST items //! preserve info about their location in the source. use crate::*; /// CST stores strings as source references and expressions as [CstIter] instances. #[derive(Debug, Clone, Default, PartialEq)] pub struct Cst<'s>(pub CstIter<'s>); impl<'s> Dsl for Cst<'s> { type Str = &'s str; type Exp = CstIter<'s>; fn nth (&self, index: usize) -> Option> { self.0.nth(index) } } /// Parsed substring with range and value. #[derive(Debug, Copy, Clone, Default, PartialEq)] pub struct CstVal<'s> { /// Meaning of token. pub value: DslVal<&'s str, CstIter<'s>>, /// Reference to source text. pub source: &'s str, /// Index of 1st character of token. pub start: usize, /// Length of token. pub length: usize, } impl<'s> Dsl for CstVal<'s> { type Str = &'s str; type Exp = CstIter<'s>; fn nth (&self, index: usize) -> Option> { todo!() } } impl<'s> CstVal<'s> { pub const fn new ( source: &'s str, start: usize, length: usize, value: DslVal<&'s str, CstIter<'s>> ) -> Self { Self { source, start, length, value } } pub const fn end (&self) -> usize { self.start.saturating_add(self.length) } pub const fn slice (&'s self) -> &'s str { self.slice_source(self.source) } pub const fn slice_source <'range> (&'s self, source: &'range str) -> &'range str { str_range(source, self.start, self.end()) } pub const fn slice_source_exp <'range> (&'s self, source: &'range str) -> &'range str { str_range(source, self.start.saturating_add(1), self.end()) } pub const fn with_value (self, value: DslVal<&'s str, CstIter<'s>>) -> Self { Self { value, ..self } } pub const fn value (&self) -> DslVal<&'s str, CstIter<'s>> { self.value } pub const fn error (self, error: DslErr) -> Self { Self { value: DslVal::Err(error), ..self } } pub const fn grow (self) -> Self { Self { length: self.length.saturating_add(1), ..self } } pub const fn grow_num (self, m: usize, c: char) -> Self { match to_digit(c) { Result::Ok(n) => Self { value: DslVal::Num(10*m+n), ..self.grow() }, Result::Err(e) => Self { value: DslVal::Err(e), ..self.grow() }, } } pub const fn grow_key (self) -> Self { let token = self.grow(); token.with_value(DslVal::Key(token.slice_source(self.source))) } pub const fn grow_sym (self) -> Self { let token = self.grow(); token.with_value(DslVal::Sym(token.slice_source(self.source))) } pub const fn grow_str (self) -> Self { let token = self.grow(); token.with_value(DslVal::Str(token.slice_source(self.source))) } pub const fn grow_exp (self) -> Self { let token = self.grow(); if let DslVal::Exp(depth, _) = token.value() { token.with_value(DslVal::Exp(depth, CstIter::new(token.slice_source_exp(self.source)))) } else { unreachable!() } } pub const fn grow_in (self) -> Self { let token = self.grow_exp(); if let DslVal::Exp(depth, source) = token.value() { token.with_value(DslVal::Exp(depth.saturating_add(1), source)) } else { unreachable!() } } pub const fn grow_out (self) -> Self { let token = self.grow_exp(); if let DslVal::Exp(depth, source) = token.value() { if depth > 0 { token.with_value(DslVal::Exp(depth - 1, source)) } else { return self.error(Unexpected(')')) } } else { unreachable!() } } } /// Provides a native [Iterator] API over [CstConstIter], /// emitting [Cst] items. /// /// [Cst::next] returns just the [Cst] and mutates `self`, /// instead of returning an updated version of the struct as [CstConstIter::next] does. #[derive(Copy, Clone, Debug, Default, PartialEq)] pub struct CstIter<'s>(pub CstConstIter<'s>); impl<'s> Dsl for CstIter<'s> { type Str = &'s str; type Exp = Self; fn nth (&self, index: usize) -> Option> { self.0.nth(index).map(|x|dsl_val(x)) } } impl<'s> CstIter<'s> { pub const fn new (source: &'s str) -> Self { Self(CstConstIter::new(source)) } pub const fn peek (&self) -> Option> { self.0.peek() } } impl<'s> Iterator for CstIter<'s> { type Item = CstVal<'s>; fn next (&mut self) -> Option> { self.0.next().map(|(item, rest)|{ self.0 = rest; item }) } } impl<'s> Into>> for CstIter<'s> { fn into (self) -> Vec> { self.collect() } } impl<'s> Into> for CstIter<'s> { fn into (self) -> Vec { self.map(Into::into).collect() } } /// Owns a reference to the source text. /// [CstConstIter::next] emits subsequent pairs of: /// * a [Cst] and /// * the source text remaining /// * [ ] TODO: maybe [CstConstIter::next] should wrap the remaining source in `Self` ? #[derive(Copy, Clone, Debug, Default, PartialEq)] pub struct CstConstIter<'s>(pub &'s str); impl<'s> Dsl for CstConstIter<'s> { type Str = &'s str; type Exp = Self; fn nth (&self, mut index: usize) -> Option> { use DslVal::*; let mut iter = self.clone(); for i in 0..index { iter = iter.next()?.1 } iter.next().map(|(x, _)|dsl_val(x.value)) } } impl<'s> CstConstIter<'s> { pub const fn new (source: &'s str) -> Self { Self(source) } pub const fn chomp (&self, index: usize) -> Self { Self(split_at(self.0, index).1) } pub const fn next (mut self) -> Option<(CstVal<'s>, Self)> { Self::next_mut(&mut self) } pub const fn peek (&self) -> Option> { peek_src(self.0) } pub const fn next_mut (&mut self) -> Option<(CstVal<'s>, Self)> { match self.peek() { Some(token) => Some((token, self.chomp(token.end()))), None => None } } } impl<'s> From> for CstIter<'s> { fn from (iter: CstConstIter<'s>) -> Self { Self(iter) } } impl<'s> From> for CstConstIter<'s> { fn from (iter: CstIter<'s>) -> Self { iter.0 } } /// Implement the const iterator pattern. macro_rules! const_iter { ($(<$l:lifetime>)?|$self:ident: $Struct:ty| => $Item:ty => $expr:expr) => { impl$(<$l>)? Iterator for $Struct { type Item = $Item; fn next (&mut $self) -> Option<$Item> { $expr } } impl$(<$l>)? ConstIntoIter for $Struct { type Kind = IsIteratorKind; type Item = $Item; type IntoIter = Self; } } } const_iter!(<'s>|self: CstConstIter<'s>| => CstVal<'s> => self.next_mut().map(|(result, _)|result)); /// Static iteration helper used by [cst]. macro_rules! iterate { ($expr:expr => $arg: pat => $body:expr) => { let mut iter = $expr; while let Some(($arg, next)) = iter.next() { $body; iter = next; } } } pub const fn peek_src <'s> (source: &'s str) -> Option> { use DslVal::*; let mut token: CstVal<'s> = CstVal::new(source, 0, 0, Nil); iterate!(char_indices(source) => (start, c) => token = match token.value() { Err(_) => return Some(token), Nil => match c { ' '|'\n'|'\r'|'\t' => token.grow(), '(' => CstVal::new(source, start, 1, Exp(1, CstIter::new(str_range(source, start, start + 1)))), '"' => CstVal::new(source, start, 1, Str(str_range(source, start, start + 1))), ':'|'@' => CstVal::new(source, start, 1, Sym(str_range(source, start, start + 1))), '/'|'a'..='z' => CstVal::new(source, start, 1, Key(str_range(source, start, start + 1))), '0'..='9' => CstVal::new(source, start, 1, match to_digit(c) { Ok(c) => DslVal::Num(c), Result::Err(e) => DslVal::Err(e) }), _ => token.error(Unexpected(c)) }, Str(_) => match c { '"' => return Some(token), _ => token.grow_str(), }, Num(n) => match c { '0'..='9' => token.grow_num(n, c), ' '|'\n'|'\r'|'\t'|')' => return Some(token), _ => token.error(Unexpected(c)) }, Sym(_) => match c { 'a'..='z'|'A'..='Z'|'0'..='9'|'-' => token.grow_sym(), ' '|'\n'|'\r'|'\t'|')' => return Some(token), _ => token.error(Unexpected(c)) }, Key(_) => match c { 'a'..='z'|'0'..='9'|'-'|'/' => token.grow_key(), ' '|'\n'|'\r'|'\t'|')' => return Some(token), _ => token.error(Unexpected(c)) }, Exp(depth, _) => match depth { 0 => return Some(token.grow_exp()), _ => match c { ')' => token.grow_out(), '(' => token.grow_in(), _ => token.grow_exp(), } }, }); match token.value() { Nil => None, _ => Some(token), } } pub const fn to_number (digits: &str) -> DslResult { let mut value = 0; iterate!(char_indices(digits) => (_, c) => match to_digit(c) { Ok(digit) => value = 10 * value + digit, Result::Err(e) => return Result::Err(e) }); Ok(value) } pub const fn to_digit (c: char) -> DslResult { Ok(match c { '0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4, '5' => 5, '6' => 6, '7' => 7, '8' => 8, '9' => 9, _ => return Result::Err(Unexpected(c)) }) }