mirror of
https://codeberg.org/unspeaker/tengri.git
synced 2025-12-06 03:36:42 +01:00
312 lines
9.9 KiB
Rust
312 lines
9.9 KiB
Rust
//! The concrete syntax tree (CST) implements zero-copy
//! parsing of the DSL from a string reference. CST items
//! preserve info about their location in the source.
|
|
use crate::*;
|
|
|
|
/// CST stores strings as source references and expressions as [CstIter] instances.
|
|
#[derive(Debug, Clone, Default, PartialEq)]
|
|
pub struct Cst<'s>(pub CstIter<'s>);
|
|
impl<'s> Dsl for Cst<'s> {
|
|
type Str = &'s str;
|
|
type Exp = CstIter<'s>;
|
|
fn nth (&self, index: usize) -> Option<DslVal<Self::Str, Self::Exp>> {
|
|
self.0.nth(index)
|
|
}
|
|
}
|
|
|
|
/// Parsed substring with range and value.
|
|
#[derive(Debug, Copy, Clone, Default, PartialEq)]
|
|
pub struct CstVal<'s> {
|
|
/// Meaning of token.
|
|
pub value: DslVal<&'s str, CstIter<'s>>,
|
|
/// Reference to source text.
|
|
pub source: &'s str,
|
|
/// Index of 1st character of token.
|
|
pub start: usize,
|
|
/// Length of token.
|
|
pub length: usize,
|
|
}
|
|
impl<'s> Dsl for CstVal<'s> {
|
|
type Str = &'s str;
|
|
type Exp = CstIter<'s>;
|
|
fn nth (&self, index: usize) -> Option<DslVal<Self::Str, Self::Exp>> {
|
|
todo!()
|
|
}
|
|
}
|
|
|
|
impl<'s> CstVal<'s> {
|
|
pub const fn new (
|
|
source: &'s str,
|
|
start: usize,
|
|
length: usize,
|
|
value: DslVal<&'s str, CstIter<'s>>
|
|
) -> Self {
|
|
Self { source, start, length, value }
|
|
}
|
|
pub const fn end (&self) -> usize {
|
|
self.start.saturating_add(self.length)
|
|
}
|
|
pub const fn slice (&'s self) -> &'s str {
|
|
self.slice_source(self.source)
|
|
}
|
|
pub const fn slice_source <'range> (&'s self, source: &'range str) -> &'range str {
|
|
str_range(source, self.start, self.end())
|
|
}
|
|
pub const fn slice_source_exp <'range> (&'s self, source: &'range str) -> &'range str {
|
|
str_range(source, self.start.saturating_add(1), self.end())
|
|
}
|
|
pub const fn with_value (self, value: DslVal<&'s str, CstIter<'s>>) -> Self {
|
|
Self { value, ..self }
|
|
}
|
|
pub const fn value (&self) -> DslVal<&'s str, CstIter<'s>> {
|
|
self.value
|
|
}
|
|
pub const fn error (self, error: DslErr) -> Self {
|
|
Self { value: DslVal::Err(error), ..self }
|
|
}
|
|
pub const fn grow (self) -> Self {
|
|
Self { length: self.length.saturating_add(1), ..self }
|
|
}
|
|
pub const fn grow_num (self, m: usize, c: char) -> Self {
|
|
match to_digit(c) {
|
|
Result::Ok(n) => Self { value: DslVal::Num(10*m+n), ..self.grow() },
|
|
Result::Err(e) => Self { value: DslVal::Err(e), ..self.grow() },
|
|
}
|
|
}
|
|
pub const fn grow_key (self) -> Self {
|
|
let token = self.grow();
|
|
token.with_value(DslVal::Key(token.slice_source(self.source)))
|
|
}
|
|
pub const fn grow_sym (self) -> Self {
|
|
let token = self.grow();
|
|
token.with_value(DslVal::Sym(token.slice_source(self.source)))
|
|
}
|
|
pub const fn grow_str (self) -> Self {
|
|
let token = self.grow();
|
|
token.with_value(DslVal::Str(token.slice_source(self.source)))
|
|
}
|
|
pub const fn grow_exp (self) -> Self {
|
|
let token = self.grow();
|
|
if let DslVal::Exp(depth, _) = token.value() {
|
|
token.with_value(DslVal::Exp(depth, CstIter::new(token.slice_source_exp(self.source))))
|
|
} else {
|
|
unreachable!()
|
|
}
|
|
}
|
|
pub const fn grow_in (self) -> Self {
|
|
let token = self.grow_exp();
|
|
if let DslVal::Exp(depth, source) = token.value() {
|
|
token.with_value(DslVal::Exp(depth.saturating_add(1), source))
|
|
} else {
|
|
unreachable!()
|
|
}
|
|
}
|
|
pub const fn grow_out (self) -> Self {
|
|
let token = self.grow_exp();
|
|
if let DslVal::Exp(depth, source) = token.value() {
|
|
if depth > 0 {
|
|
token.with_value(DslVal::Exp(depth - 1, source))
|
|
} else {
|
|
return self.error(Unexpected(')'))
|
|
}
|
|
} else {
|
|
unreachable!()
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Provides a native [Iterator] API over [CstConstIter],
|
|
/// emitting [Cst] items.
|
|
///
|
|
/// [Cst::next] returns just the [Cst] and mutates `self`,
|
|
/// instead of returning an updated version of the struct as [CstConstIter::next] does.
|
|
#[derive(Copy, Clone, Debug, Default, PartialEq)]
|
|
pub struct CstIter<'s>(pub CstConstIter<'s>);
|
|
impl<'s> Dsl for CstIter<'s> {
|
|
type Str = &'s str;
|
|
type Exp = Self;
|
|
fn nth (&self, index: usize) -> Option<DslVal<Self::Str, Self::Exp>> {
|
|
self.0.nth(index).map(|x|dsl_val(x))
|
|
}
|
|
}
|
|
impl<'s> CstIter<'s> {
|
|
pub const fn new (source: &'s str) -> Self {
|
|
Self(CstConstIter::new(source))
|
|
}
|
|
pub const fn peek (&self) -> Option<CstVal<'s>> {
|
|
self.0.peek()
|
|
}
|
|
}
|
|
impl<'s> Iterator for CstIter<'s> {
|
|
type Item = CstVal<'s>;
|
|
fn next (&mut self) -> Option<CstVal<'s>> {
|
|
self.0.next().map(|(item, rest)|{
|
|
self.0 = rest;
|
|
item
|
|
})
|
|
}
|
|
}
|
|
/// Collects every remaining token of the iterator into a vector.
///
/// Implemented as [From]; the matching [Into] conversion is supplied by
/// the standard library's blanket implementation, so `iter.into()` keeps working.
impl<'s> From<CstIter<'s>> for Vec<CstVal<'s>> {
    fn from(iter: CstIter<'s>) -> Self {
        iter.collect()
    }
}
|
|
/// Converts every remaining token of the iterator into an [Ast]
/// and collects the results into a vector.
///
/// Implemented as [From]; the matching [Into] conversion is supplied by
/// the standard library's blanket implementation, so `iter.into()` keeps working.
impl<'s> From<CstIter<'s>> for Vec<Ast> {
    fn from(iter: CstIter<'s>) -> Self {
        iter.map(Into::into).collect()
    }
}
|
|
|
|
/// Cursor over the part of the source text that has not been tokenized yet.
///
/// [CstConstIter::next] yields a pair of:
/// * the next [CstVal], and
/// * a [CstConstIter] over the text remaining after that token.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct CstConstIter<'s>(pub &'s str);
|
|
impl<'s> Dsl for CstConstIter<'s> {
|
|
type Str = &'s str;
|
|
type Exp = Self;
|
|
fn nth (&self, mut index: usize) -> Option<DslVal<Self::Str, Self::Exp>> {
|
|
use DslVal::*;
|
|
let mut iter = self.clone();
|
|
for i in 0..index {
|
|
iter = iter.next()?.1
|
|
}
|
|
iter.next().map(|(x, _)|dsl_val(x.value))
|
|
}
|
|
}
|
|
impl<'s> CstConstIter<'s> {
|
|
pub const fn new (source: &'s str) -> Self {
|
|
Self(source)
|
|
}
|
|
pub const fn chomp (&self, index: usize) -> Self {
|
|
Self(split_at(self.0, index).1)
|
|
}
|
|
pub const fn next (mut self) -> Option<(CstVal<'s>, Self)> {
|
|
Self::next_mut(&mut self)
|
|
}
|
|
pub const fn peek (&self) -> Option<CstVal<'s>> {
|
|
peek_src(self.0)
|
|
}
|
|
pub const fn next_mut (&mut self) -> Option<(CstVal<'s>, Self)> {
|
|
match self.peek() {
|
|
Some(token) => Some((token, self.chomp(token.end()))),
|
|
None => None
|
|
}
|
|
}
|
|
}
|
|
impl<'s> From<CstConstIter<'s>> for CstIter<'s> {
|
|
fn from (iter: CstConstIter<'s>) -> Self {
|
|
Self(iter)
|
|
}
|
|
}
|
|
impl<'s> From<CstIter<'s>> for CstConstIter<'s> {
|
|
fn from (iter: CstIter<'s>) -> Self {
|
|
iter.0
|
|
}
|
|
}
|
|
|
|
/// Implements the const iterator pattern for a type.
///
/// Given an optional lifetime, a name for the receiver, the iterator type,
/// its item type and an expression, this generates:
/// * a `ConstIntoIter` implementation marking the type as an iterator
///   (`Kind = IsIteratorKind`, `IntoIter = Self`), and
/// * an [Iterator] implementation whose `next` body is the given expression.
macro_rules! const_iter {
    ($(<$lt:lifetime>)?|$this:ident: $Iter:ty| => $Yield:ty => $step:expr) => {
        impl$(<$lt>)? ConstIntoIter for $Iter {
            type Kind = IsIteratorKind;
            type Item = $Yield;
            type IntoIter = Self;
        }
        impl$(<$lt>)? Iterator for $Iter {
            type Item = $Yield;
            fn next(&mut $this) -> Option<$Yield> { $step }
        }
    }
}
|
|
|
|
// Standard-library iteration over a `CstConstIter`: every call to `next`
// yields the next token and moves the iterator past it.
const_iter!(<'s>|self: CstConstIter<'s>|
    => CstVal<'s>
    => self.next_mut().map(|(token, rest)| {
        // `next_mut` returns the iterator over the remaining text without
        // storing it. Store it here, otherwise every call to `next` would
        // yield the same first token again and iteration would never end.
        *self = rest;
        token
    }));
|
|
|
|
/// Drives a by-value iterator to exhaustion.
///
/// The first argument is an expression whose `next` method returns
/// `Option<(item, rest)>`. Each item is bound to the given pattern, the
/// body is evaluated, and iteration continues with `rest` until `next`
/// returns `None`.
macro_rules! iterate {
    ($source:expr => $item: pat => $action:expr) => {
        let mut cursor = $source;
        loop {
            match cursor.next() {
                Some(($item, rest)) => {
                    $action;
                    cursor = rest;
                },
                None => break,
            }
        }
    }
}
|
|
|
|
pub const fn peek_src <'s> (source: &'s str) -> Option<CstVal<'s>> {
|
|
use DslVal::*;
|
|
let mut token: CstVal<'s> = CstVal::new(source, 0, 0, Nil);
|
|
iterate!(char_indices(source) => (start, c) => token = match token.value() {
|
|
Err(_) => return Some(token),
|
|
Nil => match c {
|
|
' '|'\n'|'\r'|'\t' =>
|
|
token.grow(),
|
|
'(' =>
|
|
CstVal::new(source, start, 1, Exp(1, CstIter::new(str_range(source, start, start + 1)))),
|
|
'"' =>
|
|
CstVal::new(source, start, 1, Str(str_range(source, start, start + 1))),
|
|
':'|'@' =>
|
|
CstVal::new(source, start, 1, Sym(str_range(source, start, start + 1))),
|
|
'/'|'a'..='z' =>
|
|
CstVal::new(source, start, 1, Key(str_range(source, start, start + 1))),
|
|
'0'..='9' =>
|
|
CstVal::new(source, start, 1, match to_digit(c) {
|
|
Ok(c) => DslVal::Num(c),
|
|
Result::Err(e) => DslVal::Err(e)
|
|
}),
|
|
_ => token.error(Unexpected(c))
|
|
},
|
|
Str(_) => match c {
|
|
'"' => return Some(token),
|
|
_ => token.grow_str(),
|
|
},
|
|
Num(n) => match c {
|
|
'0'..='9' => token.grow_num(n, c),
|
|
' '|'\n'|'\r'|'\t'|')' => return Some(token),
|
|
_ => token.error(Unexpected(c))
|
|
},
|
|
Sym(_) => match c {
|
|
'a'..='z'|'A'..='Z'|'0'..='9'|'-' => token.grow_sym(),
|
|
' '|'\n'|'\r'|'\t'|')' => return Some(token),
|
|
_ => token.error(Unexpected(c))
|
|
},
|
|
Key(_) => match c {
|
|
'a'..='z'|'0'..='9'|'-'|'/' => token.grow_key(),
|
|
' '|'\n'|'\r'|'\t'|')' => return Some(token),
|
|
_ => token.error(Unexpected(c))
|
|
},
|
|
Exp(depth, _) => match depth {
|
|
0 => return Some(token.grow_exp()),
|
|
_ => match c {
|
|
')' => token.grow_out(),
|
|
'(' => token.grow_in(),
|
|
_ => token.grow_exp(),
|
|
}
|
|
},
|
|
});
|
|
match token.value() {
|
|
Nil => None,
|
|
_ => Some(token),
|
|
}
|
|
}
|
|
|
|
pub const fn to_number (digits: &str) -> DslResult<usize> {
|
|
let mut value = 0;
|
|
iterate!(char_indices(digits) => (_, c) => match to_digit(c) {
|
|
Ok(digit) => value = 10 * value + digit,
|
|
Result::Err(e) => return Result::Err(e)
|
|
});
|
|
Ok(value)
|
|
}
|
|
|
|
pub const fn to_digit (c: char) -> DslResult<usize> {
|
|
Ok(match c {
|
|
'0' => 0, '1' => 1, '2' => 2, '3' => 3, '4' => 4,
|
|
'5' => 5, '6' => 6, '7' => 7, '8' => 8, '9' => 9,
|
|
_ => return Result::Err(Unexpected(c))
|
|
})
|
|
}
|