From d2095111f180fe338341656cf20d97bf9c6068fd Mon Sep 17 00:00:00 2001 From: unspeaker Date: Thu, 31 Jul 2025 22:33:59 +0300 Subject: [PATCH] dsl: testing expression handling --- dsl/README.md | 20 +++++++ dsl/src/dsl.rs | 91 ++++++++++++++--------------- dsl/src/dsl_parse.rs | 133 +++++++++++++++++++++++++++++-------------- dsl/src/dsl_test.rs | 90 +++++++++++++++++++++++++++++ 4 files changed, 247 insertions(+), 87 deletions(-) diff --git a/dsl/README.md b/dsl/README.md index 97f1d2c..009a2f9 100644 --- a/dsl/README.md +++ b/dsl/README.md @@ -92,3 +92,23 @@ or configuration statements, and look like this: * [ ] const parse * [ ] live reload * [ ] serialize modified code back to original indentation + +## implementation notes + +### `DslExp` trait behavior + +this is the trait which differentiates "a thing" from +"a thing that is many things". + +|source |key|exp |head |tail | +|-----------------------|---|-------|---------|---------------| +|`a` |`a`|E0 |`a` |none | +|`(a)` |E1 |`a` |`a` |none | +|`a b c` |E2 |E0 |`a` |`b c` | +|`(a b c)` |E0 |`a b c`|`(a b c)`|none | +|`(a b c) d e f` |E0 |E2 |`(a b c)`|`d e f` | +|`a (b c d) e f` |E2 |E0 |`a` |`(b c d) e f` | + +* **E0**: Expected `(` +* **E1**: Unexpected `(` +* **E2**: Trailing characters diff --git a/dsl/src/dsl.rs b/dsl/src/dsl.rs index d8de0f5..51a5250 100644 --- a/dsl/src/dsl.rs +++ b/dsl/src/dsl.rs @@ -8,7 +8,8 @@ use const_panic::PanicFmt; use std::fmt::Debug; pub(crate) use std::error::Error; pub(crate) use std::sync::Arc; -pub(crate) use konst::string::{str_range, char_indices}; +pub(crate) use konst::iter::for_each; +pub(crate) use konst::string::{str_from, str_range, char_indices}; pub(crate) use thiserror::Error; pub(crate) use ::tengri_core::*; pub(crate) use self::DslError::*; @@ -16,77 +17,52 @@ mod dsl_conv; pub use self::dsl_conv::*; mod dsl_parse; pub(crate) use self::dsl_parse::*; pub mod parse { pub use crate::dsl_parse::*; } - -#[cfg(test)] -mod dsl_test; - +#[cfg(test)] mod dsl_test; flex_trait!(Dsl: Debug + Send + Sync + Sized { - fn src (&self) -> &str { - unreachable!("Dsl::src default impl") - } + fn src (&self) -> DslPerhaps<&str> { unreachable!("Dsl::src default impl") } }); impl Dsl for Arc { - fn src (&self) -> &str { self.as_ref() } + fn src (&self) -> DslPerhaps<&str> { Ok(Some(self.as_ref())) } } impl<'s> Dsl for &'s str { - fn src (&self) -> &str { self.as_ref() } + fn src (&self) -> DslPerhaps<&str> { Ok(Some(self.as_ref())) } } impl Dsl for Option { - fn src (&self) -> &str { if let Some(dsl) = self { dsl.src() } else { "" } } + fn src (&self) -> DslPerhaps<&str> { + Ok(if let Some(dsl) = self { dsl.src()? } else { None }) + } +} +impl Dsl for Result { + fn src (&self) -> DslPerhaps<&str> { + match self { Ok(dsl) => Ok(dsl.src()?), Err(e) => Err(*e) } + } } - impl DslExp for D {} pub trait DslExp: Dsl { - fn exp (&self) -> DslPerhaps<&str> { - Ok(exp_peek(self.src())?) - } - fn head (&self) -> DslPerhaps<&str> { - let start = 1; - let src = self.src(); - let src = &src[start.min(src.len().saturating_sub(1))..]; - peek(src) - } - fn tail (&self) -> DslPerhaps<&str> { - let start = 1; - let src = self.src(); - let src = &src[start.min(src.len().saturating_sub(1))..]; - Ok(if let Some((head_start, head_len)) = seek(src)? { - let start = 1 + head_start + head_len; - let src = self.src(); - let src = &src[start.min(src.len().saturating_sub(1))..]; - peek(src)? - } else { - None - }) - } + fn exp (&self) -> DslPerhaps<&str> { ok_flat(self.src()?.map(exp_peek_inner)) } + fn head (&self) -> DslPerhaps<&str> { ok_flat(self.src()?.map(peek)) } + fn tail (&self) -> DslPerhaps<&str> { ok_flat(self.src()?.map(peek_tail(self.head()))) } } - impl DslSym for D {} pub trait DslSym: Dsl { - fn sym (&self) -> DslPerhaps<&str> { crate::parse::sym_peek(self.src()) } + fn sym (&self) -> DslPerhaps<&str> { ok_flat(self.src()?.map(sym_peek_only)) } } - impl DslKey for D {} pub trait DslKey: Dsl { - fn key (&self) -> DslPerhaps<&str> { crate::parse::key_peek(self.src()) } + fn key (&self) -> DslPerhaps<&str> { ok_flat(self.src()?.map(key_peek_only)) } } - impl DslText for D {} pub trait DslText: Dsl { - fn text (&self) -> DslPerhaps<&str> { crate::parse::text_peek(self.src()) } + fn text (&self) -> DslPerhaps<&str> { ok_flat(self.src()?.map(text_peek_only)) } } - impl DslNum for D {} pub trait DslNum: Dsl { - fn num (&self) -> DslPerhaps<&str> { crate::parse::num_peek(self.src()) } + fn num (&self) -> DslPerhaps<&str> { ok_flat(self.src()?.map(num_peek_only)) } } - /// DSL-specific result type. pub type DslResult = Result; - /// DSL-specific optional result type. pub type DslPerhaps = Result, DslError>; - /// DSL-specific error codes. #[derive(Error, Debug, Copy, Clone, PartialEq, PanicFmt)] pub enum DslError { @@ -97,3 +73,28 @@ pub enum DslError { #[error("parse failed: error #{0}")] Code(u8), #[error("end reached")] End } +#[macro_export] macro_rules! dsl_for_each (($dsl:expr => |$next:ident|$body:expr)=>{ + let mut dsl: Arc = $dsl.src().into(); + let mut $next: Option> = dsl.next()?.map(Into::into); + let mut rest: Option> = dsl.rest()?.map(Into::into); + loop { + if let Some($next) = $next { $body; } else { break } + if let Some(next) = rest { + $next = next.next()?.map(Into::into); + rest = next.rest()?.map(Into::into); + } else { + break + } + } +}); +fn ok_flat (x: Option>) -> DslPerhaps { Ok(x.transpose()?.flatten()) } +fn peek_tail <'a> (head: DslPerhaps<&'a str>) -> impl Fn(&'a str)->DslPerhaps<&'a str> { + move|src|match head { + Ok(Some(next)) => { + let src = &src[src.len().min(1 + next.len())..]; + for c in src.chars() { if !is_whitespace(c) { return Ok(Some(src)) } } + Ok(None) + }, + e => e + } +} diff --git a/dsl/src/dsl_parse.rs b/dsl/src/dsl_parse.rs index 25eef20..bd8208d 100644 --- a/dsl/src/dsl_parse.rs +++ b/dsl/src/dsl_parse.rs @@ -1,123 +1,172 @@ use crate::*; -macro_rules! iter_chars(($source:expr => |$i:ident, $c:ident|$val:expr)=>{ - while let Some((($i, $c), next)) = char_indices($source).next() { - $source = next.as_str(); $val } }); - -macro_rules! def_peek_seek(($peek:ident, $seek:ident, $seek_start:ident, $seek_length:ident)=>{ +macro_rules! def_peek_seek(($peek:ident, $peek_only:ident, $seek:ident, $seek_start:ident, $seek_length:ident)=>{ /// Find a slice corrensponding to a syntax token. pub const fn $peek (source: &str) -> DslPerhaps<&str> { match $seek(source) { Ok(Some((start, length))) => Ok(Some(str_range(source, start, start + length))), Ok(None) => Ok(None), - Err(e) => Err(e) } } + Err(e) => Err(e) + } + } + /// Find a slice corrensponding to a syntax token + /// but return an error if it isn't the only thing + /// in the source. + pub const fn $peek_only (source: &str) -> DslPerhaps<&str> { + match $seek(source) { + Ok(Some((start, length))) => { + let remaining = source.len().saturating_sub(start + length); + let tail = str_range(source, start + length, remaining); + for_each!((_, c) in char_indices(tail) => if !is_whitespace(c) { + return Err(Unexpected(c)) + }); + Ok(Some(str_range(source, start, start + length))) + } + Ok(None) => Ok(None), + Err(e) => Err(e) + } + } /// Find a start and length corresponding to a syntax token. pub const fn $seek (source: &str) -> DslPerhaps<(usize, usize)> { match $seek_start(source) { - Ok(Some(start)) => match $seek_length(str_range(source, start, source.len() - start)) { + Ok(Some(start)) => match $seek_length(str_from(source, start)) { Ok(Some(length)) => Ok(Some((start, length))), Ok(None) => Ok(None), Err(e) => Err(e), }, Ok(None) => Ok(None), - Err(e) => Err(e) } } }); + Err(e) => Err(e) + } + } +}); -def_peek_seek!(exp_peek, exp_seek, exp_seek_start, exp_seek_length); -pub const fn is_exp_start (c: char) -> bool { matches!(c, '(') } -pub const fn is_exp_end (c: char) -> bool { matches!(c, ')') } -pub const fn exp_seek_start (mut source: &str) -> DslPerhaps { - iter_chars!(source => |i, c| if is_exp_start(c) { +pub const fn no_trailing_non_whitespace (source: &str, offset: usize) -> DslResult<()> { + let tail = str_range(source, offset, source.len().saturating_sub(offset)); + for_each!((_, c) in char_indices(tail) => if !is_whitespace(c) { + return Err(Unexpected(c)) + }); + Ok(()) +} + +def_peek_seek!(exp_peek, exp_peek_only, exp_seek, exp_seek_start, exp_seek_length); +pub const fn exp_peek_inner (source: &str) -> DslPerhaps<&str> { + match exp_peek(source) { + Ok(Some(peeked)) => { + let len = peeked.len(); + let start = if len > 0 { 1 } else { 0 }; + Ok(Some(str_range(source, start, start + len.saturating_sub(2)))) + }, + e => e + } +} +pub const fn exp_peek_inner_only (source: &str) -> DslPerhaps<&str> { + match exp_seek(source) { + Err(e) => Err(e), + Ok(None) => Ok(None), + Ok(Some((start, length))) => { + if let Err(e) = no_trailing_non_whitespace(source, start) { + return Err(e) + } + let start = if length > 0 { 1 } else { 0 }; + Ok(Some(str_range(source, start, start + length.saturating_sub(2)))) + }, + } +} +pub const fn is_exp_start (c: char) -> bool { c == '(' } +pub const fn is_exp_end (c: char) -> bool { c == ')' } +pub const fn exp_seek_start (source: &str) -> DslPerhaps { + for_each!((i, c) in char_indices(source) => if is_exp_start(c) { return Ok(Some(i)) } else if !is_whitespace(c) { return Err(Unexpected(c)) }); Ok(None) } -pub const fn exp_seek_length (mut source: &str) -> DslPerhaps { +pub const fn exp_seek_length (source: &str) -> DslPerhaps { let mut depth = 0; - iter_chars!(source => |i, c| if is_exp_start(c) { + for_each!((i, c) in char_indices(source) => if is_exp_start(c) { depth += 1; } else if is_exp_end(c) { if depth == 0 { return Err(Unexpected(c)) } else if depth == 1 { - return Ok(Some(i)) + return Ok(Some(i + 1)) } else { depth -= 1; } }); - Ok(None) + Err(Incomplete) } -def_peek_seek!(sym_peek, sym_seek, sym_seek_start, sym_seek_length); +def_peek_seek!(sym_peek, sym_peek_only, sym_seek, sym_seek_start, sym_seek_length); pub const fn is_sym_start (c: char) -> bool { matches!(c, ':'|'@') } pub const fn is_sym_char (c: char) -> bool { matches!(c, 'a'..='z'|'A'..='Z'|'0'..='9'|'-') } pub const fn is_sym_end (c: char) -> bool { matches!(c, ' '|'\n'|'\r'|'\t'|')') } -pub const fn sym_seek_start (mut source: &str) -> DslPerhaps { - iter_chars!(source => |i, c| if is_sym_start(c) { +pub const fn sym_seek_start (source: &str) -> DslPerhaps { + for_each!((i, c) in char_indices(source) => if is_sym_start(c) { return Ok(Some(i)) } else if !is_whitespace(c) { return Err(Unexpected(c)) }); Ok(None) } -pub const fn sym_seek_length (mut source: &str) -> DslPerhaps { - iter_chars!(source => |i, c| if is_sym_end(c) { +pub const fn sym_seek_length (source: &str) -> DslPerhaps { + for_each!((i, c) in char_indices(source) => if is_sym_end(c) { return Ok(Some(i)) } else if !is_sym_char(c) { return Err(Unexpected(c)) }); - Ok(None) + Ok(Some(source.len())) } -def_peek_seek!(key_peek, key_seek, key_seek_start, key_seek_length); -pub const fn is_key_start (c: char) -> bool { matches!(c, '/'|'a'..='z') } +def_peek_seek!(key_peek, key_peek_only, key_seek, key_seek_start, key_seek_length); +pub const fn is_key_start (c: char) -> bool { matches!(c, '/'|('a'..='z')) } pub const fn is_key_char (c: char) -> bool { matches!(c, 'a'..='z'|'0'..='9'|'-'|'/') } pub const fn is_key_end (c: char) -> bool { matches!(c, ' '|'\n'|'\r'|'\t'|')') } -pub const fn key_seek_start (mut source: &str) -> DslPerhaps { - iter_chars!(source => |i, c| if is_key_start(c) { +pub const fn key_seek_start (source: &str) -> DslPerhaps { + for_each!((i, c) in char_indices(source) => if is_key_start(c) { return Ok(Some(i)) } else if !is_whitespace(c) { return Err(Unexpected(c)) }); Ok(None) } -pub const fn key_seek_length (mut source: &str) -> DslPerhaps { - iter_chars!(source => |i, c| if is_key_end(c) { +pub const fn key_seek_length (source: &str) -> DslPerhaps { + for_each!((i, c) in char_indices(source) => if is_key_end(c) { return Ok(Some(i)) } else if !is_key_char(c) { return Err(Unexpected(c)) }); - Ok(None) + Ok(Some(source.len())) } -def_peek_seek!(text_peek, text_seek, text_seek_start, text_seek_length); +def_peek_seek!(text_peek, text_peek_only, text_seek, text_seek_start, text_seek_length); pub const fn is_text_start (c: char) -> bool { matches!(c, '"') } pub const fn is_text_end (c: char) -> bool { matches!(c, '"') } -pub const fn text_seek_start (mut source: &str) -> DslPerhaps { - iter_chars!(source => |i, c| if is_text_start(c) { +pub const fn text_seek_start (source: &str) -> DslPerhaps { + for_each!((i, c) in char_indices(source) => if is_text_start(c) { return Ok(Some(i)) } else if !is_whitespace(c) { return Err(Unexpected(c)) }); Ok(None) } -pub const fn text_seek_length (mut source: &str) -> DslPerhaps { - iter_chars!(source => |i, c| if is_text_end(c) { return Ok(Some(i)) }); +pub const fn text_seek_length (source: &str) -> DslPerhaps { + for_each!((i, c) in char_indices(source) => if is_text_end(c) { return Ok(Some(i)) }); Ok(None) } -def_peek_seek!(num_peek, num_seek, num_seek_start, num_seek_length); -pub const fn num_seek_start (mut source: &str) -> DslPerhaps { - iter_chars!(source => |i, c| if is_digit(c) { +def_peek_seek!(num_peek, num_peek_only, num_seek, num_seek_start, num_seek_length); +pub const fn num_seek_start (source: &str) -> DslPerhaps { + for_each!((i, c) in char_indices(source) => if is_digit(c) { return Ok(Some(i)); } else if !is_whitespace(c) { return Err(Unexpected(c)) }); Ok(None) } -pub const fn num_seek_length (mut source: &str) -> DslPerhaps { - iter_chars!(source => |i, c| if is_num_end(c) { +pub const fn num_seek_length (source: &str) -> DslPerhaps { + for_each!((i, c) in char_indices(source) => if is_num_end(c) { return Ok(Some(i)) } else if !is_digit(c) { return Err(Unexpected(c)) @@ -154,7 +203,7 @@ pub const fn peek (mut src: &str) -> DslPerhaps<&str> { _ if let Ok(Some(num)) = num_peek(src) => num, _ if let Ok(Some(text)) = text_peek(src) => text, _ => { - iter_chars!(src => |_i, c| if !is_whitespace(c) { + for_each!((i, c) in char_indices(src) => if !is_whitespace(c) { return Err(Unexpected(c)) }); return Ok(None) @@ -170,7 +219,7 @@ pub const fn seek (mut src: &str) -> DslPerhaps<(usize, usize)> { _ if let Ok(Some(num)) = num_seek(src) => num, _ if let Ok(Some(text)) = text_seek(src) => text, _ => { - iter_chars!(src => |_i, c| if !is_whitespace(c) { + for_each!((i, c) in char_indices(src) => if !is_whitespace(c) { return Err(Unexpected(c)) }); return Ok(None) diff --git a/dsl/src/dsl_test.rs b/dsl/src/dsl_test.rs index e69de29..be491fe 100644 --- a/dsl/src/dsl_test.rs +++ b/dsl/src/dsl_test.rs @@ -0,0 +1,90 @@ +use crate::*; +macro_rules!is_ok_some(($exp:expr, $val:expr)=>{assert_eq!($exp, Ok(Some($val)))};); +macro_rules!is_ok_none(($exp:expr)=>{assert_eq!($exp, Ok(None))};); +macro_rules!is_err(($exp:expr)=>{assert!($exp.is_err())}; + ($exp:expr, $err:expr)=>{assert_eq!($exp, Err($err))};); +#[test] fn test_exp () -> Result<(), DslError> { + let e0 = DslError::Unexpected('a'); + let e1 = DslError::Unexpected('('); + let e2 = DslError::Unexpected('b'); + let e3 = DslError::Unexpected('d'); + let check = |src: &str, key, exp, head, tail|{ + assert_eq!(src.key(), key, "{src}"); + assert_eq!(src.exp(), exp, "{src}"); + assert_eq!(src.head(), head, "{src}"); + assert_eq!(src.tail(), tail, "{src}"); + }; + check("a", Ok(Some("a")), Err(e0), Ok(Some("a")), Ok(None)); + check("(a)", Err(e1), Ok(Some("a")), Ok(Some("(a)")), Ok(None)); + check("a b c", Err(e2), Err(e0), Ok(Some("a")), Ok(Some("b c"))); + check("(a b c)", Err(e1), Ok(Some("a b c")), Ok(Some("(a b c)")), Ok(None)); + check("(a b c) d e f", Err(e1), Err(e3), Ok(Some("(a b c)")), Ok(Some("d e f"))); + check("a (b c d) e f", Err(e1), Err(e0), Ok(Some("a")), Ok(Some("(b c d) e f"))); + + assert!(is_whitespace(' ')); + assert!(!is_key_start(' ')); + assert!(is_key_start('f')); + + is_ok_some!(key_seek_start("foo"), 0); + is_ok_some!(key_seek_start("foo "), 0); + is_ok_some!(key_seek_start(" foo "), 1); + is_ok_some!(key_seek_length(&" foo "[1..]), 3); + is_ok_some!(key_seek("foo"), (0, 3)); + is_ok_some!(key_peek("foo"), "foo"); + is_ok_some!(key_seek("foo "), (0, 3)); + is_ok_some!(key_peek("foo "), "foo"); + is_ok_some!(key_seek(" foo "), (1, 3)); + is_ok_some!(key_peek(" foo "), "foo"); + + is_err!("(foo)".key()); + is_err!("foo".exp()); + + is_ok_some!("(foo)".exp(), "foo"); + is_ok_some!("(foo)".head(), "(foo)"); + is_ok_none!("(foo)".tail()); + + is_ok_some!("(foo bar baz)".exp(), "foo bar baz"); + is_ok_some!("(foo bar baz)".head(), "(foo bar baz)"); + is_ok_none!("(foo bar baz)".tail()); + + is_ok_some!("(foo bar baz)".exp().head(), "foo"); + is_ok_some!("(foo bar baz)".exp().tail(), "bar baz"); + is_ok_some!("(foo bar baz)".exp().tail().head(), "bar"); + is_ok_some!("(foo bar baz)".exp().tail().tail(), "baz"); + + is_ok_none!("foo".exp()); + is_ok_some!("foo".key(), "foo"); + is_ok_some!(" foo".key(), "foo"); + is_ok_some!(" foo ".key(), "foo"); + + assert_eq!(" foo ".head(), Ok(Some("foo"))); + //assert_eq!(" foo ".head().head(), Ok(None)); + assert_eq!(" foo ".head().tail(), Ok(None)); + assert_eq!(" foo ".tail(), Ok(None)); + assert_eq!(" foo ".tail().head(), Ok(None)); + assert_eq!(" foo ".tail().tail(), Ok(None)); + + assert_eq!(" foo bar ".head(), Ok(Some("foo"))); + //assert_eq!(" foo bar ".head().head(), Ok(None)); + assert_eq!(" foo bar ".head().tail(), Ok(None)); + assert_eq!(" foo bar ".tail(), Ok(Some("bar"))); + assert_eq!(" foo bar ".tail().head(), Ok(Some("bar"))); + assert_eq!(" foo bar ".tail().tail(), Ok(None)); + + assert_eq!(" (foo) ".head(), Ok(Some("(foo)"))); + //assert_eq!(" (foo) ".head().head(), Ok(Some("foo"))); + //assert_eq!(" (foo) ".head().head().head(), Ok(None)); + assert_eq!(" (foo) ".tail(), Ok(None)); + + assert_eq!(" (foo) (bar) ".head(), Ok(Some("(foo)"))); + //assert_eq!(" (foo) (bar) ".head().head(), Ok(Some("foo"))); + //assert_eq!(" (foo) (bar) ".head().head().head(), Ok(None)); + assert_eq!(" (foo) (bar) ".tail(), Ok(Some("(bar)"))); + + assert_eq!(" (foo bar baz) ".head(), Ok(Some("(foo bar baz)"))); + assert_eq!(" (foo bar baz) ".head().head(), Ok(Some("foo"))); + assert_eq!(" (foo bar baz) ".head().tail(), Ok(Some("bar baz"))); + assert_eq!(" (foo bar baz) ".head().tail().head(), Ok(Some("bar"))); + assert_eq!(" (foo bar baz) ".head().tail().tail(), Ok(Some("baz"))); + Ok(()) +}