use crate::prelude::*; use nu_engine::WholeStreamCommand; use nu_errors::ShellError; use nu_protocol::{Signature, SyntaxShape, TaggedDictBuilder, UntaggedValue, Value}; use nu_source::Tagged; use regex::Regex; #[derive(Deserialize)] struct Arguments { pattern: Tagged, regex: Tagged, } pub struct Command; impl WholeStreamCommand for Command { fn name(&self) -> &str { "parse" } fn signature(&self) -> Signature { Signature::build("parse") .required( "pattern", SyntaxShape::String, "the pattern to match. Eg) \"{foo}: {bar}\"", ) .switch("regex", "use full regex syntax for patterns", Some('r')) } fn usage(&self) -> &str { "Parse columns from string data using a simple pattern." } fn run_with_actions(&self, args: CommandArgs) -> Result { operate(args) } fn examples(&self) -> Vec { let mut row = IndexMap::new(); row.insert("foo".to_string(), Value::from("hi")); row.insert("bar".to_string(), Value::from("there")); vec![Example { description: "Parse a string into two named columns", example: "echo \"hi there\" | parse \"{foo} {bar}\"", result: Some(vec![UntaggedValue::row(row).into()]), }] } } pub fn operate(args: CommandArgs) -> Result { let name_tag = args.call_info.name_tag.clone(); let (Arguments { regex, pattern }, input) = args.process()?; let regex_pattern = if let Tagged { item: true, tag } = regex { Regex::new(&pattern.item) .map_err(|_| ShellError::labeled_error("Invalid regex", "invalid regex", tag.span))? } else { let parse_regex = build_regex(&pattern.item, name_tag.clone())?; Regex::new(&parse_regex).map_err(|_| { ShellError::labeled_error("Invalid pattern", "invalid pattern", name_tag.span) })? }; let columns = column_names(®ex_pattern); let mut parsed: VecDeque = VecDeque::new(); for v in input { match v.as_string() { Ok(s) => { let results = regex_pattern.captures_iter(&s); for c in results { let mut dict = TaggedDictBuilder::new(&v.tag); for (column_name, cap) in columns.iter().zip(c.iter().skip(1)) { let cap_string = cap.map(|v| v.as_str()).unwrap_or("").to_string(); dict.insert_untagged(column_name, UntaggedValue::string(cap_string)); } parsed.push_back(dict.into_value()); } } Err(_) => { return Err(ShellError::labeled_error_with_secondary( "Expected string input", "expected string input", &name_tag, "value originated here", v.tag, )) } } } Ok(parsed.into_iter().to_action_stream()) } fn build_regex(input: &str, tag: Tag) -> Result { let mut output = "(?s)\\A".to_string(); //let mut loop_input = input; let mut loop_input = input.chars().peekable(); loop { let mut before = String::new(); while let Some(c) = loop_input.next() { if c == '{' { // If '{{', still creating a plaintext parse command, but just for a single '{' char if loop_input.peek() == Some(&'{') { let _ = loop_input.next(); } else { break; } } before.push(c); } if !before.is_empty() { output.push_str(®ex::escape(&before)); } // Look for column as we're now at one let mut column = String::new(); while let Some(c) = loop_input.next() { if c == '}' { break; } column.push(c); if loop_input.peek().is_none() { return Err(ShellError::labeled_error( "Found opening `{` without an associated closing `}`", "invalid parse pattern", tag, )); } } if !column.is_empty() { output.push_str("(?P<"); output.push_str(&column); output.push_str(">.*?)"); } if before.is_empty() && column.is_empty() { break; } } output.push_str("\\z"); Ok(output) } fn column_names(regex: &Regex) -> Vec { regex .capture_names() .enumerate() .skip(1) .map(|(i, name)| { name.map(String::from) .unwrap_or_else(|| format!("Capture{}", i)) }) .collect() } #[cfg(test)] mod tests { use super::Command; use super::ShellError; #[test] fn examples_work_as_expected() -> Result<(), ShellError> { use crate::examples::test as test_examples; test_examples(Command {}) } }