diff --git a/crates/nu-parser/src/lex.rs b/crates/nu-parser/src/lex.rs
index 11afea861d..f9e75d13f0 100644
--- a/crates/nu-parser/src/lex.rs
+++ b/crates/nu-parser/src/lex.rs
@@ -225,6 +225,19 @@ pub fn lex_item(
                     )),
                 );
             }
+        } else if c == b'r' && input.get(*curr_offset + 1) == Some(&b'#') {
+            // already checked `r#` pattern, so it's a raw string.
+            let lex_result = lex_raw_string(input, curr_offset, span_offset);
+            let span = Span::new(span_offset + token_start, span_offset + *curr_offset);
+            if let Err(e) = lex_result {
+                return (
+                    Token {
+                        contents: TokenContents::Item,
+                        span,
+                    },
+                    Some(e),
+                );
+            }
         } else if is_item_terminator(&block_level, c, additional_whitespace, special_tokens) {
             break;
         }
@@ -331,6 +344,77 @@ pub fn lex_item(
     (output, err)
 }
 
+/// Advances `curr_offset` over a raw string literal such as `r#'text'#`.
+///
+/// On entry `curr_offset` must index the leading `r`; the caller has already checked that
+/// the next byte is `#`. On success `curr_offset` is left on the final `#` of the closing
+/// delimiter. Returns `ParseError::Expected` when the `#` prefix is not followed by a single
+/// quote, and `ParseError::UnexpectedEof` when no closing delimiter is found.
+fn lex_raw_string(
+    input: &[u8],
+    curr_offset: &mut usize,
+    span_offset: usize,
+) -> Result<(), ParseError> {
+    // A raw string literal looks like `echo r#'Look, I can use 'single quotes'!'#`
+    // If the next character is `#` we're probably looking at a raw string literal
+    // so we need to read all the text until we find a closing `#`. This raw string
+    // can contain any character, including newlines and double quotes without needing
+    // to escape them.
+    //
+    // A raw string can contain many `#` as prefix,
+    // in case there is a `'#` or `#'` in the string itself.
+    // E.g: r##'I can use '#' in a raw string'##
+    let mut prefix_sharp_cnt = 0;
+    let start = *curr_offset;
+    while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) {
+        prefix_sharp_cnt += 1;
+    }
+
+    // curr_offset is the character `r`, we need to move forward and skip all `#`
+    // characters.
+    //
+    // e.g: r###'
+    //      ^
+    //      ^
+    //   curr_offset
+    *curr_offset += prefix_sharp_cnt + 1;
+    // the next one should be a single quote.
+    if input.get(*curr_offset) != Some(&b'\'') {
+        return Err(ParseError::Expected(
+            "'",
+            Span::new(span_offset + *curr_offset, span_offset + *curr_offset + 1),
+        ));
+    }
+    // Remember the opening quote so it can never double as the closing one.
+    let opening_quote = *curr_offset;
+
+    *curr_offset += 1;
+    let mut matches = false;
+    while let Some(ch) = input.get(*curr_offset) {
+        // check for postfix '###
+        //
+        // The quote closing the string sits `prefix_sharp_cnt` bytes before this `#` and
+        // must come strictly after the opening quote. Otherwise `r#'#` would count as
+        // terminated and a body starting with `#` (e.g. `r#'#a'#`) would be cut short.
+        if *ch == b'#' && *curr_offset - prefix_sharp_cnt > opening_quote {
+            let start_ch = input[*curr_offset - prefix_sharp_cnt];
+            let postfix = &input[*curr_offset - prefix_sharp_cnt + 1..=*curr_offset];
+            if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') {
+                matches = true;
+                break;
+            }
+        }
+        *curr_offset += 1
+    }
+    if !matches {
+        return Err(ParseError::UnexpectedEof(
+            "#".to_string(),
+            Span::new(span_offset + *curr_offset, span_offset + *curr_offset),
+        ));
+    }
+    Ok(())
+}
+
 pub fn lex_signature(
     input: &[u8],
     span_offset: usize,
@@ -503,79 +587,6 @@ fn lex_internal(
         } else if c == b' ' || c == b'\t' || additional_whitespace.contains(&c) {
             // If the next character is non-newline whitespace, skip it.
             curr_offset += 1;
-        } else if c == b'r' {
-            // A raw string literal looks like `echo r#'Look, I can use 'single quotes'!'#`
-            // If the next character is `#` we're probably looking at a raw string literal
-            // so we need to read all the text until we find a closing `#`. This raw string
-            // can contain any character, including newlines and double quotes without needing
-            // to escape them.
-            //
-            // A raw string can contain many `#` as prefix,
-            // incase if there is a `'#` or `#'` in the string itself.
-            // E.g: r##'I can use '#' in a raw string'##
-            let mut prefix_sharp_cnt = 0;
-            let start = curr_offset;
-            while let Some(b'#') = input.get(start + prefix_sharp_cnt + 1) {
-                prefix_sharp_cnt += 1;
-            }
-
-            if prefix_sharp_cnt != 0 {
-                // curr_offset is the character `r`, we need to move forward and skip all `#`
-                // characters.
-                //
-                // e.g: r###'
-                //      ^
-                //      ^
-                //   curr_offset
-                curr_offset += prefix_sharp_cnt + 1;
-                // the next one should be a single quote.
-                if input.get(curr_offset) != Some(&b'\'') {
-                    error = Some(ParseError::Expected(
-                        "'",
-                        Span::new(span_offset + curr_offset, span_offset + curr_offset + 1),
-                    ));
-                }
-
-                curr_offset += 1;
-                let mut matches = false;
-                while let Some(ch) = input.get(curr_offset) {
-                    // check for postfix '###
-                    if *ch == b'#' {
-                        let start_ch = input[curr_offset - prefix_sharp_cnt];
-                        let postfix = &input[curr_offset - prefix_sharp_cnt + 1..=curr_offset];
-                        if start_ch == b'\'' && postfix.iter().all(|x| *x == b'#') {
-                            matches = true;
-                            curr_offset += 1;
-                            break;
-                        }
-                    }
-                    curr_offset += 1
-                }
-                if matches {
-                    output.push(Token::new(
-                        TokenContents::Item,
-                        Span::new(span_offset + start, span_offset + curr_offset),
-                    ));
-                } else if error.is_none() {
-                    error = Some(ParseError::UnexpectedEof(
-                        "#".to_string(),
-                        Span::new(span_offset + curr_offset, span_offset + curr_offset),
-                    ))
-                }
-            } else {
-                let (token, err) = lex_item(
-                    input,
-                    &mut curr_offset,
-                    span_offset,
-                    additional_whitespace,
-                    special_tokens,
-                    in_signature,
-                );
-                if error.is_none() {
-                    error = err;
-                }
-                output.push(token);
-            }
         } else {
             let token = try_lex_special_piped_item(input, &mut curr_offset, span_offset);
             if let Some(token) = token {
diff --git a/src/tests/test_strings.rs b/src/tests/test_strings.rs
index dcb03801b9..7d2ae1de84 100644
--- a/src/tests/test_strings.rs
+++ b/src/tests/test_strings.rs
@@ -87,6 +87,72 @@ fn raw_string() -> TestResult {
     )
 }
 
+#[test]
+fn raw_string_inside_parentheses() -> TestResult {
+    let (left, right) = ('(', ')');
+    run_test(
+        &format!(r#"{left}r#'abcde""fghi"''''jkl'#{right}"#),
+        r#"abcde""fghi"''''jkl"#,
+    )?;
+    run_test(
+        &format!(r#"{left}r##'abcde""fghi"''''#jkl'##{right}"#),
+        r#"abcde""fghi"''''#jkl"#,
+    )?;
+    run_test(
+        &format!(r#"{left}r###'abcde""fghi"'''##'#jkl'###{right}"#),
+        r#"abcde""fghi"'''##'#jkl"#,
+    )?;
+    run_test(&format!("{left}r#''#{right}"), "")?;
+    run_test(
+        &format!(r#"{left}r#'a string with sharp inside # and ends with #'#{right}"#),
+        "a string with sharp inside # and ends with #",
+    )
+}
+
+#[test]
+fn raw_string_inside_list() -> TestResult {
+    let (left, right) = ('[', ']');
+    run_test(
+        &format!(r#"{left}r#'abcde""fghi"''''jkl'#{right} | get 0"#),
+        r#"abcde""fghi"''''jkl"#,
+    )?;
+    run_test(
+        &format!(r#"{left}r##'abcde""fghi"''''#jkl'##{right} | get 0"#),
+        r#"abcde""fghi"''''#jkl"#,
+    )?;
+    run_test(
+        &format!(r#"{left}r###'abcde""fghi"'''##'#jkl'###{right} | get 0"#),
+        r#"abcde""fghi"'''##'#jkl"#,
+    )?;
+    run_test(&format!("{left}r#''#{right} | get 0"), "")?;
+    run_test(
+        &format!(r#"{left}r#'a string with sharp inside # and ends with #'#{right} | get 0"#),
+        "a string with sharp inside # and ends with #",
+    )
+}
+
+#[test]
+fn raw_string_inside_closure() -> TestResult {
+    let (left, right) = ('{', '}');
+    run_test(
+        &format!(r#"do {left}r#'abcde""fghi"''''jkl'#{right}"#),
+        r#"abcde""fghi"''''jkl"#,
+    )?;
+    run_test(
+        &format!(r#"do {left}r##'abcde""fghi"''''#jkl'##{right}"#),
+        r#"abcde""fghi"''''#jkl"#,
+    )?;
+    run_test(
+        &format!(r#"do {left}r###'abcde""fghi"'''##'#jkl'###{right}"#),
+        r#"abcde""fghi"'''##'#jkl"#,
+    )?;
+    run_test(&format!("do {left}r#''#{right}"), "")?;
+    run_test(
+        &format!(r#"do {left}r#'a string with sharp inside # and ends with #'#{right}"#),
+        "a string with sharp inside # and ends with #",
+    )
+}
+
 #[test]
 fn incomplete_raw_string() -> TestResult {
     fail_test("r#abc", "expected '")