nushell/crates/nu-parser/src/parse_patterns.rs
Devyn Cairns 35d2750757
Change how and and or operations are compiled to IR to support custom values (#14653)
# Description

Because `and` and `or` are short-circuiting operations in Nushell, they
must be compiled to a sequence that avoids evaluating the RHS if the LHS
is already sufficient to determine the output - i.e., `false` for `and`
and `true` for `or`. I initially implemented this with `branch-if`
instructions, simply returning the RHS if it needed to be evaluated, and
returning the short-circuited boolean value if it did not.

Example for `$a and $b`:

```
   0: load-variable          %0, var 999 "$a"
   1: branch-if              %0, 3
   2: jump                   5
   3: load-variable          %0, var 1000 "$b" # label(0), from(1:)
   4: jump                   6
   5: load-literal           %0, bool(false) # label(1), from(2:)
   6: span                   %0          # label(2), from(4:)
   7: return                 %0
```

Unfortunately, this broke polars, because using `and`/`or` on custom
values is perfectly valid and they're allowed to define that behavior
differently, and the polars plugin uses this for boolean masks. But
without using the `binary-op` instruction, that custom behavior is never
invoked. Additionally, `branch-if` requires a boolean, and custom values
are not booleans. This changes the IR to the following, using the
`match` instruction to check for the specific short-circuit value
instead, and still invoking `binary-op` otherwise:

```
   0: load-variable          %0, var 125 "$a"
   1: match                  (false), %0, 4
   2: load-variable          %1, var 124 "$b"
   3: binary-op              %0, Boolean(And), %1
   4: span                   %0          # label(0), from(1:)
   5: return                 %0
```

I've also renamed `Pattern::Value` to `Pattern::Expression` and added a
proper `Pattern::Value` variant that actually contains a `Value`
instead. I'm still hoping to remove `Pattern::Expression` eventually,
because it's kind of a hack: we don't actually evaluate the expression,
we just match it against a few cases specifically for pattern matching.
It's also one of the places where AST leaks into IR, and I want to remove
all of those, because AST should not leak into IR.

Fixes #14518

# User-Facing Changes

- `and` and `or` now support custom values again.
- the IR is actually a little bit cleaner, though it may be a bit
slower; `match` is more complex.

# Tests + Formatting

The existing tests pass, but I didn't add anything new. Unfortunately I
don't think there's anything built-in to trigger this, but maybe some
testcases could be added to polars to test it.
2024-12-25 06:12:53 -06:00

230 lines
6.8 KiB
Rust

#![allow(clippy::byte_char_slices)]
use crate::{
lex, lite_parse,
parser::{is_variable, parse_value},
};
use nu_protocol::{
ast::{MatchPattern, Pattern},
engine::StateWorkingSet,
ParseError, Span, SyntaxShape, Type, VarId,
};
/// Builds the placeholder [`MatchPattern`] used when a pattern fails to parse.
///
/// The result holds `Pattern::Garbage`, has no guard, and keeps the given
/// `span` so the location of the bad input is preserved.
pub fn garbage(span: Span) -> MatchPattern {
    let pattern = Pattern::Garbage;
    MatchPattern {
        pattern,
        guard: None,
        span,
    }
}
/// Parses the source text covered by `span` as a single match pattern.
///
/// Dispatch is decided by the first byte of the span contents:
/// - `$` → variable pattern ([`parse_variable_pattern`])
/// - `{` → record pattern ([`parse_record_pattern`])
/// - `[` → list pattern ([`parse_list_pattern`])
///
/// If none of those apply, a span whose contents are exactly `_` becomes
/// `Pattern::IgnoreValue`; anything else is handed to `parse_value` with
/// `SyntaxShape::Any` and wrapped in `Pattern::Expression`.
///
/// The returned pattern never has a guard and always carries `span`.
pub fn parse_pattern(working_set: &mut StateWorkingSet, span: Span) -> MatchPattern {
    let contents = working_set.get_span_contents(span);

    match contents.first().copied() {
        // Variable pattern
        Some(b'$') => parse_variable_pattern(working_set, span),
        // Record pattern
        Some(b'{') => parse_record_pattern(working_set, span),
        // List pattern
        Some(b'[') => parse_list_pattern(working_set, span),
        // Wildcard
        _ if contents == b"_" => MatchPattern {
            pattern: Pattern::IgnoreValue,
            guard: None,
            span,
        },
        // Literal value
        _ => {
            let expr = parse_value(working_set, span, &SyntaxShape::Any);
            MatchPattern {
                pattern: Pattern::Expression(Box::new(expr)),
                guard: None,
                span,
            }
        }
    }
}
/// Resolves the variable named by the contents of `span` to a `VarId`.
///
/// Returns `None` when `is_variable` rejects the span contents. Otherwise the
/// name is looked up with `find_variable_in_current_frame`; if that finds
/// nothing, a new variable is registered via `add_variable` with `Type::Any`
/// (the final `false` argument is passed through unchanged) and its id is
/// returned.
fn parse_variable_pattern_helper(working_set: &mut StateWorkingSet, span: Span) -> Option<VarId> {
    let name = working_set.get_span_contents(span);
    if !is_variable(name) {
        return None;
    }

    let var_id = match working_set.find_variable_in_current_frame(name) {
        Some(existing) => existing,
        None => working_set.add_variable(name.to_vec(), span, Type::Any, false),
    };
    Some(var_id)
}
/// Parses `span` as a variable pattern.
///
/// On success the result is `Pattern::Variable` with the id produced by
/// [`parse_variable_pattern_helper`]. If the helper returns `None`, an
/// `Expected("valid variable name", ..)` parse error is recorded on the
/// working set and a [`garbage`] pattern is returned instead.
pub fn parse_variable_pattern(working_set: &mut StateWorkingSet, span: Span) -> MatchPattern {
    match parse_variable_pattern_helper(working_set, span) {
        Some(var_id) => MatchPattern {
            pattern: Pattern::Variable(var_id),
            guard: None,
            span,
        },
        None => {
            working_set.error(ParseError::Expected("valid variable name", span));
            garbage(span)
        }
    }
}
pub fn parse_list_pattern(working_set: &mut StateWorkingSet, span: Span) -> MatchPattern {
let bytes = working_set.get_span_contents(span);
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"[") {
start += 1;
}
if bytes.ends_with(b"]") {
end -= 1;
} else {
working_set.error(ParseError::Unclosed("]".into(), Span::new(end, end)));
}
let inner_span = Span::new(start, end);
let source = working_set.get_span_contents(inner_span);
let (output, err) = lex(source, inner_span.start, &[b'\n', b'\r', b','], &[], true);
if let Some(err) = err {
working_set.error(err);
}
let (output, err) = lite_parse(&output);
if let Some(err) = err {
working_set.error(err);
}
let mut args = vec![];
if !output.block.is_empty() {
for command in &output.block[0].commands {
let mut spans_idx = 0;
while spans_idx < command.parts.len() {
let contents = working_set.get_span_contents(command.parts[spans_idx]);
if contents == b".." {
args.push(MatchPattern {
pattern: Pattern::IgnoreRest,
guard: None,
span: command.parts[spans_idx],
});
break;
} else if contents.starts_with(b"..$") {
if let Some(var_id) = parse_variable_pattern_helper(
working_set,
Span::new(
command.parts[spans_idx].start + 2,
command.parts[spans_idx].end,
),
) {
args.push(MatchPattern {
pattern: Pattern::Rest(var_id),
guard: None,
span: command.parts[spans_idx],
});
break;
} else {
args.push(garbage(command.parts[spans_idx]));
working_set.error(ParseError::Expected(
"valid variable name",
command.parts[spans_idx],
));
}
} else {
let arg = parse_pattern(working_set, command.parts[spans_idx]);
args.push(arg);
};
spans_idx += 1;
}
}
}
MatchPattern {
pattern: Pattern::List(args),
guard: None,
span,
}
}
pub fn parse_record_pattern(working_set: &mut StateWorkingSet, span: Span) -> MatchPattern {
let mut bytes = working_set.get_span_contents(span);
let mut start = span.start;
let mut end = span.end;
if bytes.starts_with(b"{") {
start += 1;
} else {
working_set.error(ParseError::Expected("{", Span::new(start, start + 1)));
bytes = working_set.get_span_contents(span);
}
if bytes.ends_with(b"}") {
end -= 1;
} else {
working_set.error(ParseError::Unclosed("}".into(), Span::new(end, end)));
}
let inner_span = Span::new(start, end);
let source = working_set.get_span_contents(inner_span);
let (tokens, err) = lex(source, start, &[b'\n', b'\r', b','], &[b':'], true);
if let Some(err) = err {
working_set.error(err);
}
let mut output = vec![];
let mut idx = 0;
while idx < tokens.len() {
let bytes = working_set.get_span_contents(tokens[idx].span);
let (field, pattern) = if !bytes.is_empty() && bytes[0] == b'$' {
// If this is a variable, treat it as both the name of the field and the pattern
let field = String::from_utf8_lossy(&bytes[1..]).to_string();
let pattern = parse_variable_pattern(working_set, tokens[idx].span);
(field, pattern)
} else {
let field = String::from_utf8_lossy(bytes).to_string();
idx += 1;
if idx == tokens.len() {
working_set.error(ParseError::Expected("record", span));
return garbage(span);
}
let colon = working_set.get_span_contents(tokens[idx].span);
idx += 1;
if idx == tokens.len() || colon != b":" {
//FIXME: need better error
working_set.error(ParseError::Expected("record", span));
return garbage(span);
}
let pattern = parse_pattern(working_set, tokens[idx].span);
(field, pattern)
};
idx += 1;
output.push((field, pattern));
}
MatchPattern {
pattern: Pattern::Record(output),
guard: None,
span,
}
}