jj/cli/src/commands/fix.rs

353 lines
13 KiB
Rust

// Copyright 2024 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::HashMap;
use std::io::Write as _;
use std::path::Path;
use std::process::Stdio;
use clap_complete::ArgValueCandidates;
use itertools::Itertools as _;
use jj_lib::backend::CommitId;
use jj_lib::fileset;
use jj_lib::fileset::FilesetDiagnostics;
use jj_lib::fileset::FilesetExpression;
use jj_lib::fix::fix_files;
use jj_lib::fix::FileToFix;
use jj_lib::fix::FixError;
use jj_lib::fix::FixResult;
use jj_lib::fix::ParallelFileFixer;
use jj_lib::matchers::Matcher;
use jj_lib::repo_path::RepoPathUiConverter;
use jj_lib::settings::UserSettings;
use jj_lib::store::Store;
use pollster::FutureExt as _;
use tracing::instrument;
use crate::cli_util::CommandHelper;
use crate::cli_util::RevisionArg;
use crate::command_error::config_error;
use crate::command_error::print_parse_diagnostics;
use crate::command_error::CommandError;
use crate::complete;
use crate::config::CommandNameAndArgs;
use crate::ui::Ui;
/// Update files with formatting fixes or other changes
///
/// The primary use case for this command is to apply the results of automatic
/// code formatting tools to revisions that may not be properly formatted yet.
/// It can also be used to modify files with other tools like `sed` or `sort`.
///
/// The changed files in the given revisions will be updated with any fixes
/// determined by passing their file content through any external tools the user
/// has configured for those files. Descendants will also be updated by passing
/// their versions of the same files through the same tools, which will ensure
/// that the fixes are not lost. This will never result in new conflicts. Files
/// with existing conflicts will be updated on all sides of the conflict, which
/// can potentially increase or decrease the number of conflict markers.
///
/// The external tools must accept the current file content on standard input,
/// and return the updated file content on standard output. A tool's output will
/// not be used unless it exits with a successful exit code. Output on standard
/// error will be passed through to the terminal.
///
/// Tools are defined in a table where the keys are arbitrary identifiers and
/// the values have the following properties:
/// - `command`: The arguments used to run the tool. The first argument is the
/// path to an executable file. Arguments can contain the substring `$path`,
/// which will be replaced with the repo-relative path of the file being
/// fixed. It is useful to provide the path to tools that include the path in
/// error messages, or behave differently based on the directory or file
/// name.
/// - `patterns`: Determines which files the tool will affect. If this list is
/// empty, no files will be affected by the tool. If there are multiple
/// patterns, the tool is applied only once to each file in the union of the
/// patterns.
/// - `enabled`: Enables or disables the tool. If omitted, the tool is enabled.
/// This is useful for defining disabled tools in user configuration that can
/// be enabled in individual repositories with one config setting.
///
/// For example, the following configuration defines how two code formatters
/// (`clang-format` and `black`) will apply to three different file extensions
/// (`.cc`, `.h`, and `.py`):
///
/// ```toml
/// [fix.tools.clang-format]
/// command = ["/usr/bin/clang-format", "--assume-filename=$path"]
/// patterns = ["glob:'**/*.cc'",
/// "glob:'**/*.h'"]
///
/// [fix.tools.black]
/// command = ["/usr/bin/black", "-", "--stdin-filename=$path"]
/// patterns = ["glob:'**/*.py'"]
/// ```
///
/// Execution order of tools that affect the same file is deterministic, but
/// currently unspecified, and may change between releases. If two tools affect
/// the same file, the second tool to run will receive its input from the
/// output of the first tool.
// NOTE: the doc comments on this struct and its fields double as the command's
// help text (clap derive), and `verbatim_doc_comment` asks clap to keep their
// line breaks as written. Treat edits to them as user-facing changes.
#[derive(clap::Args, Clone, Debug)]
#[command(verbatim_doc_comment)]
pub(crate) struct FixArgs {
    /// Fix files in the specified revision(s) and their descendants. If no
    /// revisions are specified, this defaults to the `revsets.fix` setting, or
    /// `reachable(@, mutable())` if it is not set.
    // Bare `long`/`short` derive the flag names from the field name
    // (`--source` / `-s`). The flag may be repeated; `cmd_fix` unions the
    // values. Shell completion offers mutable revisions.
    #[arg(
        long,
        short,
        value_name = "REVSETS",
        add = ArgValueCandidates::new(complete::mutable_revisions)
    )]
    source: Vec<RevisionArg>,
    /// Fix only these paths
    // Positional arguments (no `long`/`short`); `cmd_fix` parses them as file
    // patterns and turns them into a matcher.
    #[arg(value_name = "FILESETS", value_hint = clap::ValueHint::AnyPath)]
    paths: Vec<String>,
    /// Fix unchanged files in addition to changed ones. If no paths are
    /// specified, all files in the repo will be fixed.
    // Boolean switch; passed straight through to `fix_files`.
    #[arg(long)]
    include_unchanged_files: bool,
}
/// Entry point of the `fix` command.
///
/// Loads the `fix.tools` configuration, resolves the root revisions (the
/// `--source` arguments, or the `revsets.fix` setting when none were given),
/// verifies that they may be rewritten, and then runs the configured tools
/// over the matching files inside a single transaction. A one-line summary of
/// fixed versus checked commits is written to the status output.
#[instrument(skip_all)]
pub(crate) fn cmd_fix(
    ui: &mut Ui,
    command: &CommandHelper,
    args: &FixArgs,
) -> Result<(), CommandError> {
    let mut workspace = command.workspace_helper(ui)?;
    let root_dir = workspace.workspace_root().to_owned();
    let tools = get_tools_config(ui, workspace.settings())?;

    // Explicit `--source` revsets win; otherwise use the configured default.
    let root_commits: Vec<CommitId> = if args.source.is_empty() {
        let default_revset = workspace.settings().get_string("revsets.fix")?;
        workspace.parse_revset(ui, &RevisionArg::from(default_revset))?
    } else {
        workspace.parse_union_revsets(ui, &args.source)?
    }
    .evaluate_to_commit_ids()?
    .try_collect()?;
    workspace.check_rewritable(root_commits.iter())?;

    let path_expression = workspace.parse_file_patterns(ui, &args.paths)?;
    let matcher = path_expression.to_matcher();

    let mut transaction = workspace.start_transaction();
    // Each file is handed to `fix_one_file`, which applies every matching tool.
    let fixer = ParallelFileFixer::new(|store, file_to_fix| {
        fix_one_file(&root_dir, &tools, store, file_to_fix)
    });
    let outcome = fix_files(
        root_commits,
        &matcher,
        args.include_unchanged_files,
        transaction.repo_mut(),
        &fixer,
    )?;

    writeln!(
        ui.status(),
        "Fixed {} commits of {} checked.",
        outcome.num_fixed_commits,
        outcome.num_checked_commits
    )?;
    let description = format!("fixed {} commits", outcome.num_fixed_commits);
    transaction.finish(ui, description)
}
/// Invokes all matching tools (if any) on `file_to_fix`.
///
/// If the content is successfully transformed, the new content is written to
/// the store and the returned `FixResult` carries the new `FileId`. If no tool
/// matches the file, or the tools leave the content unchanged, `file_id` is
/// `None`. `messages` is currently always empty.
///
/// The matching tools are invoked in order, with the result of one tool feeding
/// into the next tool. Returns `FixError` if there is an error reading or
/// writing the file. However, if a tool invocation fails for whatever reason,
/// the tool is simply skipped and we proceed to invoke the next tool (this is
/// indistinguishable from succeeding with no changes).
///
/// TODO: Better error handling so we can tell the user what went wrong with
/// each failed input.
fn fix_one_file(
    workspace_root: &Path,
    tools_config: &ToolsConfig,
    store: &Store,
    file_to_fix: &FileToFix,
) -> Result<FixResult, FixError> {
    let mut matching_tools = tools_config
        .tools
        .iter()
        .filter(|tool_config| tool_config.matcher.matches(&file_to_fix.repo_path))
        .peekable();
    if matching_tools.peek().is_none() {
        // No tool applies, so the file is unchanged. Report that the same way
        // as the "tools ran but produced identical content" case below, and
        // skip reading the file altogether.
        return Ok(FixResult {
            file_id: None,
            messages: vec![],
        });
    }
    // The first matching tool gets its input from the committed file, and any
    // subsequent matching tool gets its input from the previous matching tool's
    // output.
    let mut old_content = vec![];
    let mut read = store.read_file(&file_to_fix.repo_path, &file_to_fix.file_id)?;
    read.read_to_end(&mut old_content)?;
    // The original bytes are kept around so the final output can be compared
    // against them.
    let new_content = matching_tools.fold(old_content.clone(), |prev_content, tool_config| {
        match run_tool(
            workspace_root,
            &tool_config.command,
            file_to_fix,
            &prev_content,
        ) {
            Ok(next_content) => next_content,
            // TODO: Because the stderr is passed through, this isn't always failing
            // silently, but it should do something better with the exit code, tool
            // name, etc.
            Err(_) => prev_content,
        }
    });
    if new_content == old_content {
        return Ok(FixResult {
            file_id: None,
            messages: vec![],
        });
    }
    // TODO: send futures back over channel
    let new_file_id = store
        .write_file(&file_to_fix.repo_path, &mut new_content.as_slice())
        .block_on()?;
    Ok(FixResult {
        file_id: Some(new_file_id),
        messages: vec![],
    })
}
/// Runs the `tool_command` to fix the given file content.
///
/// The tool is spawned with `workspace_root` as its working directory, and the
/// `path` variable for its arguments set to the repo-relative path of
/// `file_to_fix`. `old_content` is written to the tool's standard input and
/// its standard output is captured; standard error is not redirected.
///
/// The `old_content` is assumed to be that of the `file_to_fix`'s `FileId`, but
/// this is not verified.
///
/// Returns the new file content, whose value will be the same as `old_content`
/// unless the command introduced changes. Returns `Err(())` if the subprocess
/// could not be started, if waiting for it or collecting its output failed, or
/// if it exited with an unsuccessful status.
fn run_tool(
    workspace_root: &Path,
    tool_command: &CommandNameAndArgs,
    file_to_fix: &FileToFix,
    old_content: &[u8],
) -> Result<Vec<u8>, ()> {
    // TODO: Pipe stderr so we can tell the user which commit, file, and tool it is
    // associated with.
    let mut vars: HashMap<&str, &str> = HashMap::new();
    vars.insert("path", file_to_fix.repo_path.as_internal_file_string());
    let mut command = tool_command.to_command_with_variables(&vars);
    tracing::debug!(?command, ?file_to_fix.repo_path, "spawning fix tool");
    let mut child = command
        .current_dir(workspace_root)
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .spawn()
        .map_err(|_| ())?;
    let mut stdin = child
        .stdin
        .take()
        .expect("child stdin was configured as piped");
    let output = std::thread::scope(|s| {
        // Feed the input from a separate thread while this thread drains the
        // output, so a tool that starts writing before it has consumed all of
        // its input is not blocked on a full pipe. Write errors are ignored on
        // purpose (e.g. the tool may exit without reading everything); `stdin`
        // is dropped when the closure returns, which closes the pipe.
        s.spawn(move || {
            stdin.write_all(old_content).ok();
        });
        child.wait_with_output().map_err(|_| ())
    })?;
    tracing::debug!(?command, ?output.status, "fix tool exited:");
    if output.status.success() {
        Ok(output.stdout)
    } else {
        Err(())
    }
}
/// Represents an entry in the `fix.tools` config table.
///
/// Built by `get_tools_config` from a `RawToolConfig`, with the entry's
/// patterns already compiled into a single matcher.
struct ToolConfig {
    /// The command that will be run to fix a matching file.
    command: CommandNameAndArgs,
    /// The matcher that determines if this tool matches a file. It is the
    /// union of all of the entry's `patterns`.
    matcher: Box<dyn Matcher>,
    /// Whether the tool is enabled. `get_tools_config` drops entries for which
    /// this is `false` before returning.
    enabled: bool,
    // TODO: Store the `name` field here and print it with the command's stderr, to clearly
    // associate any errors/warnings with the tool and its configuration entry.
}
/// Represents the `fix.tools` config table.
///
/// As produced by `get_tools_config`, this holds only the enabled tools and is
/// never empty.
struct ToolsConfig {
    /// Some tools, stored in the order they will be executed if more than one
    /// of them matches the same file. `get_tools_config` fills this in sorted
    /// order of the tools' config keys.
    tools: Vec<ToolConfig>,
}
/// Simplifies deserialization of the config values while building a ToolConfig.
///
/// Mirrors one `fix.tools.<name>` table as written in the configuration, before
/// its patterns are parsed into a matcher.
#[derive(Clone, Debug, Eq, PartialEq, serde::Deserialize)]
#[serde(rename_all = "kebab-case")]
struct RawToolConfig {
    /// The tool's command line (the `command` key).
    command: CommandNameAndArgs,
    /// The unparsed fileset patterns (the `patterns` key).
    patterns: Vec<String>,
    /// The `enabled` key; when the key is omitted, `default_tool_enabled`
    /// supplies the value.
    #[serde(default = "default_tool_enabled")]
    enabled: bool,
}
/// Serde default for `RawToolConfig::enabled`: a tool whose config entry omits
/// `enabled` is treated as enabled.
fn default_tool_enabled() -> bool {
    true
}
/// Reads and validates the `fix.tools` config table.
///
/// Every entry is deserialized and its patterns are parsed, so an unusable
/// command or pattern is reported up front; any parse diagnostics are printed
/// with the entry's name. Problems that can only show up when a tool is run,
/// such as a missing executable, are not detected here (though some of them
/// could be).
///
/// Returns an error if the table has no entries, or if none of its entries is
/// enabled. On success the result contains only the enabled tools, ordered by
/// their config key.
fn get_tools_config(ui: &mut Ui, settings: &UserSettings) -> Result<ToolsConfig, CommandError> {
    // The same converter (empty cwd and base) is used for every pattern.
    let path_converter = RepoPathUiConverter::Fs {
        cwd: "".into(),
        base: "".into(),
    };
    let mut tools: Vec<ToolConfig> = Vec::new();
    // Visit the keys in sorted order so that errors are deterministic.
    for name in settings.table_keys("fix.tools").sorted() {
        let mut diagnostics = FilesetDiagnostics::new();
        let raw: RawToolConfig = settings.get(["fix", "tools", name])?;
        let expression = FilesetExpression::union_all(
            raw.patterns
                .iter()
                .map(|pattern| fileset::parse(&mut diagnostics, pattern, &path_converter))
                .try_collect()?,
        );
        print_parse_diagnostics(ui, &format!("In `fix.tools.{name}`"), &diagnostics)?;
        tools.push(ToolConfig {
            command: raw.command,
            matcher: expression.to_matcher(),
            enabled: raw.enabled,
        });
    }
    if tools.is_empty() {
        return Err(config_error("No `fix.tools` are configured"));
    }
    // Disabled entries are still validated above; they are only dropped here.
    tools.retain(|tool| tool.enabled);
    if tools.is_empty() {
        return Err(config_error(
            "At least one entry of `fix.tools` must be enabled.".to_string(),
        ));
    }
    Ok(ToolsConfig { tools })
}