diff --git a/lib/src/backend.rs b/lib/src/backend.rs index 3aa888db7..f9cf5425d 100644 --- a/lib/src/backend.rs +++ b/lib/src/backend.rs @@ -23,6 +23,7 @@ use std::vec::Vec; use thiserror::Error; +use crate::conflicts; use crate::content_hash::ContentHash; use crate::repo_path::{RepoPath, RepoPathComponent}; @@ -145,7 +146,14 @@ content_hash! { pub struct Commit { pub parents: Vec, pub predecessors: Vec, - pub root_tree: TreeId, + pub root_tree: conflicts::Conflict, + /// Indicates that there this commit uses the new tree-level conflict format, which means + /// that if `root_tree` is not a conflict, we know that we won't have to walk it to + /// determine if there are conflicts. + // TODO(#1624): Delete this field at some point in the future, when we decide to drop + // support for conflicts in older repos, or maybe after we have provided an upgrade + // mechanism. + pub uses_tree_conflict_format: bool, pub change_id: ChangeId, pub description: String, pub author: Signature, @@ -387,7 +395,8 @@ pub fn make_root_commit(root_change_id: ChangeId, empty_tree_id: TreeId) -> Comm Commit { parents: vec![], predecessors: vec![], - root_tree: empty_tree_id, + root_tree: conflicts::Conflict::resolved(empty_tree_id), + uses_tree_conflict_format: false, change_id: root_change_id, description: String::new(), author: signature.clone(), diff --git a/lib/src/commit.rs b/lib/src/commit.rs index 4922c7cab..c87d85fc6 100644 --- a/lib/src/commit.rs +++ b/lib/src/commit.rs @@ -103,12 +103,12 @@ impl Commit { pub fn tree(&self) -> Tree { self.store - .get_tree(&RepoPath::root(), &self.data.root_tree) + .get_tree(&RepoPath::root(), self.data.root_tree.as_legacy_tree_id()) .unwrap() } pub fn tree_id(&self) -> &TreeId { - &self.data.root_tree + self.data.root_tree.as_legacy_tree_id() } pub fn change_id(&self) -> &ChangeId { diff --git a/lib/src/commit_builder.rs b/lib/src/commit_builder.rs index ef1f804dc..1f24e914e 100644 --- a/lib/src/commit_builder.rs +++ b/lib/src/commit_builder.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use crate::backend::{self, BackendResult, ChangeId, CommitId, Signature, TreeId}; use crate::commit::Commit; +use crate::conflicts::Conflict; use crate::repo::{MutableRepo, Repo}; use crate::settings::{JJRng, UserSettings}; @@ -43,7 +44,9 @@ impl CommitBuilder<'_> { let commit = backend::Commit { parents, predecessors: vec![], - root_tree: tree_id, + // TODO(#1624): set this when appropriate + root_tree: Conflict::from_legacy_tree_id(tree_id), + uses_tree_conflict_format: false, change_id, description: String::new(), author: signature.clone(), @@ -101,11 +104,11 @@ impl CommitBuilder<'_> { } pub fn tree(&self) -> &TreeId { - &self.commit.root_tree + self.commit.root_tree.as_legacy_tree_id() } pub fn set_tree(mut self, tree_id: TreeId) -> Self { - self.commit.root_tree = tree_id; + self.commit.root_tree = Conflict::from_legacy_tree_id(tree_id); self } diff --git a/lib/src/conflicts.rs b/lib/src/conflicts.rs index 0e280640e..ed953e490 100644 --- a/lib/src/conflicts.rs +++ b/lib/src/conflicts.rs @@ -22,6 +22,7 @@ use std::sync::Arc; use itertools::Itertools; use crate::backend::{BackendError, BackendResult, FileId, ObjectId, TreeId, TreeValue}; +use crate::content_hash::ContentHash; use crate::diff::{find_line_ranges, Diff, DiffHunk}; use crate::files::{ContentHunk, MergeResult}; use crate::merge::trivial_merge; @@ -53,6 +54,7 @@ impl Conflict { Conflict { removes, adds } } + /// Creates a `Conflict` with a single resolved value. pub fn resolved(value: T) -> Self { Conflict::new(vec![], vec![value]) } @@ -205,6 +207,32 @@ impl Conflict> { } } +impl ContentHash for Conflict { + fn hash(&self, state: &mut impl digest::Update) { + self.removes().hash(state); + self.adds().hash(state); + } +} + +impl Conflict { + // Creates a resolved conflict for a legacy tree id (same as + // `Conflict::resolved()`). + // TODO(#1624): delete when all callers have been updated to support tree-level + // conflicts + pub fn from_legacy_tree_id(value: TreeId) -> Self { + Conflict { + removes: vec![], + adds: vec![value], + } + } + + // TODO(#1624): delete when all callers have been updated to support tree-level + // conflicts + pub fn as_legacy_tree_id(&self) -> &TreeId { + self.as_resolved().unwrap() + } +} + impl Conflict> { /// Create a `Conflict` from a `backend::Conflict`, padding with `None` to /// make sure that there is exactly one more `adds()` than `removes()`. diff --git a/lib/src/git_backend.rs b/lib/src/git_backend.rs index 1f1c07624..a3e5adb62 100644 --- a/lib/src/git_backend.rs +++ b/lib/src/git_backend.rs @@ -17,6 +17,7 @@ use std::any::Any; use std::fmt::{Debug, Error, Formatter}; use std::io::{Cursor, Read}; +use std::ops::Deref; use std::path::Path; use std::sync::{Arc, Mutex, MutexGuard}; use std::{fs, slice}; @@ -31,6 +32,7 @@ use crate::backend::{ ChangeId, Commit, CommitId, Conflict, ConflictId, ConflictTerm, FileId, MillisSinceEpoch, ObjectId, Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue, }; +use crate::conflicts; use crate::file_util::{IoResultExt as _, PathError}; use crate::lock::FileLock; use crate::repo_path::{RepoPath, RepoPathComponent}; @@ -219,6 +221,9 @@ fn commit_from_git_without_root_parent(commit: &git2::Commit) -> Commit { .map(|oid| CommitId::from_bytes(oid.as_bytes())) .collect_vec(); let tree_id = TreeId::from_bytes(commit.tree_id().as_bytes()); + // If this commit is a conflict, we'll update the root tree later, when we read + // the extra metadata. + let root_tree = conflicts::Conflict::resolved(tree_id); let description = commit.message().unwrap_or("").to_owned(); let author = signature_from_git(commit.author()); let committer = signature_from_git(commit.committer()); @@ -226,7 +231,9 @@ fn commit_from_git_without_root_parent(commit: &git2::Commit) -> Commit { Commit { parents, predecessors: vec![], - root_tree: tree_id, + root_tree, + // If this commit has associated extra metadata, we may set this later. + uses_tree_conflict_format: false, change_id, description, author, @@ -262,8 +269,28 @@ fn signature_to_git(signature: &Signature) -> git2::Signature<'static> { fn serialize_extras(commit: &Commit) -> Vec { let mut proto = crate::protos::git_store::Commit { change_id: commit.change_id.to_bytes(), + uses_tree_conflict_format: commit.uses_tree_conflict_format, ..Default::default() }; + if commit.root_tree.as_resolved().is_none() { + assert!(commit.uses_tree_conflict_format); + let removes = commit + .root_tree + .removes() + .iter() + .map(|r| r.to_bytes()) + .collect_vec(); + let adds = commit + .root_tree + .adds() + .iter() + .map(|r| r.to_bytes()) + .collect_vec(); + let conflict = crate::protos::git_store::TreeConflict { removes, adds }; + proto.root_tree = Some(crate::protos::git_store::commit::RootTree::Conflict( + conflict, + )); + } for predecessor in &commit.predecessors { proto.predecessors.push(predecessor.to_bytes()); } @@ -273,6 +300,28 @@ fn serialize_extras(commit: &Commit) -> Vec { fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) { let proto = crate::protos::git_store::Commit::decode(bytes).unwrap(); commit.change_id = ChangeId::new(proto.change_id); + commit.uses_tree_conflict_format = proto.uses_tree_conflict_format; + match proto.root_tree { + Some(crate::protos::git_store::commit::RootTree::Conflict(proto_conflict)) => { + assert!(commit.uses_tree_conflict_format); + commit.root_tree = conflicts::Conflict::new( + proto_conflict + .removes + .iter() + .map(|id_bytes| TreeId::from_bytes(id_bytes)) + .collect(), + proto_conflict + .adds + .iter() + .map(|id_bytes| TreeId::from_bytes(id_bytes)) + .collect(), + ); + } + Some(crate::protos::git_store::commit::RootTree::Resolved(_)) => { + panic!("found resolved root tree in extras (should only be written to git metadata)"); + } + None => {} + } for predecessor in &proto.predecessors { commit.predecessors.push(CommitId::from_bytes(predecessor)); } @@ -597,6 +646,9 @@ impl Backend for GitBackend { // the commit is a branch head, so bulk-import metadata as much as possible. tracing::debug!("import extra metadata entries"); let mut mut_table = table.start_mutation(); + // TODO(#1624): Should we read the root tree here and check if it has a + // `.jjconflict-...` entries? That could happen if the user used `git` to e.g. + // change the description of a commit with tree-level conflicts. mut_table.add_entry(id.to_bytes(), serialize_extras(&commit)); if commit.parents != slice::from_ref(&self.root_commit_id) { import_extra_metadata_entries_from_heads( @@ -615,10 +667,14 @@ impl Backend for GitBackend { fn write_commit(&self, mut contents: Commit) -> BackendResult<(CommitId, Commit)> { let locked_repo = self.repo.lock().unwrap(); - let git_tree_id = validate_git_object_id(&contents.root_tree)?; + let git_tree_id = if let Some(tree_id) = contents.root_tree.as_resolved() { + validate_git_object_id(tree_id)? + } else { + write_tree_conflict(locked_repo.deref(), &contents.root_tree)? + }; let git_tree = locked_repo .find_tree(git_tree_id) - .map_err(|err| map_not_found_err(err, &contents.root_tree))?; + .map_err(|err| map_not_found_err(err, &TreeId::from_bytes(git_tree_id.as_bytes())))?; let author = signature_to_git(&contents.author); let mut committer = signature_to_git(&contents.committer); let message = &contents.description; @@ -705,6 +761,29 @@ impl Backend for GitBackend { } } +/// Write a tree conflict as a special tree with `.jjconflict-base-N` and +/// `.jjconflict-base-N` subtrees. This ensure that the parts are not GC'd. +fn write_tree_conflict( + repo: &git2::Repository, + conflict: &conflicts::Conflict, +) -> Result { + let mut builder = repo.treebuilder(None).unwrap(); + let mut add_tree_entry = |name, tree_id: &TreeId| { + let tree_oid = Oid::from_bytes(tree_id.as_bytes()).unwrap(); + builder.insert(name, tree_oid, 0o040000).unwrap(); + }; + for (i, tree_id) in conflict.removes().iter().enumerate() { + add_tree_entry(format!(".jjconflict-base-{i}"), tree_id); + } + for (i, tree_id) in conflict.adds().iter().enumerate() { + add_tree_entry(format!(".jjconflict-side-{i}"), tree_id); + } + builder.write().map_err(|err| BackendError::WriteObject { + object_type: "tree", + source: Box::new(err), + }) +} + fn conflict_term_list_to_json(parts: &[ConflictTerm]) -> serde_json::Value { serde_json::Value::Array(parts.iter().map(conflict_term_to_json).collect()) } @@ -836,7 +915,11 @@ mod tests { assert_eq!(&commit.change_id, &change_id); assert_eq!(commit.parents, vec![CommitId::from_bytes(&[0; 20])]); assert_eq!(commit.predecessors, vec![]); - assert_eq!(commit.root_tree.as_bytes(), root_tree_id.as_bytes()); + assert_eq!( + commit.root_tree.as_resolved().unwrap().as_bytes(), + root_tree_id.as_bytes() + ); + assert!(!commit.uses_tree_conflict_format); assert_eq!(commit.description, "git commit message"); assert_eq!(commit.author.name, "git author"); assert_eq!(commit.author.email, "git.author@example.com"); @@ -905,7 +988,8 @@ mod tests { let mut commit = Commit { parents: vec![], predecessors: vec![], - root_tree: backend.empty_tree_id().clone(), + root_tree: conflicts::Conflict::resolved(backend.empty_tree_id().clone()), + uses_tree_conflict_format: false, change_id: ChangeId::from_hex("abc123"), description: "".to_string(), author: create_signature(), @@ -957,6 +1041,81 @@ mod tests { ); } + #[test] + fn write_tree_conflicts() { + let temp_dir = testutils::new_temp_dir(); + let store_path = temp_dir.path(); + let git_repo_path = temp_dir.path().join("git"); + let git_repo = git2::Repository::init(&git_repo_path).unwrap(); + + let backend = GitBackend::init_external(store_path, &git_repo_path).unwrap(); + let crete_tree = |i| { + let blob_id = git_repo.blob(b"content {i}").unwrap(); + let mut tree_builder = git_repo.treebuilder(None).unwrap(); + tree_builder + .insert(format!("file{i}"), blob_id, 0o100644) + .unwrap(); + TreeId::from_bytes(tree_builder.write().unwrap().as_bytes()) + }; + + let root_tree = conflicts::Conflict::new( + vec![crete_tree(0), crete_tree(1)], + vec![crete_tree(2), crete_tree(3), crete_tree(4)], + ); + let mut commit = Commit { + parents: vec![backend.root_commit_id().clone()], + predecessors: vec![], + root_tree: root_tree.clone(), + uses_tree_conflict_format: true, + change_id: ChangeId::from_hex("abc123"), + description: "".to_string(), + author: create_signature(), + committer: create_signature(), + }; + + // When writing a tree-level conflict, the root tree on the git side has the + // individual trees as subtrees. + let read_commit_id = backend.write_commit(commit.clone()).unwrap().0; + let read_commit = backend.read_commit(&read_commit_id).unwrap(); + assert_eq!(read_commit, commit); + let git_commit = git_repo + .find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap()) + .unwrap(); + let git_tree = git_repo.find_tree(git_commit.tree_id()).unwrap(); + assert!(git_tree.iter().all(|entry| entry.filemode() == 0o040000)); + let mut iter = git_tree.iter(); + let entry = iter.next().unwrap(); + assert_eq!(entry.name(), Some(".jjconflict-base-0")); + assert_eq!(entry.id().as_bytes(), root_tree.removes()[0].as_bytes()); + let entry = iter.next().unwrap(); + assert_eq!(entry.name(), Some(".jjconflict-base-1")); + assert_eq!(entry.id().as_bytes(), root_tree.removes()[1].as_bytes()); + let entry = iter.next().unwrap(); + assert_eq!(entry.name(), Some(".jjconflict-side-0")); + assert_eq!(entry.id().as_bytes(), root_tree.adds()[0].as_bytes()); + let entry = iter.next().unwrap(); + assert_eq!(entry.name(), Some(".jjconflict-side-1")); + assert_eq!(entry.id().as_bytes(), root_tree.adds()[1].as_bytes()); + let entry = iter.next().unwrap(); + assert_eq!(entry.name(), Some(".jjconflict-side-2")); + assert_eq!(entry.id().as_bytes(), root_tree.adds()[2].as_bytes()); + assert!(iter.next().is_none()); + + // When writing a single tree using the new format, it's represented by a + // regular git tree. + commit.root_tree = conflicts::Conflict::resolved(crete_tree(5)); + let read_commit_id = backend.write_commit(commit.clone()).unwrap().0; + let read_commit = backend.read_commit(&read_commit_id).unwrap(); + assert_eq!(read_commit, commit); + let git_commit = git_repo + .find_commit(Oid::from_bytes(read_commit_id.as_bytes()).unwrap()) + .unwrap(); + assert_eq!( + git_commit.tree_id().as_bytes(), + commit.root_tree.adds()[0].as_bytes() + ); + } + #[test] fn commit_has_ref() { let temp_dir = testutils::new_temp_dir(); @@ -972,7 +1131,8 @@ mod tests { let commit = Commit { parents: vec![store.root_commit_id().clone()], predecessors: vec![], - root_tree: store.empty_tree_id().clone(), + root_tree: conflicts::Conflict::resolved(store.empty_tree_id().clone()), + uses_tree_conflict_format: false, change_id: ChangeId::new(vec![]), description: "initial".to_string(), author: signature.clone(), @@ -995,7 +1155,8 @@ mod tests { let mut commit1 = Commit { parents: vec![store.root_commit_id().clone()], predecessors: vec![], - root_tree: store.empty_tree_id().clone(), + root_tree: conflicts::Conflict::resolved(store.empty_tree_id().clone()), + uses_tree_conflict_format: false, change_id: ChangeId::new(vec![]), description: "initial".to_string(), author: create_signature(), diff --git a/lib/src/local_backend.rs b/lib/src/local_backend.rs index e9151451d..b96f257a0 100644 --- a/lib/src/local_backend.rs +++ b/lib/src/local_backend.rs @@ -30,6 +30,7 @@ use crate::backend::{ ConflictId, ConflictTerm, FileId, MillisSinceEpoch, ObjectId, Signature, SymlinkId, Timestamp, Tree, TreeId, TreeValue, }; +use crate::conflicts; use crate::content_hash::blake2b_hash; use crate::file_util::persist_content_addressed_temp_file; use crate::repo_path::{RepoPath, RepoPathComponent}; @@ -284,7 +285,21 @@ pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit { for predecessor in &commit.predecessors { proto.predecessors.push(predecessor.to_bytes()); } - proto.root_tree = commit.root_tree.to_bytes(); + let conflict = crate::protos::local_store::TreeConflict { + removes: commit + .root_tree + .removes() + .iter() + .map(|id| id.to_bytes()) + .collect(), + adds: commit + .root_tree + .adds() + .iter() + .map(|id| id.to_bytes()) + .collect(), + }; + proto.root_tree = Some(conflict); proto.change_id = commit.change_id.to_bytes(); proto.description = commit.description.clone(); proto.author = Some(signature_to_proto(&commit.author)); @@ -295,12 +310,17 @@ pub fn commit_to_proto(commit: &Commit) -> crate::protos::local_store::Commit { fn commit_from_proto(proto: crate::protos::local_store::Commit) -> Commit { let parents = proto.parents.into_iter().map(CommitId::new).collect(); let predecessors = proto.predecessors.into_iter().map(CommitId::new).collect(); - let root_tree = TreeId::new(proto.root_tree); + let conflict = proto.root_tree.unwrap(); + let root_tree = conflicts::Conflict::new( + conflict.removes.into_iter().map(TreeId::new).collect(), + conflict.adds.into_iter().map(TreeId::new).collect(), + ); let change_id = ChangeId::new(proto.change_id); Commit { parents, predecessors, root_tree, + uses_tree_conflict_format: true, change_id, description: proto.description, author: signature_from_proto(proto.author.unwrap_or_default()), diff --git a/lib/src/protos/git_store.proto b/lib/src/protos/git_store.proto index 796a8d629..67a39e5d8 100644 --- a/lib/src/protos/git_store.proto +++ b/lib/src/protos/git_store.proto @@ -16,10 +16,22 @@ syntax = "proto3"; package git_store; +message TreeConflict { + repeated bytes removes = 1; + repeated bytes adds = 2; +} + message Commit { repeated bytes predecessors = 2; bytes change_id = 4; + oneof root_tree { + bytes resolved = 3; + TreeConflict conflict = 1; + } + // TODO(#1624): delete when we assume that all commits use this format + bool uses_tree_conflict_format = 10; + bool is_open = 8 [deprecated = true]; bool is_pruned = 9 [deprecated = true]; } diff --git a/lib/src/protos/git_store.rs b/lib/src/protos/git_store.rs index 81142b166..70df97ab8 100644 --- a/lib/src/protos/git_store.rs +++ b/lib/src/protos/git_store.rs @@ -1,14 +1,38 @@ #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct TreeConflict { + #[prost(bytes = "vec", repeated, tag = "1")] + pub removes: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, + #[prost(bytes = "vec", repeated, tag = "2")] + pub adds: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct Commit { #[prost(bytes = "vec", repeated, tag = "2")] pub predecessors: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, #[prost(bytes = "vec", tag = "4")] pub change_id: ::prost::alloc::vec::Vec, + /// TODO(#1624): delete when we assume that all commits use this format + #[prost(bool, tag = "10")] + pub uses_tree_conflict_format: bool, #[deprecated] #[prost(bool, tag = "8")] pub is_open: bool, #[deprecated] #[prost(bool, tag = "9")] pub is_pruned: bool, + #[prost(oneof = "commit::RootTree", tags = "3, 1")] + pub root_tree: ::core::option::Option, +} +/// Nested message and enum types in `Commit`. +pub mod commit { + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum RootTree { + #[prost(bytes, tag = "3")] + Resolved(::prost::alloc::vec::Vec), + #[prost(message, tag = "1")] + Conflict(super::TreeConflict), + } } diff --git a/lib/src/protos/local_store.proto b/lib/src/protos/local_store.proto index 2122f96da..9186e17ec 100644 --- a/lib/src/protos/local_store.proto +++ b/lib/src/protos/local_store.proto @@ -39,10 +39,15 @@ message Tree { repeated Entry entries = 1; } +message TreeConflict { + repeated bytes removes = 1; + repeated bytes adds = 2; +} + message Commit { repeated bytes parents = 1; repeated bytes predecessors = 2; - bytes root_tree = 3; + TreeConflict root_tree = 3; bytes change_id = 4; string description = 5; diff --git a/lib/src/protos/local_store.rs b/lib/src/protos/local_store.rs index 585998d01..9ead47759 100644 --- a/lib/src/protos/local_store.rs +++ b/lib/src/protos/local_store.rs @@ -46,13 +46,21 @@ pub mod tree { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct TreeConflict { + #[prost(bytes = "vec", repeated, tag = "1")] + pub removes: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, + #[prost(bytes = "vec", repeated, tag = "2")] + pub adds: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct Commit { #[prost(bytes = "vec", repeated, tag = "1")] pub parents: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, #[prost(bytes = "vec", repeated, tag = "2")] pub predecessors: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, - #[prost(bytes = "vec", tag = "3")] - pub root_tree: ::prost::alloc::vec::Vec, + #[prost(message, optional, tag = "3")] + pub root_tree: ::core::option::Option, #[prost(bytes = "vec", tag = "4")] pub change_id: ::prost::alloc::vec::Vec, #[prost(string, tag = "5")]