git_backend: derive the change ID from the git change-id header

When read/writing commits from the git-backend, populate the git commit
header with a backwards hash of the `change-id`. This should enable
preserving change identity across various git remotes assuming a
cooperative git server that doesn't strip the git header.

This feature is behind a `git.write-change-id-header` configuration flag
at least to start.
This commit is contained in:
Benjamin Brittain 2025-03-27 12:06:38 -04:00
parent e5478bbf7b
commit 0b6d0a7a75
8 changed files with 165 additions and 27 deletions

View File

@ -10,6 +10,13 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
### Release highlights ### Release highlights
* Experimental support for transferring the change ID to/from Git remotes behind configuration
setting `git.write-change-id-header`. If this is enabled, the change ID will be stored in the Git
commit itself (in a commit header called `change-id`), which means it will be transferred by
regular `git push` etc. This is an evolving feature that currently defaults to "false". This
default will likely change in the future as we gain confidence with forge support and user
expectations.
### Breaking changes ### Breaking changes
### Deprecations ### Deprecations

View File

@ -470,6 +470,11 @@
"description": "Whether jj spawns a git subprocess for network operations (push/fetch/clone)", "description": "Whether jj spawns a git subprocess for network operations (push/fetch/clone)",
"default": true "default": true
}, },
"write-change-id-header": {
"type": "boolean",
"description": "Whether the change id should be stored in the Git commit object",
"default": false
},
"executable-path": { "executable-path": {
"type": "string", "type": "string",
"description": "Path to the git executable", "description": "Path to the git executable",

View File

@ -217,3 +217,10 @@ directories in your working copy. If you then run e.g. `jj status`, the
resulting snapshot will contain those directories, making it look like they resulting snapshot will contain those directories, making it look like they
replaced all the other paths in your repo. You will probably want to run replaced all the other paths in your repo. You will probably want to run
`jj abandon` to get back to the state with the unresolved conflicts. `jj abandon` to get back to the state with the unresolved conflicts.
Change IDs are stored in git commit headers as reverse hex encodings. These is
a non-standard header and is not preserved by all `git` tooling. For example,
the header is preserved by a `git commit --amend`, but is not preserved through
a rebase operation. GitHub and other major forges seem to preserve them for the
most part. This functionality is currently behind a `git.write-change-id-header`
flag.

View File

@ -54,9 +54,6 @@ A change ID is a unique identifier for a [change](#change). They are typically
them as a sequence of 12 letters in the k-z range, at the beginning of a line. them as a sequence of 12 letters in the k-z range, at the beginning of a line.
These are actually hexadecimal numbers that use "digits" z-k instead of 0-9a-f. These are actually hexadecimal numbers that use "digits" z-k instead of 0-9a-f.
For the Git backend, Change IDs are currently maintained only locally and not
exchanged via push/fetch operations.
## Commit ## Commit
A snapshot of the files in the repository at a given point in time (technically A snapshot of the files in the repository at a given point in time (technically

View File

@ -14,6 +14,7 @@ abandon-unreachable-commits = true
auto-local-bookmark = false auto-local-bookmark = false
subprocess = true subprocess = true
executable-path = "git" executable-path = "git"
write-change-id-header = false
[operation] [operation]
hostname = "" hostname = ""

View File

@ -69,8 +69,10 @@ use crate::backend::Timestamp;
use crate::backend::Tree; use crate::backend::Tree;
use crate::backend::TreeId; use crate::backend::TreeId;
use crate::backend::TreeValue; use crate::backend::TreeValue;
use crate::config::ConfigGetError;
use crate::file_util::IoResultExt as _; use crate::file_util::IoResultExt as _;
use crate::file_util::PathError; use crate::file_util::PathError;
use crate::hex_util::to_forward_hex;
use crate::index::Index; use crate::index::Index;
use crate::lock::FileLock; use crate::lock::FileLock;
use crate::merge::Merge; use crate::merge::Merge;
@ -93,6 +95,7 @@ const NO_GC_REF_NAMESPACE: &str = "refs/jj/keep/";
const CONFLICT_SUFFIX: &str = ".jjconflict"; const CONFLICT_SUFFIX: &str = ".jjconflict";
pub const JJ_TREES_COMMIT_HEADER: &[u8] = b"jj:trees"; pub const JJ_TREES_COMMIT_HEADER: &[u8] = b"jj:trees";
pub const CHANGE_ID_COMMIT_HEADER: &[u8] = b"change-id";
#[derive(Debug, Error)] #[derive(Debug, Error)]
pub enum GitBackendInitError { pub enum GitBackendInitError {
@ -101,6 +104,8 @@ pub enum GitBackendInitError {
#[error("Failed to open git repository")] #[error("Failed to open git repository")]
OpenRepository(#[source] gix::open::Error), OpenRepository(#[source] gix::open::Error),
#[error(transparent)] #[error(transparent)]
Config(ConfigGetError),
#[error(transparent)]
Path(PathError), Path(PathError),
} }
@ -159,6 +164,7 @@ pub struct GitBackend {
empty_tree_id: TreeId, empty_tree_id: TreeId,
extra_metadata_store: TableStore, extra_metadata_store: TableStore,
cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>, cached_extra_metadata: Mutex<Option<Arc<ReadonlyTable>>>,
change_id_setting: bool,
} }
impl GitBackend { impl GitBackend {
@ -166,7 +172,11 @@ impl GitBackend {
"git" "git"
} }
fn new(base_repo: gix::ThreadSafeRepository, extra_metadata_store: TableStore) -> Self { fn new(
base_repo: gix::ThreadSafeRepository,
extra_metadata_store: TableStore,
change_id_setting: bool,
) -> Self {
let repo = Mutex::new(base_repo.to_thread_local()); let repo = Mutex::new(base_repo.to_thread_local());
let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]); let root_commit_id = CommitId::from_bytes(&[0; HASH_LENGTH]);
let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]); let root_change_id = ChangeId::from_bytes(&[0; CHANGE_ID_LENGTH]);
@ -179,6 +189,7 @@ impl GitBackend {
empty_tree_id, empty_tree_id,
extra_metadata_store, extra_metadata_store,
cached_extra_metadata: Mutex::new(None), cached_extra_metadata: Mutex::new(None),
change_id_setting,
} }
} }
@ -194,7 +205,12 @@ impl GitBackend {
gix_open_opts_from_settings(settings), gix_open_opts_from_settings(settings),
) )
.map_err(GitBackendInitError::InitRepository)?; .map_err(GitBackendInitError::InitRepository)?;
Self::init_with_repo(store_path, git_repo_path, git_repo)
let change_id_setting = settings
.git_settings()
.map_err(GitBackendInitError::Config)?
.change_id;
Self::init_with_repo(store_path, git_repo_path, git_repo, change_id_setting)
} }
/// Initializes backend by creating a new Git repo at the specified /// Initializes backend by creating a new Git repo at the specified
@ -218,7 +234,11 @@ impl GitBackend {
) )
.map_err(GitBackendInitError::InitRepository)?; .map_err(GitBackendInitError::InitRepository)?;
let git_repo_path = workspace_root.join(".git"); let git_repo_path = workspace_root.join(".git");
Self::init_with_repo(store_path, &git_repo_path, git_repo) let change_id_setting = settings
.git_settings()
.map_err(GitBackendInitError::Config)?
.change_id;
Self::init_with_repo(store_path, &git_repo_path, git_repo, change_id_setting)
} }
/// Initializes backend with an existing Git repo at the specified path. /// Initializes backend with an existing Git repo at the specified path.
@ -238,13 +258,18 @@ impl GitBackend {
gix_open_opts_from_settings(settings), gix_open_opts_from_settings(settings),
) )
.map_err(GitBackendInitError::OpenRepository)?; .map_err(GitBackendInitError::OpenRepository)?;
Self::init_with_repo(store_path, git_repo_path, git_repo) let change_id_setting = settings
.git_settings()
.map_err(GitBackendInitError::Config)?
.change_id;
Self::init_with_repo(store_path, git_repo_path, git_repo, change_id_setting)
} }
fn init_with_repo( fn init_with_repo(
store_path: &Path, store_path: &Path,
git_repo_path: &Path, git_repo_path: &Path,
git_repo: gix::ThreadSafeRepository, git_repo: gix::ThreadSafeRepository,
change_id_setting: bool,
) -> Result<Self, Box<GitBackendInitError>> { ) -> Result<Self, Box<GitBackendInitError>> {
let extra_path = store_path.join("extra"); let extra_path = store_path.join("extra");
fs::create_dir(&extra_path) fs::create_dir(&extra_path)
@ -271,7 +296,11 @@ impl GitBackend {
.map_err(GitBackendInitError::Path)?; .map_err(GitBackendInitError::Path)?;
}; };
let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH); let extra_metadata_store = TableStore::init(extra_path, HASH_LENGTH);
Ok(GitBackend::new(git_repo, extra_metadata_store)) Ok(GitBackend::new(
git_repo,
extra_metadata_store,
change_id_setting,
))
} }
pub fn load( pub fn load(
@ -294,7 +323,12 @@ impl GitBackend {
) )
.map_err(GitBackendLoadError::OpenRepository)?; .map_err(GitBackendLoadError::OpenRepository)?;
let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH); let extra_metadata_store = TableStore::load(store_path.join("extra"), HASH_LENGTH);
Ok(GitBackend::new(repo, extra_metadata_store)) let change_id_setting = settings.git_settings().unwrap_or_default().change_id;
Ok(GitBackend::new(
repo,
extra_metadata_store,
change_id_setting,
))
} }
fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> { fn lock_git_repo(&self) -> MutexGuard<'_, gix::Repository> {
@ -516,19 +550,30 @@ fn commit_from_git_without_root_parent(
.try_to_commit_ref() .try_to_commit_ref()
.map_err(|err| to_read_object_err(err, id))?; .map_err(|err| to_read_object_err(err, id))?;
// We reverse the bits of the commit id to create the change id. We don't want // If the git header has a change-id field, we attempt to convert that to a
// to use the first bytes unmodified because then it would be ambiguous // valid JJ Change Id
// if a given hash prefix refers to the commit id or the change id. It let change_id = commit
// would have been enough to pick the last 16 bytes instead of the .extra_headers()
// leading 16 bytes to address that. We also reverse the bits to make it less .find("change-id")
// likely that users depend on any relationship between the two ids. .and_then(to_forward_hex)
let change_id = ChangeId::new( .and_then(|change_id_hex| ChangeId::try_from_hex(change_id_hex.as_str()).ok())
id.as_bytes()[4..HASH_LENGTH] .filter(|val| val.as_bytes().len() == CHANGE_ID_LENGTH)
.iter() // Otherwise, we reverse the bits of the commit id to create the change id.
.rev() // We don't want to use the first bytes unmodified because then it would be
.map(|b| b.reverse_bits()) // ambiguous if a given hash prefix refers to the commit id or the change id.
.collect(), // It would have been enough to pick the last 16 bytes instead of the
); // leading 16 bytes to address that. We also reverse the bits to make it
// less likely that users depend on any relationship between the two ids.
.unwrap_or_else(|| {
ChangeId::new(
id.as_bytes()[4..HASH_LENGTH]
.iter()
.rev()
.map(|b| b.reverse_bits())
.collect(),
)
});
// shallow commits don't have parents their parents actually fetched, so we // shallow commits don't have parents their parents actually fetched, so we
// discard them here // discard them here
// TODO: This causes issues when a shallow repository is deepened/unshallowed // TODO: This causes issues when a shallow repository is deepened/unshallowed
@ -664,7 +709,9 @@ fn serialize_extras(commit: &Commit) -> Vec<u8> {
fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) { fn deserialize_extras(commit: &mut Commit, bytes: &[u8]) {
let proto = crate::protos::git_store::Commit::decode(bytes).unwrap(); let proto = crate::protos::git_store::Commit::decode(bytes).unwrap();
commit.change_id = ChangeId::new(proto.change_id); if !proto.change_id.is_empty() {
commit.change_id = ChangeId::new(proto.change_id);
}
if let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree { if let MergedTreeId::Legacy(legacy_tree_id) = &commit.root_tree {
if proto.uses_tree_conflict_format { if proto.uses_tree_conflict_format {
if !proto.root_tree.is_empty() { if !proto.root_tree.is_empty() {
@ -1220,6 +1267,13 @@ impl Backend for GitBackend {
)); ));
} }
} }
if self.change_id_setting {
extra_headers.push((
BString::new(CHANGE_ID_COMMIT_HEADER.to_vec()),
BString::new(contents.change_id.reverse_hex().into()),
));
}
let extras = serialize_extras(&contents); let extras = serialize_extras(&contents);
// If two writers write commits of the same id with different metadata, they // If two writers write commits of the same id with different metadata, they
@ -1511,11 +1565,16 @@ fn bytes_vec_from_json(value: &serde_json::Value) -> Vec<u8> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::str::FromStr as _;
use assert_matches::assert_matches; use assert_matches::assert_matches;
use hex::ToHex as _; use hex::ToHex as _;
use pollster::FutureExt as _; use pollster::FutureExt as _;
use toml_edit::DocumentMut;
use super::*; use super::*;
use crate::config::ConfigLayer;
use crate::config::ConfigSource;
use crate::config::StackedConfig; use crate::config::StackedConfig;
use crate::content_hash::blake2b_hash; use crate::content_hash::blake2b_hash;
use crate::tests::new_temp_dir; use crate::tests::new_temp_dir;
@ -1815,6 +1874,55 @@ mod tests {
assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_str); assert_eq!(std::str::from_utf8(&sig.data).unwrap(), commit_str);
} }
#[test]
fn round_trip_change_id_via_git_header() {
let settings = user_settings_with_change_id();
let temp_dir = new_temp_dir();
let store_path = temp_dir.path();
let temp_dir2 = new_temp_dir();
let empty_store = temp_dir2.path();
let git_repo_path = temp_dir.path().join("git");
let git_repo = git_init(git_repo_path);
let backend = GitBackend::init_external(&settings, store_path, git_repo.path()).unwrap();
let original_change_id = ChangeId::from_hex("1111eeee1111eeee1111eeee1111eeee");
let commit = Commit {
parents: vec![backend.root_commit_id().clone()],
predecessors: vec![],
root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
change_id: original_change_id.clone(),
description: "initial".to_string(),
author: create_signature(),
committer: create_signature(),
secure_sig: None,
};
let (initial_commit_id, _init_commit) =
backend.write_commit(commit, None).block_on().unwrap();
let commit = backend.read_commit(&initial_commit_id).block_on().unwrap();
assert_eq!(
commit.change_id, original_change_id,
"The change-id header did not roundtrip"
);
// Because of how change ids are also persisted in extra proto files,
// initialize a new store without those files, but reuse the same git
// storage. This change-id must be derived from the git commit header.
let no_extra_backend =
GitBackend::init_external(&settings, empty_store, git_repo.path()).unwrap();
let no_extra_commit = no_extra_backend
.read_commit(&initial_commit_id)
.block_on()
.unwrap();
assert_eq!(
no_extra_commit.change_id, original_change_id,
"The change-id header did not roundtrip"
);
}
#[test] #[test]
fn read_empty_string_placeholder() { fn read_empty_string_placeholder() {
let git_signature1 = gix::actor::SignatureRef { let git_signature1 = gix::actor::SignatureRef {
@ -2137,7 +2245,7 @@ mod tests {
parents: vec![backend.root_commit_id().clone()], parents: vec![backend.root_commit_id().clone()],
predecessors: vec![], predecessors: vec![],
root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()), root_tree: MergedTreeId::Legacy(backend.empty_tree_id().clone()),
change_id: ChangeId::new(vec![]), change_id: ChangeId::from_hex("7f0a7ce70354b22efcccf7bf144017c4"),
description: "initial".to_string(), description: "initial".to_string(),
author: create_signature(), author: create_signature(),
committer: create_signature(), committer: create_signature(),
@ -2253,4 +2361,13 @@ mod tests {
let config = StackedConfig::with_defaults(); let config = StackedConfig::with_defaults();
UserSettings::from_config(config).unwrap() UserSettings::from_config(config).unwrap()
} }
fn user_settings_with_change_id() -> UserSettings {
let mut config = StackedConfig::with_defaults();
config.add_layer(ConfigLayer::with_data(
ConfigSource::Default,
DocumentMut::from_str("git.write-change-id-header = true").unwrap(),
));
UserSettings::from_config(config).unwrap()
}
} }

View File

@ -29,10 +29,11 @@ fn to_forward_hex_digit(b: u8) -> Option<u8> {
} }
} }
pub fn to_forward_hex(reverse_hex: &str) -> Option<String> { pub fn to_forward_hex(reverse_hex: impl AsRef<[u8]>) -> Option<String> {
reverse_hex reverse_hex
.bytes() .as_ref()
.map(|b| to_forward_hex_digit(b).map(char::from)) .iter()
.map(|b| to_forward_hex_digit(*b).map(char::from))
.collect() .collect()
} }

View File

@ -66,6 +66,7 @@ pub struct GitSettings {
#[cfg(feature = "git2")] #[cfg(feature = "git2")]
pub subprocess: bool, pub subprocess: bool,
pub executable_path: PathBuf, pub executable_path: PathBuf,
pub change_id: bool,
} }
impl GitSettings { impl GitSettings {
@ -76,6 +77,7 @@ impl GitSettings {
#[cfg(feature = "git2")] #[cfg(feature = "git2")]
subprocess: settings.get_bool("git.subprocess")?, subprocess: settings.get_bool("git.subprocess")?,
executable_path: settings.get("git.executable-path")?, executable_path: settings.get("git.executable-path")?,
change_id: settings.get("git.write-change-id-header")?,
}) })
} }
} }
@ -88,6 +90,7 @@ impl Default for GitSettings {
#[cfg(feature = "git2")] #[cfg(feature = "git2")]
subprocess: true, subprocess: true,
executable_path: PathBuf::from("git"), executable_path: PathBuf::from("git"),
change_id: false,
} }
} }
} }