diff --git a/Cargo.lock b/Cargo.lock index 00b2e56836e..324c1919ee3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1612,7 +1612,8 @@ dependencies = [ "gix-trace", "gix-traverse", "gix-worktree", - "imara-diff", + "imara-diff 0.1.8", + "imara-diff 0.2.0", "serde", "thiserror 2.0.17", ] @@ -1904,7 +1905,7 @@ dependencies = [ "gix-trace", "gix-utils", "gix-worktree", - "imara-diff", + "imara-diff 0.1.8", "pretty_assertions", "serde", "termtree", @@ -2915,6 +2916,16 @@ dependencies = [ "hashbrown 0.15.5", ] +[[package]] +name = "imara-diff" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f01d462f766df78ab820dd06f5eb700233c51f0f4c2e846520eaf4ba6aa5c5c" +dependencies = [ + "hashbrown 0.15.5", + "memchr", +] + [[package]] name = "indexmap" version = "2.12.1" diff --git a/gix-diff/Cargo.toml b/gix-diff/Cargo.toml index 88a73df6a51..d03bca70c44 100644 --- a/gix-diff/Cargo.toml +++ b/gix-diff/Cargo.toml @@ -15,7 +15,20 @@ autotests = false [features] default = ["blob", "index"] ## Enable diffing of blobs using imara-diff. -blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-traverse"] +blob = [ + "dep:imara-diff", + "dep:gix-filter", + "dep:gix-worktree", + "dep:gix-path", + "dep:gix-fs", + "dep:gix-command", + "dep:gix-tempfile", + "dep:gix-trace", + "dep:gix-traverse" +] +## An experimental use of the v0.2 branch of `imara-diff` to allow trying it out, and for writing tests against it more easily. +## We will decide later how it should actually be exposed. +blob-experimental = ["dep:imara-diff-v2"] ## Enable diffing of two indices, which also allows for a generic rewrite tracking implementation. index = ["dep:gix-index", "dep:gix-pathspec", "dep:gix-attributes"] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
@@ -43,6 +56,7 @@ gix-traverse = { version = "^0.49.0", path = "../gix-traverse", optional = true thiserror = "2.0.17" imara-diff = { version = "0.1.8", optional = true } +imara-diff-v2 = { version = "0.2.0", optional = true, package = "imara-diff" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } getrandom = { version = "0.2.8", optional = true, default-features = false, features = ["js"] } bstr = { version = "1.12.0", default-features = false } diff --git a/gix-diff/src/blob/mod.rs b/gix-diff/src/blob/mod.rs index 5cf1f0c806e..e8f205ee567 100644 --- a/gix-diff/src/blob/mod.rs +++ b/gix-diff/src/blob/mod.rs @@ -5,6 +5,42 @@ use std::{collections::HashMap, path::PathBuf}; use bstr::BString; pub use imara_diff::*; +/// Re-export imara-diff v0.2 types for use with slider heuristics. +/// +/// This module provides access to the v0.2 API of imara-diff, which includes +/// support for Git's slider heuristics to produce more intuitive diffs. +#[cfg(feature = "blob-experimental")] +pub use imara_diff_v2 as v2; + +/// Compute a diff with Git's slider heuristics to produce more intuitive diffs. +/// +/// This function uses `imara-diff` v0.2 which provides the [`v2::Diff`] structure +/// that supports postprocessing with slider heuristics. The slider heuristics move +/// diff hunks to more intuitive locations based on indentation and other factors, +/// resulting in diffs that are more readable and match Git's output more closely. 
+/// +/// # Examples +/// +/// ``` +/// use gix_diff::blob::{diff_with_slider_heuristics, v2::{Algorithm, InternedInput}}; +/// +/// let before = "fn foo() {\n let x = 1;\n}\n"; +/// let after = "fn foo() {\n let x = 2;\n}\n"; +/// +/// let input = InternedInput::new(before, after); +/// let diff = diff_with_slider_heuristics(Algorithm::Histogram, &input); +/// +/// // The diff now has slider heuristics applied +/// assert_eq!(diff.count_removals(), 1); +/// assert_eq!(diff.count_additions(), 1); +/// ``` +#[cfg(feature = "blob-experimental")] +pub fn diff_with_slider_heuristics<T: AsRef<[u8]>>(algorithm: v2::Algorithm, input: &v2::InternedInput<T>) -> v2::Diff { + let mut diff = v2::Diff::compute(algorithm, input); + diff.postprocess_lines(input); + diff +} + /// pub mod pipeline; diff --git a/gix-diff/tests/Cargo.toml b/gix-diff/tests/Cargo.toml index f30349598f9..7e6bdcd8818 100644 --- a/gix-diff/tests/Cargo.toml +++ b/gix-diff/tests/Cargo.toml @@ -17,7 +17,7 @@ name = "diff" path = "diff/main.rs" [dev-dependencies] -gix-diff = { path = ".." } +gix-diff = { path = "..", features = ["blob-experimental"] } gix-index = { path = "../../gix-index" } gix-pathspec = { path = "../../gix-pathspec" } gix-hash = { path = "../../gix-hash" } diff --git a/gix-diff/tests/diff/blob/mod.rs b/gix-diff/tests/diff/blob/mod.rs index 5742dfd4350..f1e522c09bf 100644 --- a/gix-diff/tests/diff/blob/mod.rs +++ b/gix-diff/tests/diff/blob/mod.rs @@ -2,3 +2,4 @@ pub(crate) mod pipeline; mod platform; mod slider; mod unified_diff; +mod v2; diff --git a/gix-diff/tests/diff/blob/v2.rs b/gix-diff/tests/diff/blob/v2.rs new file mode 100644 index 00000000000..b927a893e2a --- /dev/null +++ b/gix-diff/tests/diff/blob/v2.rs @@ -0,0 +1,279 @@ +//! We can consider to move some of these tests to the actual imara-diff test-suite as well. 
+use gix_diff::blob::{diff_with_slider_heuristics, v2}; + +/// Test that the UnifiedDiffPrinter can be used with the v0.2 API +#[test] +fn unified_diff_printer_usage() -> crate::Result { + let before = r#"fn foo() { + let x = 1; + println!("x = {}", x); +} +"#; + + let after = r#"fn foo() { + let x = 2; + println!("x = {}", x); + println!("done"); +} +"#; + + let input = v2::InternedInput::new(before, after); + let diff = diff_with_slider_heuristics(v2::Algorithm::Histogram, &input); + + let printer = v2::BasicLineDiffPrinter(&input.interner); + insta::assert_snapshot!(util::unidiff(&diff, &input, &printer), @r#" + @@ -2,1 +2,1 @@ + - let x = 1; + + let x = 2; + @@ -4,0 +4,1 @@ + + println!("done"); + "#); + Ok(()) +} + +/// Test slider heuristics with indentation +#[test] +fn slider_heuristics_with_indentation() -> crate::Result { + let before = r#"fn main() { + if true { + println!("hello"); + } +} +"#; + + let after = r#"fn main() { + if true { + println!("hello"); + println!("world"); + } +} +"#; + + let input = v2::InternedInput::new(before, after); + let diff = diff_with_slider_heuristics(v2::Algorithm::Histogram, &input); + + let printer = v2::BasicLineDiffPrinter(&input.interner); + insta::assert_snapshot!(util::unidiff(&diff, &input, &printer), @r#" + @@ -4,0 +4,1 @@ + + println!("world"); + "#); + + Ok(()) +} + +/// Test that Myers algorithm also works with slider heuristics +#[test] +fn myers_with_slider_heuristics() -> crate::Result { + let before = "a\nb\nc\n"; + let after = "a\nx\nc\n"; + + let input = v2::InternedInput::new(before, after); + let diff = diff_with_slider_heuristics(v2::Algorithm::Myers, &input); + + let printer = v2::BasicLineDiffPrinter(&input.interner); + insta::assert_snapshot!(util::unidiff(&diff, &input, &printer), @r" + @@ -2,1 +2,1 @@ + -b + +x + "); + + Ok(()) +} + +/// Test empty diff +#[test] +fn empty_diff_with_slider_heuristics() -> crate::Result { + let before = "unchanged\n"; + let after = "unchanged\n"; + + let input = 
v2::InternedInput::new(before, after); + let diff = diff_with_slider_heuristics(v2::Algorithm::Histogram, &input); + + assert_eq!(diff.count_removals(), 0); + assert_eq!(diff.count_additions(), 0); + + Ok(()) +} + +/// Test complex multi-hunk diff with slider heuristics +#[test] +fn multi_hunk_diff_with_slider_heuristics() -> crate::Result { + let before = r#"struct Foo { + x: i32, +} + +impl Foo { + fn new() -> Self { + Foo { x: 0 } + } +} +"#; + + let after = r#"struct Foo { + x: i32, + y: i32, +} + +impl Foo { + fn new() -> Self { + Foo { x: 0, y: 0 } + } +} +"#; + + let input = v2::InternedInput::new(before, after); + let diff = diff_with_slider_heuristics(v2::Algorithm::Histogram, &input); + + let printer = v2::BasicLineDiffPrinter(&input.interner); + insta::assert_snapshot!(util::unidiff(&diff, &input, &printer), @r" + @@ -3,0 +3,1 @@ + + y: i32, + @@ -7,1 +8,1 @@ + - Foo { x: 0 } + + Foo { x: 0, y: 0 } + "); + + Ok(()) +} + +/// Test custom context size in UnifiedDiffConfig +#[test] +fn custom_context_size() -> crate::Result { + let before = "line1\nline2\nline3\nline4\nline5\nline6\nline7\n"; + let after = "line1\nline2\nline3\nMODIFIED\nline5\nline6\nline7\n"; + + let input = v2::InternedInput::new(before, after); + let diff = diff_with_slider_heuristics(v2::Algorithm::Histogram, &input); + + let printer = v2::BasicLineDiffPrinter(&input.interner); + + // Test with context size of 1 + let mut config = v2::UnifiedDiffConfig::default(); + config.context_len(1); + let unified = diff.unified_diff(&printer, config, &input); + insta::assert_snapshot!(unified, @r" + @@ -3,3 +3,3 @@ + line3 + -line4 + +MODIFIED + line5 + "); + + // Test with context size of 3 (default) + let config_default = v2::UnifiedDiffConfig::default(); + let unified_default = diff.unified_diff(&printer, config_default, &input); + + // Smaller context should have fewer lines + insta::assert_snapshot!(unified_default, @r" + @@ -1,7 +1,7 @@ + line1 + line2 + line3 + -line4 + +MODIFIED + line5 + 
line6 + line7 + "); + + Ok(()) +} + +/// Test that hunks iterator works correctly +#[test] +fn hunks_iterator() -> crate::Result { + let before = "a\nb\nc\nd\ne\n"; + let after = "a\nX\nc\nY\ne\n"; + + let input = v2::InternedInput::new(before, after); + let diff = diff_with_slider_heuristics(v2::Algorithm::Histogram, &input); + + let hunks: Vec<_> = diff.hunks().collect(); + + let printer = v2::BasicLineDiffPrinter(&input.interner); + insta::assert_snapshot!(util::unidiff(&diff, &input, &printer), @r" + @@ -2,1 +2,1 @@ + -b + +X + @@ -4,1 +4,1 @@ + -d + +Y + "); + // Should have two separate hunks + insta::assert_debug_snapshot!(hunks, @r" + [ + Hunk { + before: 1..2, + after: 1..2, + }, + Hunk { + before: 3..4, + after: 3..4, + }, + ] + "); + Ok(()) +} + +/// Test postprocessing without heuristic +#[test] +fn postprocess_no_heuristic() -> crate::Result { + let before = "a\nb\nc\n"; + let after = "a\nX\nc\n"; + + let input = v2::InternedInput::new(before, after); + + // Create diff but postprocess without heuristic + let mut diff = v2::Diff::compute(v2::Algorithm::Histogram, &input); + diff.postprocess_no_heuristic(&input); + + let printer = v2::BasicLineDiffPrinter(&input.interner); + insta::assert_snapshot!(util::unidiff(&diff, &input, &printer), @r" + @@ -2,1 +2,1 @@ + -b + +X + "); + + Ok(()) +} + +/// Test that the v0.2 API exposes the IndentHeuristic +#[test] +fn indent_heuristic_available() -> crate::Result { + let before = "fn foo() {\n x\n}\n"; + let after = "fn foo() {\n y\n}\n"; + + let input = v2::InternedInput::new(before, after); + + let mut diff = v2::Diff::compute(v2::Algorithm::Histogram, &input); + + let heuristic = v2::IndentHeuristic::new(|token| { + let line: &str = input.interner[token]; + v2::IndentLevel::for_ascii_line(line.as_bytes().iter().copied(), 4) + }); + + diff.postprocess_with_heuristic(&input, heuristic); + + let printer = v2::BasicLineDiffPrinter(&input.interner); + insta::assert_snapshot!(util::unidiff(&diff, &input, &printer), 
@r" + @@ -2,1 +2,1 @@ + - x + + y + "); + + Ok(()) +} + +mod util { + use gix_diff::blob::v2; + + pub fn unidiff<'a>( + diff: &'a v2::Diff, + input: &'a v2::InternedInput<&str>, + printer: &'a v2::BasicLineDiffPrinter<'_, str>, + ) -> v2::UnifiedDiff<'a, v2::BasicLineDiffPrinter<'a, str>> { + let mut config = v2::UnifiedDiffConfig::default(); + config.context_len(0); + diff.unified_diff(printer, config, input) + } +}