diff --git a/Cargo.lock b/Cargo.lock
index 324c1919ee3..e5a888901aa 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -103,15 +103,6 @@ version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
 
-[[package]]
-name = "arbitrary"
-version = "1.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
-dependencies = [
- "derive_arbitrary",
-]
-
 [[package]]
 name = "arc-swap"
 version = "1.7.1"
@@ -857,17 +848,6 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "930c7171c8df9fb1782bdf9b918ed9ed2d33d1d22300abb754f9085bc48bf8e8"
 
-[[package]]
-name = "derive_arbitrary"
-version = "1.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.111",
-]
-
 [[package]]
 name = "diff"
 version = "0.1.13"
@@ -1418,9 +1398,9 @@ dependencies = [
  "gix-worktree",
  "gix-worktree-stream",
  "jiff",
+ "rawzip",
  "tar",
  "thiserror 2.0.17",
- "zip",
 ]
 
 [[package]]
@@ -3850,6 +3830,12 @@ dependencies = [
  "unicode-width",
 ]
 
+[[package]]
+name = "rawzip"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "27cc19f82b641448e861623f52a6a6413bbc0595b62a9d45bf31ccdf18aab72c"
+
 [[package]]
 name = "rayon"
 version = "1.11.0"
@@ -5858,19 +5844,6 @@ dependencies = [
  "syn 2.0.111",
 ]
 
-[[package]]
-name = "zip"
-version = "6.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eb2a05c7c36fde6c09b08576c9f7fb4cda705990f73b58fe011abf7dfb24168b"
-dependencies = [
- "arbitrary",
- "crc32fast",
- "flate2",
- "indexmap",
- "memchr",
-]
-
 [[package]]
 name = "zlib-rs"
 version = "0.5.2"
diff --git a/gix-archive/Cargo.toml b/gix-archive/Cargo.toml
index ec0bff184da..f5623fb1504 100644
--- a/gix-archive/Cargo.toml
+++ b/gix-archive/Cargo.toml
@@ -23,7 +23,7 @@ tar = ["dep:tar", "dep:gix-path"]
 tar_gz = ["tar", "dep:flate2"]
 
 ## Enable the `zip` archive format.
-zip = ["dep:zip"]
+zip = ["dep:rawzip", "dep:flate2"]
 
 
 [dependencies]
@@ -33,7 +33,7 @@ gix-path = { version = "^0.10.22", path = "../gix-path", optional = true }
 gix-date = { version = "^0.11.0", path = "../gix-date" }
 
 flate2 = { version = "1.1.1", optional = true, default-features = false, features = ["zlib-rs"] }
-zip = { version = "6.0.0", optional = true, default-features = false, features = ["deflate-flate2-zlib-rs"] }
+rawzip = { version = "0.4.2", optional = true }
 jiff = { version = "0.2.15", default-features = false, features = ["std"] }
 
 thiserror = "2.0.17"
diff --git a/gix-archive/src/write.rs b/gix-archive/src/write.rs
index c9813f36de3..b2c45a21ff6 100644
--- a/gix-archive/src/write.rs
+++ b/gix-archive/src/write.rs
@@ -2,6 +2,9 @@ use gix_worktree_stream::{Entry, Stream};
 
 use crate::{Error, Format, Options};
 
+#[cfg(feature = "zip")]
+use std::io::Write;
+
 /// Write all stream entries in `stream` as provided by `next_entry(stream)` to `out` configured according to `opts` which
 /// also includes the streaming format.
 ///
@@ -135,24 +138,9 @@ where
 
     #[cfg(feature = "zip")]
     {
-        let mut ar = zip::write::ZipWriter::new(out);
+        let mut ar = rawzip::ZipArchiveWriter::new(out);
         let mut buf = Vec::new();
-        let zdt = jiff::Timestamp::from_second(opts.modification_time)
-            .map_err(|err| Error::InvalidModificationTime(Box::new(err)))?
-            .to_zoned(jiff::tz::TimeZone::UTC);
-        let mtime = zip::DateTime::from_date_and_time(
-            zdt.year()
-                .try_into()
-                .map_err(|err| Error::InvalidModificationTime(Box::new(err)))?,
-            // These are all OK because month, day, hour, minute and second
-            // are always positive.
-            zdt.month().try_into().expect("non-negative"),
-            zdt.day().try_into().expect("non-negative"),
-            zdt.hour().try_into().expect("non-negative"),
-            zdt.minute().try_into().expect("non-negative"),
-            zdt.second().try_into().expect("non-negative"),
-        )
-        .map_err(|err| Error::InvalidModificationTime(Box::new(err)))?;
+        let mtime = rawzip::time::UtcDateTime::from_unix(opts.modification_time);
         while let Some(entry) = next_entry(stream)? {
             append_zip_entry(
                 &mut ar,
@@ -171,35 +159,99 @@ where
 
+/// Append `entry` to the zip archive `ar`, using `mtime` as its modification time.
+///
+/// Blobs are deflate-compressed at `compression_level` (clamped to `0..=9`, or the `flate2`
+/// default if `None`), trees and commits become directory entries, and symbolic links are
+/// stored uncompressed with the link target as content. `buf` is scratch space for the latter.
+/// `tree_prefix`, if set, is applied to the entry's path via `add_prefix`.
+///
+/// Fails if the entry's path isn't valid UTF-8, or if reading `entry` or writing to `ar` fails.
 #[cfg(feature = "zip")]
 fn append_zip_entry<W: std::io::Write + std::io::Seek>(
-    ar: &mut zip::write::ZipWriter<W>,
+    ar: &mut rawzip::ZipArchiveWriter<W>,
     mut entry: gix_worktree_stream::Entry<'_>,
     buf: &mut Vec<u8>,
-    mtime: zip::DateTime,
+    mtime: rawzip::time::UtcDateTime,
     compression_level: Option<i64>,
     tree_prefix: Option<&bstr::BString>,
 ) -> Result<(), Error> {
-    let file_opts = zip::write::SimpleFileOptions::default()
-        .compression_method(zip::CompressionMethod::Deflated)
-        .compression_level(compression_level)
-        .large_file(entry.bytes_remaining().is_none_or(|len| len > u32::MAX as usize))
-        .last_modified_time(mtime)
-        .unix_permissions(if entry.mode.is_executable() { 0o755 } else { 0o644 });
     let path = add_prefix(entry.relative_path(), tree_prefix).into_owned();
+    let unix_permissions = if entry.mode.is_executable() { 0o755 } else { 0o644 };
+
     match entry.mode.kind() {
         gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable => {
-            ar.start_file(path.to_string(), file_opts)
-                .map_err(std::io::Error::other)?;
-            std::io::copy(&mut entry, ar)?;
+            use bstr::ByteSlice;
+            let file_builder = ar
+                .new_file(path.to_str().map_err(|_| {
+                    Error::Io(std::io::Error::new(
+                        std::io::ErrorKind::InvalidData,
+                        "Invalid UTF-8 in file path",
+                    ))
+                })?)
+                .compression_method(rawzip::CompressionMethod::Deflate)
+                .last_modified(mtime)
+                .unix_permissions(unix_permissions);
+
+            let (mut zip_entry, config) = file_builder.start().map_err(std::io::Error::other)?;
+
+            // Use flate2 for compression. Level 9 is the maximum compression level for deflate.
+            let encoder = flate2::write::DeflateEncoder::new(
+                &mut zip_entry,
+                match compression_level {
+                    None => flate2::Compression::default(),
+                    Some(level) => flate2::Compression::new(level.clamp(0, 9) as u32),
+                },
+            );
+            let mut writer = config.wrap(encoder);
+            std::io::copy(&mut entry, &mut writer)?;
+            let (encoder, descriptor) = writer.finish().map_err(std::io::Error::other)?;
+            encoder.finish()?;
+            zip_entry.finish(descriptor).map_err(std::io::Error::other)?;
         }
         gix_object::tree::EntryKind::Tree | gix_object::tree::EntryKind::Commit => {
-            ar.add_directory(path.to_string(), file_opts)
-                .map_err(std::io::Error::other)?;
+            use bstr::ByteSlice;
+            // rawzip requires directory paths to end with '/'
+            let mut dir_path = path.to_str().map_err(|_| {
+                Error::Io(std::io::Error::new(
+                    std::io::ErrorKind::InvalidData,
+                    "Invalid UTF-8 in directory path",
+                ))
+            })?.to_string();
+            if !dir_path.ends_with('/') {
+                dir_path.push('/');
+            }
+            ar.new_dir(&dir_path)
+                .last_modified(mtime)
+                .unix_permissions(unix_permissions)
+                .create()
+                .map_err(std::io::Error::other)?;
         }
         gix_object::tree::EntryKind::Link => {
             use bstr::ByteSlice;
+            buf.clear();
             std::io::copy(&mut entry, buf)?;
-            ar.add_symlink(path.to_string(), buf.as_bstr().to_string(), file_opts)
+
+            // For symlinks, we need to create a file with symlink permissions
+            let symlink_path = path.to_str().map_err(|_| {
+                Error::Io(std::io::Error::new(
+                    std::io::ErrorKind::InvalidData,
+                    "Invalid UTF-8 in symlink path",
+                ))
+            })?;
+
+            let (mut zip_entry, config) = ar
+                .new_file(symlink_path)
+                .compression_method(rawzip::CompressionMethod::Store)
+                .last_modified(mtime)
+                .unix_permissions(0o120644) // Symlink mode
+                .start()
                 .map_err(std::io::Error::other)?;
+
+            let mut writer = config.wrap(&mut zip_entry);
+            // The link target is the entry's content. Git doesn't require it to be UTF-8,
+            // so write the bytes as read instead of round-tripping them through `str`.
+            writer.write_all(buf.as_slice())?;
+            let (_, descriptor) = writer.finish().map_err(std::io::Error::other)?;
+            zip_entry.finish(descriptor).map_err(std::io::Error::other)?;
         }
     }
     Ok(())
diff --git a/gix-archive/tests/archive.rs b/gix-archive/tests/archive.rs
index b2310075588..25a7e6c6ef5 100644
--- a/gix-archive/tests/archive.rs
+++ b/gix-archive/tests/archive.rs
@@ -167,14 +167,18 @@ mod from_tree {
             },
             |buf| {
                 assert!(
-                    buf.len() < 1280,
-                    "much bigger than uncompressed for some reason (565): {} < 1270",
+                    buf.len() < 1400,
+                    "much bigger than uncompressed for some reason (565): {} < 1400",
                     buf.len()
                 );
-                let mut ar = zip::ZipArchive::new(std::io::Cursor::new(buf.as_slice()))?;
+                let ar = rawzip::ZipArchive::from_slice(buf.as_slice())?;
                 assert_eq!(
                     {
-                        let mut n: Vec<_> = ar.file_names().collect();
+                        let mut n: Vec<_> = Vec::new();
+                        for entry_result in ar.entries() {
+                            let entry = entry_result?;
+                            n.push(String::from_utf8_lossy(entry.file_path().as_ref()).to_string());
+                        }
                         n.sort();
                         n
                     },
@@ -190,13 +194,27 @@ mod from_tree {
                         "prefix/symlink-to-a"
                     ]
                 );
-                let mut link = ar.by_name("prefix/symlink-to-a")?;
-                assert!(!link.is_dir());
-                assert!(link.is_symlink(), "symlinks are supported as well, but only on Unix");
-                assert_eq!(link.unix_mode(), Some(0o120644), "the mode specifies what it should be");
-                let mut buf = Vec::new();
-                link.read_to_end(&mut buf)?;
-                assert_eq!(buf.as_bstr(), "a");
+
+                // Find the symlink entry
+                let ar = rawzip::ZipArchive::from_slice(buf.as_slice())?;
+                let mut found_link = false;
+                for entry_result in ar.entries() {
+                    let entry = entry_result?;
+                    if String::from_utf8_lossy(entry.file_path().as_ref()) == "prefix/symlink-to-a" {
+                        assert!(!entry.is_dir());
+                        let mode = entry.mode();
+                        assert!(mode.is_symlink(), "symlinks are supported as well, but only on Unix");
+                        assert_eq!(mode.value(), 0o120644, "the mode specifies what it should be");
+                        let wayfinder = entry.wayfinder();
+                        let zip_entry = ar.get_entry(wayfinder)?;
+                        // For symlinks stored with Store compression, the data is uncompressed
+                        let data = zip_entry.data();
+                        assert_eq!(data.as_bstr(), "a");
+                        found_link = true;
+                        break;
+                    }
+                }
+                assert!(found_link, "symlink entry should be found");
                 Ok(())
             },
         )