From e23325304dcab1e91381c7a1e73ca07bf6c16a83 Mon Sep 17 00:00:00 2001
From: MrBrocoli <59612417+Mr-Brocoli@users.noreply.github.com>
Date: Wed, 24 Dec 2025 20:03:13 -0600
Subject: [PATCH] Added zstd_match6Found_branch and required code

---
 lib/compress/zstd_fast.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/lib/compress/zstd_fast.c b/lib/compress/zstd_fast.c
index ee25bcbac8d..dd087fb5b06 100644
--- a/lib/compress/zstd_fast.c
+++ b/lib/compress/zstd_fast.c
@@ -97,7 +97,7 @@ void ZSTD_fillHashTable(ZSTD_MatchState_t* ms,
 }
 
 
-typedef int (*ZSTD_match4Found) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit);
+typedef int (*ZSTD_matchFound) (const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit);
 
 static int
 ZSTD_match4Found_cmov(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
@@ -140,6 +140,22 @@ ZSTD_match4Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 ma
     return (MEM_read32(currentPtr) == mval);
 }
 
+static int
+ZSTD_match6Found_branch(const BYTE* currentPtr, const BYTE* matchAddress, U32 matchIdx, U32 idxLowLimit)
+{
+    /* using a branch instead of a cmov,
+     * because it's faster in scenarios where matchIdx >= idxLowLimit is generally true,
+     * aka almost all candidates are within range */
+    U32 mval;
+    if (matchIdx >= idxLowLimit) {
+        mval = MEM_read32(matchAddress);
+    } else {
+        mval = MEM_read32(currentPtr) ^ 1; /* guaranteed to not match. */
+    }
+
+    return (MEM_read32(currentPtr) == mval && MEM_read16(currentPtr+4) == MEM_read16(matchAddress+4));
+}
+
 
 /**
  * If you squint hard enough (and ignore repcodes), the search operation at any
@@ -224,6 +240,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
     U32 offcode;
     const BYTE* match0;
     size_t mLength;
+
 
     /* ip0 and ip1 are always adjacent. The targetLength skipping and
      * uncompressibility acceleration is applied to every other position,
@@ -232,7 +249,13 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
     size_t step;
     const BYTE* nextStep;
     const size_t kStepIncr = (1 << (kSearchStrength - 1));
-    const ZSTD_match4Found matchFound = useCmov ? ZSTD_match4Found_cmov : ZSTD_match4Found_branch;
+
+    /* If we use the cmov condition, then just always do 4 byte matching.
+     * If we are using the branch match found, and have a hash of 6 or greater,
+     * then we verify we have found at least a 6 byte match before continuing,
+     * as the extra 2 byte compare operation is a bit faster than relying on ZSTD_count later. */
+    const ZSTD_matchFound matchFound = useCmov ? ZSTD_match4Found_cmov : (mls >= 6 ? ZSTD_match6Found_branch : ZSTD_match4Found_branch);
+    const size_t mLengthGuaranteed = (!useCmov && mls >= 6) ? 6 : 4;
 
     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
     ip0 += (ip0 == prefixStart);
@@ -318,7 +341,8 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
             /* Write next hash table entry, since it's already calculated */
             if (step <= 4) {
                 /* Avoid writing an index if it's >= position where search will resume.
-                 * The minimum possible match has length 4, so search can resume at ip0 + 4.
+                 * The minimum possible match has length 4, so search
+                 * can resume at ip0 + 4.
                  */
                 hashTable[hash1] = (U32)(ip1 - base);
             }
@@ -381,7 +405,7 @@ size_t ZSTD_compressBlock_fast_noDict_generic(
     rep_offset2 = rep_offset1;
     rep_offset1 = (U32)(ip0-match0);
     offcode = OFFSET_TO_OFFBASE(rep_offset1);
-    mLength = 4;
+    mLength = mLengthGuaranteed;
 
     /* Count the backwards match length. */
     while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
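
For reference, here is a small standalone C sketch (not part of the patch) of the candidate check that ZSTD_match6Found_branch adds: read the candidate only when its index is inside the valid window, and require a 4-byte plus 2-byte match before accepting it. The read32/read16 helpers and the driver in main are local illustrations only; in zstd itself these reads go through MEM_read32/MEM_read16 and the check runs inside ZSTD_compressBlock_fast_noDict_generic.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Local stand-ins for zstd's MEM_read32 / MEM_read16; memcpy sidesteps
 * unaligned-access issues the same way zstd's helpers do. */
static uint32_t read32(const uint8_t* p) { uint32_t v; memcpy(&v, p, sizeof v); return v; }
static uint16_t read16(const uint8_t* p) { uint16_t v; memcpy(&v, p, sizeof v); return v; }

/* Branch-based candidate check in the spirit of ZSTD_match6Found_branch:
 * read the candidate only when its index is within the valid window, then
 * require 4 + 2 = 6 matching bytes before accepting it. */
static int match6Found_branch(const uint8_t* cur, const uint8_t* match,
                              uint32_t matchIdx, uint32_t idxLowLimit)
{
    uint32_t mval;
    if (matchIdx >= idxLowLimit) {
        mval = read32(match);
    } else {
        mval = read32(cur) ^ 1;  /* guaranteed to differ from read32(cur) */
    }
    /* Short-circuit keeps the 2-byte read from touching an out-of-window candidate. */
    return (read32(cur) == mval) && (read16(cur + 4) == read16(match + 4));
}

int main(void)
{
    const uint8_t buf[] = "abcdefXabcdefY";  /* "abcdef" repeats at offset 7 */
    const uint8_t* match = buf;              /* earlier occurrence */
    const uint8_t* cur   = buf + 7;          /* current position */

    /* In-window candidate: the first 6 bytes agree, so the check accepts it. */
    printf("in window:   %d\n", match6Found_branch(cur, match, 0, 0));

    /* Candidate below the window limit: rejected without comparing its contents. */
    printf("below limit: %d\n", match6Found_branch(cur, match, 0, 1));
    return 0;
}

Whether the branch or the cmov form wins depends on how predictable matchIdx >= idxLowLimit is, which is why the patch only extends the branch variant to a 6-byte check and leaves ZSTD_match4Found_cmov doing 4-byte matching.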