From 57b66d2011b1f906e7067cfb6f83f5e7a6f31260 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 12:21:07 +0000 Subject: [PATCH 1/3] Initial plan From b62950d17dc68a738d2eec56dbd8e505f1cc75aa Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 12:26:40 +0000 Subject: [PATCH 2/3] Add TQ2_0 quantization support to whisper.cpp Co-authored-by: lhpqaq <63844184+lhpqaq@users.noreply.github.com> --- examples/common-ggml.cpp | 6 ++++-- ggml/include/ggml.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/examples/common-ggml.cpp b/examples/common-ggml.cpp index c42b644fedd..984cdfbd114 100644 --- a/examples/common-ggml.cpp +++ b/examples/common-ggml.cpp @@ -14,6 +14,7 @@ static const std::map GGML_FTYPE_MAP = { {"q4_k", GGML_FTYPE_MOSTLY_Q4_K}, {"q5_k", GGML_FTYPE_MOSTLY_Q5_K}, {"q6_k", GGML_FTYPE_MOSTLY_Q6_K}, + {"tq2_0", GGML_FTYPE_MOSTLY_TQ2_0}, }; void ggml_print_ftypes(FILE * fp) { @@ -24,7 +25,7 @@ void ggml_print_ftypes(FILE * fp) { enum ggml_ftype ggml_parse_ftype(const char * str) { enum ggml_ftype ftype; - if (str[0] == 'q') { + if (str[0] == 'q' || str[0] == 't') { const auto it = GGML_FTYPE_MAP.find(str); if (it == GGML_FTYPE_MAP.end()) { fprintf(stderr, "%s: unknown ftype '%s'\n", __func__, str); @@ -58,6 +59,7 @@ bool ggml_common_quantize_0( case GGML_FTYPE_MOSTLY_Q4_K: qtype = GGML_TYPE_Q4_K; break; case GGML_FTYPE_MOSTLY_Q5_K: qtype = GGML_TYPE_Q5_K; break; case GGML_FTYPE_MOSTLY_Q6_K: qtype = GGML_TYPE_Q6_K; break; + case GGML_FTYPE_MOSTLY_TQ2_0: qtype = GGML_TYPE_TQ2_0; break; case GGML_FTYPE_UNKNOWN: case GGML_FTYPE_ALL_F32: case GGML_FTYPE_MOSTLY_F16: @@ -188,6 +190,7 @@ bool ggml_common_quantize_0( case GGML_TYPE_Q4_K: case GGML_TYPE_Q5_K: case GGML_TYPE_Q6_K: + case GGML_TYPE_TQ2_0: { cur_size = ggml_quantize_chunk((ggml_type) ttype, data_f32.data(), work.data(), 0, nelements/ne[0], ne[0], nullptr); } break; @@ -211,7 +214,6 @@ bool ggml_common_quantize_0( case GGML_TYPE_IQ1_M: case GGML_TYPE_BF16: case GGML_TYPE_TQ1_0: - case GGML_TYPE_TQ2_0: case GGML_TYPE_MXFP4: case GGML_TYPE_COUNT: { diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 20c912d0e9b..a94deec07f2 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -458,6 +458,7 @@ extern "C" { GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors GGML_FTYPE_MOSTLY_MXFP4 = 25, // except 1d tensors + GGML_FTYPE_MOSTLY_TQ2_0 = 26, // except 1d tensors }; // available tensor operations: From c0f50e45a9b6566e10c7cf55da7c500836faa4ce Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 11 Jan 2026 12:29:16 +0000 Subject: [PATCH 3/3] Add TQ2_0 to ggml_ftype_to_ggml_type mapping Co-authored-by: lhpqaq <63844184+lhpqaq@users.noreply.github.com> --- ggml/src/ggml.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index eb3ae72eaac..d2ce1da5a16 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -1377,6 +1377,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) { case GGML_FTYPE_MOSTLY_IQ4_XS: wtype = GGML_TYPE_IQ4_XS; break; case GGML_FTYPE_MOSTLY_IQ3_S: wtype = GGML_TYPE_IQ3_S; break; case GGML_FTYPE_MOSTLY_IQ2_S: wtype = GGML_TYPE_IQ2_S; break; + case GGML_FTYPE_MOSTLY_TQ2_0: wtype = GGML_TYPE_TQ2_0; break; case GGML_FTYPE_UNKNOWN: wtype = GGML_TYPE_COUNT; break; case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break; }