From df4d20cd53d5bb6fc21c1dc65f026d53b566d097 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 26 Mar 2025 14:21:05 +0200
Subject: [PATCH] convert : fix squeeze for ssm_conv tensors (#12573)

* convert : fix squeeze for ssm_conv tensors

* convert : match ssm_conv tensors by type

---------

Co-authored-by: Francis Couture-Harpin <git@compilade.net>
---
 convert_hf_to_gguf.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 76ab4233e..52637c42f 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -3803,8 +3803,6 @@ class MambaModel(Model):
     _tok_embd = None
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        del bid  # unused
-
         output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
         tok_embd_name = self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD)
 
@@ -3814,6 +3812,10 @@ class MambaModel(Model):
             logger.debug("A_log --> A ==> " + new_name)
             data_torch = -torch.exp(data_torch)
 
+        # [4 1 8192 1] -> [4 8192 1 1]
+        if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid):
+            data_torch = data_torch.squeeze()
+
         # assuming token_embd.weight is seen before output.weight
         if self._tok_embd is not None and new_name == output_name:
             if torch.equal(self._tok_embd, data_torch):