Support diffusion models: Add Dream 7B (#14644)

* Support diffusion models: Add Dream 7B

* Move diffusion to examples

* Move stuff to examples. Add patch to not use kv-cache

* Address review comments

* Make sampling fast

* llama: remove diffusion functions

* Add basic timings + cleanup

* More cleanup

* Review comments: better formating, use LOG instead std::cerr, re-use batch, use ubatch instead of max_length

* fixup!

* Review: move everything to diffusion-cli for now
This commit is contained in:
Aman Gupta
2025-07-16 20:03:51 +08:00
committed by GitHub
parent 64978340b0
commit ab14019821
13 changed files with 804 additions and 0 deletions

View File

@ -367,6 +367,7 @@ class MODEL_ARCH(IntEnum):
HUNYUAN_MOE = auto()
SMOLLM3 = auto()
LFM2 = auto()
DREAM = auto()
class VISION_PROJECTOR_TYPE(IntEnum):
@ -683,6 +684,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe",
MODEL_ARCH.SMOLLM3: "smollm3",
MODEL_ARCH.LFM2: "lfm2",
MODEL_ARCH.DREAM: "dream",
}
VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@ -1289,6 +1291,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.DREAM: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ROPE_FREQS,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_GATE,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
],
MODEL_ARCH.QWEN2VL: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,