mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-30 20:58:45 +00:00
gguf-py : support lazy tensor splitting (#12809)
* gguf-py : support lazy tensor splitting

  Splitting usually involves returning tuples of tensors, which need to be handled properly to avoid early eager evaluation.

* gguf-py : fix flake8 lint
This commit is contained in:
@ -139,6 +139,16 @@ class LazyBase(ABC, metaclass=LazyMeta):
|
|||||||
|
|
||||||
if isinstance(res, cls._tensor_type):
|
if isinstance(res, cls._tensor_type):
|
||||||
return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
|
return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
|
||||||
|
elif isinstance(res, tuple) and all(isinstance(t, cls._tensor_type) for t in res):
|
||||||
|
# share the evaluation between lazy tuple elements
|
||||||
|
shared_args: list = [args, None]
|
||||||
|
|
||||||
|
def eager_tuple_element(a: list[Any], i: int = 0, /, **kw) -> LazyBase:
|
||||||
|
assert len(a) == 2
|
||||||
|
if a[1] is None:
|
||||||
|
a[1] = fn(*a[0], **kw)
|
||||||
|
return a[1][i]
|
||||||
|
return tuple(cls(meta=cls.eager_to_meta(res[i]), args=(shared_args, i), kwargs=kwargs, func=eager_tuple_element) for i in range(len(res)))
|
||||||
else:
|
else:
|
||||||
del res # not needed
|
del res # not needed
|
||||||
# non-tensor return likely relies on the contents of the args
|
# non-tensor return likely relies on the contents of the args
|
||||||
|
Reference in New Issue
Block a user