gguf-py : add more clarifying comments for multi-thread writes
@@ -63,14 +63,17 @@ class WriterState(Enum):


 @dataclass
-class TensorWriteInfo:
+class ThreadedTensorWriteInfo:
     filename: Path
     offset: int
     post_pad: int
     tensor: np.ndarray
-    bar: Any | None
+    bar: Any | None  # optional tqdm progress bar

     def write_chunk(self, open_files: dict[Path, BufferedWriter]):
+        # This is called from a thread pool,
+        # and each thread should have its own file handle per output file
+        # so that they can have different seek locations.
         if self.filename not in open_files:
             open_files[self.filename] = open(self.filename, "r+b")
         f = open_files[self.filename]
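A note on the "r+b" mode above: it opens the already-created file for in-place binary writes without truncating it, which is what lets each thread seek to its own region of the file. The rest of write_chunk is outside this hunk; judging only from the dataclass fields, the remainder presumably does something along these lines (a sketch inferred from the fields, not the actual source):

    # Hypothetical continuation of write_chunk(), inferred from the fields above;
    # the real body is not shown in this diff.
    f.seek(self.offset)                      # jump to this chunk's absolute position
    f.write(self.tensor.tobytes())           # raw tensor bytes
    if self.post_pad > 0:
        f.write(b"\x00" * self.post_pad)     # zero-fill to the alignment boundary
    if self.bar is not None:
        self.bar.update(self.tensor.nbytes)  # tqdm documents update() as thread-safe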
@@ -460,8 +463,9 @@ class GGUFWriter:
         if self.temp_file is None:
             bar = None
             # Distribute writing the tensors between multiple threads
-            tensor_queue: Queue[TensorWriteInfo] = Queue()
+            tensor_queue: Queue[ThreadedTensorWriteInfo] = Queue()

+            # Initial file offsets before writing the tensor data
             offsets: list[int] = [fout.tell() for fout in self.fout]

             if progress:
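The design here is a plain work-queue fan-out: every pending write is described up front on the main thread, with its absolute file offset already decided, and the workers can then perform the writes in any order. All of the coordination lives in the standard library's queue.Queue, which is thread-safe on its own, so no extra locking is needed. A tiny self-contained demonstration of why precomputed, non-overlapping offsets make the write order irrelevant (all names and sizes here are made up for illustration):

    # Self-contained demo: order-independent writes at precomputed offsets.
    import os
    import tempfile
    from queue import Queue

    chunks: Queue[tuple[int, bytes]] = Queue()
    chunks.put((0, b"AAAA"))      # (absolute offset, payload)
    chunks.put((8, b"BBBB"))      # regions never overlap, so any order works
    chunks.put((4, b"CCCC"))

    fd, path = tempfile.mkstemp()
    os.close(fd)
    with open(path, "r+b") as f:  # "r+b": write in place without truncating
        while not chunks.empty():  # single-threaded demo; workers use get_nowait()
            off, data = chunks.get()
            f.seek(off)
            f.write(data)
    with open(path, "rb") as f:
        assert f.read() == b"AAAACCCCBBBB"
    os.remove(path)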
@@ -472,6 +476,7 @@ class GGUFWriter:

                 bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)

+            # Fill the tensor queue with all the pending tensor writes
             for i, (filename, tensors) in enumerate(zip(self.filenames, self.tensors)):
                 offset = offsets[i]

@@ -484,7 +489,7 @@ class GGUFWriter:
                     offset = self.ggml_pad(start_offset + nbytes, self.data_alignment)
                     padding = offset - (start_offset + nbytes)
                     tensor_queue.put(
-                        TensorWriteInfo(
+                        ThreadedTensorWriteInfo(
                             filename=filename,
                             offset=start_offset,
                             post_pad=padding,
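For the padding arithmetic: ggml_pad rounds a position up to the next multiple of the alignment, and the difference is the number of zero bytes that write_chunk appends after the tensor (the post_pad field). A worked example with made-up numbers; the helper below is the standard round-up formula, assumed equivalent to GGUFWriter.ggml_pad:

    def ggml_pad(x: int, n: int) -> int:
        # Round x up to the next multiple of n
        return ((x + n - 1) // n) * n

    alignment = 32        # hypothetical self.data_alignment
    start_offset = 4096   # hypothetical current offset
    nbytes = 6000         # hypothetical tensor size
    offset = ggml_pad(start_offset + nbytes, alignment)  # 10112
    padding = offset - (start_offset + nbytes)           # 16 bytes of post_pad
    assert (start_offset + nbytes) % alignment == 16     # 16 short of the boundary
    assert padding == 16 and offset % alignment == 0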
@@ -496,12 +501,13 @@ class GGUFWriter:

             # Write tensors in parallel
             # TODO: total tensor size limit for the running threads
-            def write_tensors_from_thread(queue: Queue[TensorWriteInfo]):
+            def write_tensors_from_thread(queue: Queue[ThreadedTensorWriteInfo]):
+                # Opening the files only once per thread
                 open_files: dict[Path, BufferedWriter] = {}
                 try:
-                    while t := queue.get_nowait():
-                        t.write_chunk(open_files)
-                        del t
+                    while tensor := queue.get_nowait():
+                        tensor.write_chunk(open_files)
+                        del tensor
                         queue.task_done()
                 except Empty:
                     pass
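Two details worth noting about the worker loop. First, the `while tensor := queue.get_nowait():` condition relies on each item being truthy; dataclass instances without __bool__ or __len__ always are, so in practice the loop only ends when get_nowait() raises Empty on a drained queue. Second, thread startup and shutdown are outside these hunks; the surrounding code presumably looks roughly like this (the thread count and variable names are hypothetical, not taken from this diff):

    # Hypothetical driver, not part of this diff:
    from threading import Thread

    n_threads = 4  # made up; the real writer chooses its own count
    workers = [
        Thread(target=write_tensors_from_thread, args=(tensor_queue,))
        for _ in range(n_threads)
    ]
    for w in workers:
        w.start()
    tensor_queue.join()  # unblocks once task_done() has matched every put()
    for w in workers:
        w.join()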