mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-08-18 05:56:00 -04:00)
py : convert-hf-to-gguf-update improvements (#7340)
* convert-hf-to-gguf-update: automate updating
* convert-hf-to-gguf-update: improve download
* share requests session for performance
* create directories only when needed, don't skip downloads when empty directory encountered
* be more graceful about errors
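Most of these points concern the download path. Below is a minimal sketch of what that path could look like after the change, assuming hypothetical names (`download_file`, the module-level `session`); the actual script's structure may differ:

import os
import logging
import requests

logger = logging.getLogger("convert-hf-to-gguf-update")

# A single shared session reuses TCP/TLS connections across all downloads.
session = requests.Session()

def download_file(url: str, path: str) -> bool:
    # Create the target directory only right before writing, so an empty
    # directory left behind by an earlier run does not cause the download
    # to be skipped.
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    try:
        response = session.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        # Be graceful about errors: log the failure and keep going
        # instead of aborting the whole update.
        logger.error(f"failed to download {url}: {e}")
        return False
    with open(path, "wb") as f:
        f.write(response.content)
    return True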
@@ -402,6 +402,7 @@ class Model:
     # NOTE: this function is generated by convert-hf-to-gguf-update.py
     # do not modify it manually!
     # ref: https://github.com/ggerganov/llama.cpp/pull/6920
+    # Marker: Start get_vocab_base_pre
     def get_vocab_base_pre(self, tokenizer) -> str:
         # encoding this string and hashing the resulting tokens would (hopefully) give us a unique identifier that
         # is specific for the BPE pre-tokenizer used by the model
@@ -489,6 +490,7 @@ class Model:
         logger.debug(f"chkhsh: {chkhsh}")

         return res
+    # Marker: End get_vocab_base_pre

     def _set_vocab_gpt2(self) -> None:
         tokens, toktypes, tokpre = self.get_vocab_base()
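The two markers are what enable the "automate updating" point: convert-hf-to-gguf-update.py can regenerate get_vocab_base_pre and splice it in between them instead of requiring a manual copy-paste. A hedged sketch of such a splice, using a hypothetical helper name (splice_generated); not necessarily how the script actually implements it:

def splice_generated(src: str, generated: str) -> str:
    # Replace whatever currently sits between the two markers in
    # convert-hf-to-gguf.py with the freshly generated function body.
    start_marker = "# Marker: Start get_vocab_base_pre"
    end_marker = "# Marker: End get_vocab_base_pre"
    start = src.index(start_marker) + len(start_marker)
    end = src.index(end_marker)
    # `generated` is expected to carry its own 4-space class-body
    # indentation; the trailing "    " restores the end marker's indent.
    return src[:start] + "\n" + generated + "\n    " + src[end:]

Usage would then be: read convert-hf-to-gguf.py, build the new function from the downloaded tokenizers, and write splice_generated(source, new_func) back to the file.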