diff --git a/pyproject.toml b/pyproject.toml index 7e3b1af8b3da16a69653f0c91953e4579d114de1..5528e1790fc6f73a39060711302796e0fd0aa74c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "mistral_inference" -version = "1.3.0" +version = "1.3.1" description = "" authors = ["bam4d <bam4d@mistral.ai>"] readme = "README.md" diff --git a/src/mistral_inference/__init__.py b/src/mistral_inference/__init__.py index 67bc602abf06e9bcea675fe21c56a2f3c76bc331..9c73af26be70465839a5f43818dbab3f5c35571f 100644 --- a/src/mistral_inference/__init__.py +++ b/src/mistral_inference/__init__.py @@ -1 +1 @@ -__version__ = "1.3.0" +__version__ = "1.3.1" diff --git a/src/mistral_inference/main.py b/src/mistral_inference/main.py index d4302fef38db53fd3f07e9c06e15445655cd417d..74743e8b63ba451c2234b9bc242a08be7a6d7810 100644 --- a/src/mistral_inference/main.py +++ b/src/mistral_inference/main.py @@ -12,6 +12,7 @@ from mistral_common.protocol.instruct.messages import AssistantMessage, UserMess from mistral_common.protocol.instruct.request import ChatCompletionRequest from mistral_common.tokens.tokenizers.base import Tokenizer from mistral_common.tokens.tokenizers.mistral import MistralTokenizer +from mistral_common.tokens.tokenizers.tekken import Tekkenizer, SpecialTokenPolicy from mistral_common.tokens.tokenizers.sentencepiece import is_sentencepiece from mistral_common.tokens.tokenizers.tekken import is_tekken @@ -36,6 +37,9 @@ def load_tokenizer(model_path: Path) -> MistralTokenizer: mistral_tokenizer = MistralTokenizer.from_file(str(model_path / tokenizer[0])) + if isinstance(mistral_tokenizer.instruct_tokenizer.tokenizer, Tekkenizer): + mistral_tokenizer.instruct_tokenizer.tokenizer.special_token_policy = SpecialTokenPolicy.KEEP + logging.info(f"Loaded tokenizer of type {mistral_tokenizer.instruct_tokenizer.__class__}") return mistral_tokenizer