From 1a55e950aaaf9859ac7ff830e2932f5a248d1b37 Mon Sep 17 00:00:00 2001 From: Sergey Kozlov Date: Sat, 16 Mar 2024 07:33:51 +0600 Subject: [PATCH] community[patch]: support fastembed v1 and v2 (#19125) **Description:** #18040 forces `fastembed>=0.2.0`, which causes dependency conflicts with newer releases of the `unstructured` package (they require a different `onnxruntime` version). There may be other dependency conflicts as well. Currently the only way to use `langchain-community>=0.0.28` is to roll back to `unstructured 0.10.X`, but newer `unstructured` releases contain many fixes. This PR allows using both `fastembed` `v1` (`<0.2.0`) and `v2` (`>=0.2.0`). How to reproduce: `pyproject.toml`: ```toml [tool.poetry] name = "depstest" version = "0.0.0" description = "test" authors = [""] [tool.poetry.dependencies] python = ">=3.10,<3.12" langchain-community = "^0.0.28" fastembed = "^0.2.0" unstructured = {extras = ["pdf"], version = "^0.12"} ``` ```bash $ poetry lock ``` Co-authored-by: Sergey Kozlov --- .../embeddings/fastembed.py | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/libs/community/langchain_community/embeddings/fastembed.py b/libs/community/langchain_community/embeddings/fastembed.py index 65ed5b16c26..4f03ab70fe2 100644 --- a/libs/community/langchain_community/embeddings/fastembed.py +++ b/libs/community/langchain_community/embeddings/fastembed.py @@ -57,13 +57,15 @@ class FastEmbedEmbeddings(BaseModel, Embeddings): @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that FastEmbed has been installed.""" + model_name = values.get("model_name") + max_length = values.get("max_length") + cache_dir = values.get("cache_dir") + threads = values.get("threads") + try: + # >= v0.2.0 from fastembed import TextEmbedding - model_name = values.get("model_name") - max_length = values.get("max_length") - cache_dir = values.get("cache_dir") - threads = values.get("threads") values["_model"] = TextEmbedding( model_name=model_name, max_length=max_length, @@ -71,10 +73,21 @@ class 
FastEmbedEmbeddings(BaseModel, Embeddings): threads=threads, ) except ImportError as ie: - raise ImportError( - "'FastEmbedEmbeddings' requires 'fastembed==v0.2.0' or above. " - "Please install it with `pip install fastembed`." - ) from ie + try: + # < v0.2.0 + from fastembed.embedding import FlagEmbedding + + values["_model"] = FlagEmbedding( + model_name=model_name, + max_length=max_length, + cache_dir=cache_dir, + threads=threads, + ) + except ImportError: + raise ImportError( + "Could not import 'fastembed' Python package. " + "Please install it with `pip install fastembed`." + ) from ie return values def embed_documents(self, texts: List[str]) -> List[List[float]]: