community[patch]: support fastembed v1 and v2 (#19125)

**Description:**
#18040 forces `fastembed>=0.2.0`, and this causes dependency conflicts with
the new `unstructured` package (they require different `onnxruntime`
versions). There may be other dependency conflicts as well. Currently, the
only way to use `langchain-community>=0.0.28` is to roll back to
`unstructured 0.10.X`, but newer `unstructured` releases contain many fixes.

This PR makes it possible to use both `fastembed` `v1` (`<0.2.0`) and `v2` (`>=0.2.0`).

How to reproduce:

`pyproject.toml`:
```toml
[tool.poetry]
name = "depstest"
version = "0.0.0"
description = "test"
authors = ["<dev@example.org>"]

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
langchain-community = "^0.0.28"
fastembed = "^0.2.0"
unstructured = {extras = ["pdf"], version = "^0.12"}
```

```bash
$ poetry lock
```

Co-authored-by: Sergey Kozlov <sergey.kozlov@ludditelabs.io>
This commit is contained in:
Sergey Kozlov 2024-03-16 07:33:51 +06:00 committed by GitHub
parent fd4f536c77
commit 1a55e950aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -57,13 +57,15 @@ class FastEmbedEmbeddings(BaseModel, Embeddings):
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validate that FastEmbed has been installed."""
model_name = values.get("model_name")
max_length = values.get("max_length")
cache_dir = values.get("cache_dir")
threads = values.get("threads")
try:
# >= v0.2.0
from fastembed import TextEmbedding
model_name = values.get("model_name")
max_length = values.get("max_length")
cache_dir = values.get("cache_dir")
threads = values.get("threads")
values["_model"] = TextEmbedding(
model_name=model_name,
max_length=max_length,
@ -71,10 +73,21 @@ class FastEmbedEmbeddings(BaseModel, Embeddings):
threads=threads,
)
except ImportError as ie:
raise ImportError(
"'FastEmbedEmbeddings' requires 'fastembed==v0.2.0' or above. "
"Please install it with `pip install fastembed`."
) from ie
try:
# < v0.2.0
from fastembed.embedding import FlagEmbedding
values["_model"] = FlagEmbedding(
model_name=model_name,
max_length=max_length,
cache_dir=cache_dir,
threads=threads,
)
except ImportError:
raise ImportError(
"Could not import 'fastembed' Python package. "
"Please install it with `pip install fastembed`."
) from ie
return values
def embed_documents(self, texts: List[str]) -> List[List[float]]: