community[patch]: update the default hf bge embeddings (#22627)

**Description:** This updates the default BGE embeddings used by the
HuggingFace integration in `langchain_community` ([the model card for the
current default recommends this change](https://huggingface.co/BAAI/bge-large-en)).
**Issue:** None
**Dependencies:** None
**Twitter handle:** @jonzeolla

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
JonZeolla
2024-09-02 18:10:21 -04:00
committed by GitHub
parent 150251fd49
commit 78ff51ce83
19 changed files with 81 additions and 45 deletions

View File

@@ -67,6 +67,19 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
def __init__(self, **kwargs: Any):
"""Initialize the sentence_transformer."""
super().__init__(**kwargs)
if "model_name" not in kwargs:
since = "0.2.16"
removal = "0.4.0"
warn_deprecated(
since=since,
removal=removal,
message=f"Default values for {self.__class__.__name__}.model_name"
+ f" were deprecated in LangChain {since} and will be removed in"
+ f" {removal}. Explicitly pass a model_name to the"
+ f" {self.__class__.__name__} constructor instead.",
)
try:
import sentence_transformers
@@ -159,6 +172,19 @@ class HuggingFaceInstructEmbeddings(BaseModel, Embeddings):
def __init__(self, **kwargs: Any):
"""Initialize the sentence_transformer."""
super().__init__(**kwargs)
if "model_name" not in kwargs:
since = "0.2.16"
removal = "0.4.0"
warn_deprecated(
since=since,
removal=removal,
message=f"Default values for {self.__class__.__name__}.model_name"
+ f" were deprecated in LangChain {since} and will be removed in"
+ f" {removal}. Explicitly pass a model_name to the"
+ f" {self.__class__.__name__} constructor instead.",
)
try:
from InstructorEmbedding import INSTRUCTOR
@@ -231,7 +257,7 @@ class HuggingFaceBgeEmbeddings(BaseModel, Embeddings):
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
model_name = "BAAI/bge-large-en"
model_name = "BAAI/bge-large-en-v1.5"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceBgeEmbeddings(
@@ -279,6 +305,19 @@ class HuggingFaceBgeEmbeddings(BaseModel, Embeddings):
def __init__(self, **kwargs: Any):
"""Initialize the sentence_transformer."""
super().__init__(**kwargs)
if "model_name" not in kwargs:
since = "0.2.5"
removal = "0.4.0"
warn_deprecated(
since=since,
removal=removal,
message=f"Default values for {self.__class__.__name__}.model_name"
+ f" were deprecated in LangChain {since} and will be removed in"
+ f" {removal}. Explicitly pass a model_name to the"
+ f" {self.__class__.__name__} constructor instead.",
)
try:
import sentence_transformers

View File

@@ -303,7 +303,7 @@ class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
from langchain_community.embeddings import OpenVINOBgeEmbeddings
model_name = "BAAI/bge-large-en"
model_name = "BAAI/bge-large-en-v1.5"
model_kwargs = {'device': 'CPU'}
encode_kwargs = {'normalize_embeddings': True}
ov = OpenVINOBgeEmbeddings(

View File

@@ -41,9 +41,10 @@ class ScaNN(VectorStore):
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import ScaNN
model_name = "sentence-transformers/all-mpnet-base-v2"
db = ScaNN.from_texts(
['foo', 'bar', 'barz', 'qux'],
HuggingFaceEmbeddings())
HuggingFaceEmbeddings(model_name=model_name))
db.similarity_search('foo?', k=1)
"""

View File

@@ -1,12 +1,5 @@
import asyncio
from typing import (
Any,
Dict,
Iterable,
List,
Optional,
Tuple,
)
from typing import Any, Dict, Iterable, List, Optional, Tuple
import numpy as np
from langchain_core.documents import Document
@@ -40,7 +33,8 @@ class SurrealDBStore(VectorStore):
from langchain_community.vectorstores.surrealdb import SurrealDBStore
from langchain_community.embeddings import HuggingFaceEmbeddings
embedding_function = HuggingFaceEmbeddings()
model_name = "sentence-transformers/all-mpnet-base-v2"
embedding_function = HuggingFaceEmbeddings(model_name=model_name)
dburl = "ws://localhost:8000/rpc"
ns = "langchain"
db = "docstore"

View File

@@ -23,10 +23,11 @@ class Vald(VectorStore):
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Vald
model_name = "sentence-transformers/all-mpnet-base-v2"
texts = ['foo', 'bar', 'baz']
vald = Vald.from_texts(
texts=texts,
embedding=HuggingFaceEmbeddings(),
embedding=HuggingFaceEmbeddings(model_name=model_name),
host="localhost",
port=8080,
skip_strict_exist_check=False,

View File

@@ -161,9 +161,10 @@ class VDMS(VectorStore):
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores.vdms import VDMS, VDMS_Client
model_name = "sentence-transformers/all-mpnet-base-v2"
vectorstore = VDMS(
client=VDMS_Client("localhost", 55555),
embedding=HuggingFaceEmbeddings(),
embedding=HuggingFaceEmbeddings(model_name=model_name),
collection_name="langchain-demo",
distance_strategy="L2",
engine="FaissFlat",