mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-16 23:13:31 +00:00
community[patch]: update the default hf bge embeddings (#22627)
**Description:** This updates the default BGE embeddings model used by the HuggingFace embeddings in `langchain_community` ([the model card for the current default recommends this change](https://huggingface.co/BAAI/bge-large-en)).

**Issue:** None

**Dependencies:** None

**Twitter handle:** @jonzeolla

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -67,6 +67,19 @@ class HuggingFaceEmbeddings(BaseModel, Embeddings):
|
||||
def __init__(self, **kwargs: Any):
|
||||
"""Initialize the sentence_transformer."""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if "model_name" not in kwargs:
|
||||
since = "0.2.16"
|
||||
removal = "0.4.0"
|
||||
warn_deprecated(
|
||||
since=since,
|
||||
removal=removal,
|
||||
message=f"Default values for {self.__class__.__name__}.model_name"
|
||||
+ f" were deprecated in LangChain {since} and will be removed in"
|
||||
+ f" {removal}. Explicitly pass a model_name to the"
|
||||
+ f" {self.__class__.__name__} constructor instead.",
|
||||
)
|
||||
|
||||
try:
|
||||
import sentence_transformers
|
||||
|
||||
@@ -159,6 +172,19 @@ class HuggingFaceInstructEmbeddings(BaseModel, Embeddings):
|
||||
def __init__(self, **kwargs: Any):
|
||||
"""Initialize the sentence_transformer."""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if "model_name" not in kwargs:
|
||||
since = "0.2.16"
|
||||
removal = "0.4.0"
|
||||
warn_deprecated(
|
||||
since=since,
|
||||
removal=removal,
|
||||
message=f"Default values for {self.__class__.__name__}.model_name"
|
||||
+ f" were deprecated in LangChain {since} and will be removed in"
|
||||
+ f" {removal}. Explicitly pass a model_name to the"
|
||||
+ f" {self.__class__.__name__} constructor instead.",
|
||||
)
|
||||
|
||||
try:
|
||||
from InstructorEmbedding import INSTRUCTOR
|
||||
|
||||
@@ -231,7 +257,7 @@ class HuggingFaceBgeEmbeddings(BaseModel, Embeddings):
|
||||
|
||||
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
|
||||
|
||||
model_name = "BAAI/bge-large-en"
|
||||
model_name = "BAAI/bge-large-en-v1.5"
|
||||
model_kwargs = {'device': 'cpu'}
|
||||
encode_kwargs = {'normalize_embeddings': True}
|
||||
hf = HuggingFaceBgeEmbeddings(
|
||||
@@ -279,6 +305,19 @@ class HuggingFaceBgeEmbeddings(BaseModel, Embeddings):
|
||||
def __init__(self, **kwargs: Any):
|
||||
"""Initialize the sentence_transformer."""
|
||||
super().__init__(**kwargs)
|
||||
|
||||
if "model_name" not in kwargs:
|
||||
since = "0.2.5"
|
||||
removal = "0.4.0"
|
||||
warn_deprecated(
|
||||
since=since,
|
||||
removal=removal,
|
||||
message=f"Default values for {self.__class__.__name__}.model_name"
|
||||
+ f" were deprecated in LangChain {since} and will be removed in"
|
||||
+ f" {removal}. Explicitly pass a model_name to the"
|
||||
+ f" {self.__class__.__name__} constructor instead.",
|
||||
)
|
||||
|
||||
try:
|
||||
import sentence_transformers
|
||||
|
||||
|
@@ -303,7 +303,7 @@ class OpenVINOBgeEmbeddings(OpenVINOEmbeddings):
|
||||
|
||||
from langchain_community.embeddings import OpenVINOBgeEmbeddings
|
||||
|
||||
model_name = "BAAI/bge-large-en"
|
||||
model_name = "BAAI/bge-large-en-v1.5"
|
||||
model_kwargs = {'device': 'CPU'}
|
||||
encode_kwargs = {'normalize_embeddings': True}
|
||||
ov = OpenVINOBgeEmbeddings(
|
||||
|
@@ -41,9 +41,10 @@ class ScaNN(VectorStore):
|
||||
from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
from langchain_community.vectorstores import ScaNN
|
||||
|
||||
model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
db = ScaNN.from_texts(
|
||||
['foo', 'bar', 'barz', 'qux'],
|
||||
HuggingFaceEmbeddings())
|
||||
HuggingFaceEmbeddings(model_name=model_name))
|
||||
db.similarity_search('foo?', k=1)
|
||||
"""
|
||||
|
||||
|
@@ -1,12 +1,5 @@
|
||||
import asyncio
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
)
|
||||
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
from langchain_core.documents import Document
|
||||
@@ -40,7 +33,8 @@ class SurrealDBStore(VectorStore):
|
||||
from langchain_community.vectorstores.surrealdb import SurrealDBStore
|
||||
from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
|
||||
embedding_function = HuggingFaceEmbeddings()
|
||||
model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
embedding_function = HuggingFaceEmbeddings(model_name=model_name)
|
||||
dburl = "ws://localhost:8000/rpc"
|
||||
ns = "langchain"
|
||||
db = "docstore"
|
||||
|
@@ -23,10 +23,11 @@ class Vald(VectorStore):
|
||||
from langchain_community.embeddings import HuggingFaceEmbeddings
|
||||
from langchain_community.vectorstores import Vald
|
||||
|
||||
model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
texts = ['foo', 'bar', 'baz']
|
||||
vald = Vald.from_texts(
|
||||
texts=texts,
|
||||
embedding=HuggingFaceEmbeddings(),
|
||||
embedding=HuggingFaceEmbeddings(model_name=model_name),
|
||||
host="localhost",
|
||||
port=8080,
|
||||
skip_strict_exist_check=False,
|
||||
|
@@ -161,9 +161,10 @@ class VDMS(VectorStore):
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
from langchain_community.vectorstores.vdms import VDMS, VDMS_Client
|
||||
|
||||
model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
vectorstore = VDMS(
|
||||
client=VDMS_Client("localhost", 55555),
|
||||
embedding=HuggingFaceEmbeddings(),
|
||||
embedding=HuggingFaceEmbeddings(model_name=model_name),
|
||||
collection_name="langchain-demo",
|
||||
distance_strategy="L2",
|
||||
engine="FaissFlat",
|
||||
|
Reference in New Issue
Block a user