mirror of
https://github.com/hwchase17/langchain.git
synced 2025-08-10 21:35:08 +00:00
community[patch]: fix public interface for embeddings module (#21650)
## Description

The existing public interface for `langchain_community.embeddings` is broken. In this file, `__all__` is statically defined, but is subsequently overwritten with a dynamic expression, which type checkers like pyright do not support. pyright actually gives the following diagnostic on the line I am requesting we remove: [reportUnsupportedDunderAll](https://github.com/microsoft/pyright/blob/main/docs/configuration.md#reportUnsupportedDunderAll):

```
Operation on "__all__" is not supported, so exported symbol list may be incorrect
```

Currently, I get the following errors when attempting to use publicly exported classes in `langchain_community.embeddings`:

```python
import langchain_community.embeddings

langchain_community.embeddings.HuggingFaceEmbeddings(...)  # error: "HuggingFaceEmbeddings" is not exported from module "langchain_community.embeddings" (reportPrivateImportUsage)
```

This is solved easily by removing the dynamic expression.
This commit is contained in:
parent
6548052f9e
commit
4f2e3bd7fd
@ -203,6 +203,9 @@ if TYPE_CHECKING:
|
||||
from langchain_community.embeddings.tensorflow_hub import (
|
||||
TensorflowHubEmbeddings,
|
||||
)
|
||||
from langchain_community.embeddings.titan_takeoff import (
|
||||
TitanTakeoffEmbed,
|
||||
)
|
||||
from langchain_community.embeddings.vertexai import (
|
||||
VertexAIEmbeddings,
|
||||
)
|
||||
@ -288,6 +291,7 @@ __all__ = [
|
||||
"SpacyEmbeddings",
|
||||
"SparkLLMTextEmbeddings",
|
||||
"TensorflowHubEmbeddings",
|
||||
"TitanTakeoffEmbed",
|
||||
"VertexAIEmbeddings",
|
||||
"VolcanoEmbeddings",
|
||||
"VoyageEmbeddings",
|
||||
@ -380,8 +384,6 @@ def __getattr__(name: str) -> Any:
|
||||
raise AttributeError(f"module {__name__} has no attribute {name}")
|
||||
|
||||
|
||||
__all__ = list(_module_lookup.keys())
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
from enum import Enum
|
||||
from typing import Any, List, Optional, Set, Union
|
||||
from typing import Any, Dict, List, Optional, Set, Union
|
||||
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_core.pydantic_v1 import BaseModel
|
||||
@ -142,11 +142,12 @@ class TitanTakeoffEmbed(Embeddings):
|
||||
|
||||
def _embed(
|
||||
self, input: Union[List[str], str], consumer_group: Optional[str]
|
||||
) -> dict:
|
||||
) -> Dict[str, Any]:
|
||||
"""Embed text.
|
||||
|
||||
Args:
|
||||
input (List[str]): prompt/document or list of prompts/documents to embed
|
||||
input (Union[List[str], str]): prompt/document or list of prompts/documents
|
||||
to embed
|
||||
consumer_group (Optional[str]): what consumer group to send the embedding
|
||||
request to. If not specified and there is only one
|
||||
consumer group specified during initialization, it will be used. If there
|
||||
|
@ -7,7 +7,11 @@ from typing import Any
|
||||
import pytest
|
||||
|
||||
from langchain_community.embeddings import TitanTakeoffEmbed
|
||||
from langchain_community.embeddings.titan_takeoff import MissingConsumerGroup
|
||||
from langchain_community.embeddings.titan_takeoff import (
|
||||
Device,
|
||||
MissingConsumerGroup,
|
||||
ReaderConfig,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.requires("pytest_httpx")
|
||||
@ -24,7 +28,7 @@ def test_titan_takeoff_call(httpx_mock: Any) -> None:
|
||||
|
||||
embedding = TitanTakeoffEmbed(port=port)
|
||||
|
||||
output_1 = embedding.embed_documents("What is 2 + 2?", "primary")
|
||||
output_1 = embedding.embed_documents(["What is 2 + 2?"], "primary")
|
||||
output_2 = embedding.embed_query("What is 2 + 2?", "primary")
|
||||
|
||||
assert isinstance(output_1, list)
|
||||
@ -53,12 +57,12 @@ def test_no_consumer_group_fails(httpx_mock: Any) -> None:
|
||||
embedding = TitanTakeoffEmbed(port=port)
|
||||
|
||||
with pytest.raises(MissingConsumerGroup):
|
||||
embedding.embed_documents("What is 2 + 2?")
|
||||
embedding.embed_documents(["What is 2 + 2?"])
|
||||
with pytest.raises(MissingConsumerGroup):
|
||||
embedding.embed_query("What is 2 + 2?")
|
||||
|
||||
# Check specifying a consumer group works
|
||||
embedding.embed_documents("What is 2 + 2?", "primary")
|
||||
embedding.embed_documents(["What is 2 + 2?"], "primary")
|
||||
embedding.embed_query("What is 2 + 2?", "primary")
|
||||
|
||||
|
||||
@ -70,14 +74,16 @@ def test_takeoff_initialization(httpx_mock: Any) -> None:
|
||||
inf_port = 46253
|
||||
mgnt_url = f"http://localhost:{mgnt_port}/reader"
|
||||
embed_url = f"http://localhost:{inf_port}/embed"
|
||||
reader_1 = {
|
||||
"model_name": "test",
|
||||
"device": "cpu",
|
||||
"consumer_group": "embed",
|
||||
}
|
||||
reader_2 = reader_1.copy()
|
||||
reader_2["model_name"] = "test2"
|
||||
reader_2["device"] = "cuda"
|
||||
reader_1 = ReaderConfig(
|
||||
model_name="test",
|
||||
device=Device.cpu,
|
||||
consumer_group="embed",
|
||||
)
|
||||
reader_2 = ReaderConfig(
|
||||
model_name="test2",
|
||||
device=Device.cuda,
|
||||
consumer_group="embed",
|
||||
)
|
||||
|
||||
httpx_mock.add_response(
|
||||
method="POST", url=mgnt_url, json={"key": "value"}, status_code=201
|
||||
@ -94,18 +100,18 @@ def test_takeoff_initialization(httpx_mock: Any) -> None:
|
||||
)
|
||||
# Shouldn't need to specify consumer group as there is only one specified during
|
||||
# initialization
|
||||
output_1 = llm.embed_documents("What is 2 + 2?")
|
||||
output_1 = llm.embed_documents(["What is 2 + 2?"])
|
||||
output_2 = llm.embed_query("What is 2 + 2?")
|
||||
|
||||
assert isinstance(output_1, list)
|
||||
assert isinstance(output_2, list)
|
||||
# Ensure the management api was called to create the reader
|
||||
assert len(httpx_mock.get_requests()) == 4
|
||||
for key, value in reader_1.items():
|
||||
for key, value in reader_1.dict().items():
|
||||
assert json.loads(httpx_mock.get_requests()[0].content)[key] == value
|
||||
assert httpx_mock.get_requests()[0].url == mgnt_url
|
||||
# Also second call should be made to spin uo reader 2
|
||||
for key, value in reader_2.items():
|
||||
for key, value in reader_2.dict().items():
|
||||
assert json.loads(httpx_mock.get_requests()[1].content)[key] == value
|
||||
assert httpx_mock.get_requests()[1].url == mgnt_url
|
||||
# Ensure the third call is to generate endpoint to inference
|
||||
@ -126,15 +132,16 @@ def test_takeoff_initialization_with_more_than_one_consumer_group(
|
||||
inf_port = 46253
|
||||
mgnt_url = f"http://localhost:{mgnt_port}/reader"
|
||||
embed_url = f"http://localhost:{inf_port}/embed"
|
||||
reader_1 = {
|
||||
"model_name": "test",
|
||||
"device": "cpu",
|
||||
"consumer_group": "embed",
|
||||
}
|
||||
reader_2 = reader_1.copy()
|
||||
reader_2["model_name"] = "test2"
|
||||
reader_2["device"] = "cuda"
|
||||
reader_2["consumer_group"] = "embed2"
|
||||
reader_1 = ReaderConfig(
|
||||
model_name="test",
|
||||
device=Device.cpu,
|
||||
consumer_group="embed",
|
||||
)
|
||||
reader_2 = ReaderConfig(
|
||||
model_name="test2",
|
||||
device=Device.cuda,
|
||||
consumer_group="embed2",
|
||||
)
|
||||
|
||||
httpx_mock.add_response(
|
||||
method="POST", url=mgnt_url, json={"key": "value"}, status_code=201
|
||||
@ -152,22 +159,22 @@ def test_takeoff_initialization_with_more_than_one_consumer_group(
|
||||
# There was more than one consumer group specified during initialization so we
|
||||
# need to specify which one to use
|
||||
with pytest.raises(MissingConsumerGroup):
|
||||
llm.embed_documents("What is 2 + 2?")
|
||||
llm.embed_documents(["What is 2 + 2?"])
|
||||
with pytest.raises(MissingConsumerGroup):
|
||||
llm.embed_query("What is 2 + 2?")
|
||||
|
||||
output_1 = llm.embed_documents("What is 2 + 2?", "embed")
|
||||
output_1 = llm.embed_documents(["What is 2 + 2?"], "embed")
|
||||
output_2 = llm.embed_query("What is 2 + 2?", "embed2")
|
||||
|
||||
assert isinstance(output_1, list)
|
||||
assert isinstance(output_2, list)
|
||||
# Ensure the management api was called to create the reader
|
||||
assert len(httpx_mock.get_requests()) == 4
|
||||
for key, value in reader_1.items():
|
||||
for key, value in reader_1.dict().items():
|
||||
assert json.loads(httpx_mock.get_requests()[0].content)[key] == value
|
||||
assert httpx_mock.get_requests()[0].url == mgnt_url
|
||||
# Also second call should be made to spin uo reader 2
|
||||
for key, value in reader_2.items():
|
||||
for key, value in reader_2.dict().items():
|
||||
assert json.loads(httpx_mock.get_requests()[1].content)[key] == value
|
||||
assert httpx_mock.get_requests()[1].url == mgnt_url
|
||||
# Ensure the third call is to generate endpoint to inference
|
||||
|
Loading…
Reference in New Issue
Block a user