mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-02 19:47:13 +00:00
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)
Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
This commit is contained in:
100
libs/community/langchain_community/document_loaders/couchbase.py
Normal file
100
libs/community/langchain_community/document_loaders/couchbase.py
Normal file
@@ -0,0 +1,100 @@
|
||||
import logging
|
||||
from typing import Iterator, List, Optional
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.document_loaders.base import BaseLoader
|
||||
|
||||
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CouchbaseLoader(BaseLoader):
    """Load documents from `Couchbase`.

    Each document represents one row of the query result. The
    `page_content_fields` are written into the `page_content` of the document.
    The `metadata_fields` are written into the `metadata` of the document. By
    default, all columns are written into the `page_content` and none into the
    `metadata`.
    """

    def __init__(
        self,
        connection_string: str,
        db_username: str,
        db_password: str,
        query: str,
        *,
        page_content_fields: Optional[List[str]] = None,
        metadata_fields: Optional[List[str]] = None,
    ) -> None:
        """Initialize Couchbase document loader.

        Args:
            connection_string (str): The connection string to the Couchbase cluster.
            db_username (str): The username to connect to the Couchbase cluster.
            db_password (str): The password to connect to the Couchbase cluster.
            query (str): The SQL++ query to execute.
            page_content_fields (Optional[List[str]]): The columns to write into the
                `page_content` field of the document. By default, all columns are
                written.
            metadata_fields (Optional[List[str]]): The columns to write into the
                `metadata` field of the document. By default, no columns are written.

        Raises:
            ImportError: If the `couchbase` SDK is not installed.
            ValueError: If `connection_string`, `db_username` or `db_password`
                is empty or None.
        """
        # The SDK is an optional dependency, so it is imported only when a
        # loader is actually constructed.
        try:
            from couchbase.auth import PasswordAuthenticator
            from couchbase.cluster import Cluster
            from couchbase.options import ClusterOptions
        except ImportError as e:
            raise ImportError(
                "Could not import couchbase package. "
                "Please install couchbase SDK with `pip install couchbase`."
            ) from e

        if not connection_string:
            raise ValueError("connection_string must be provided.")

        if not db_username:
            raise ValueError("db_username must be provided.")

        if not db_password:
            raise ValueError("db_password must be provided.")

        auth = PasswordAuthenticator(db_username, db_password)

        self.cluster: Cluster = Cluster(connection_string, ClusterOptions(auth))
        self.query = query
        self.page_content_fields = page_content_fields
        self.metadata_fields = metadata_fields

    def load(self) -> List[Document]:
        """Load Couchbase data into Document objects.

        Returns:
            Every document produced by `lazy_load`, collected into a list.
        """
        return list(self.lazy_load())

    def lazy_load(self) -> Iterator[Document]:
        """Load Couchbase data into Document objects lazily.

        Runs the configured query and yields one `Document` per result row.
        The page content is a newline-joined sequence of ``"<column>: <value>"``
        entries, in the order the row itself provides its columns.

        Yields:
            One `Document` per row of the query result.

        Note:
            Each name in `metadata_fields` is looked up with ``row[field]``, so
            a field that is absent from a row propagates the lookup error
            (``KeyError`` if rows are plain dicts).
        """
        from datetime import timedelta

        # Ensure connection to Couchbase cluster; a 5-second timeout is passed
        # to the SDK.
        self.cluster.wait_until_ready(timedelta(seconds=5))

        # Resolve the field selections once; they do not depend on the row.
        page_content_fields = self.page_content_fields
        # A None/empty selection means "every column goes into page_content".
        include_all_columns = not page_content_fields
        metadata_fields = self.metadata_fields or []

        # Run SQL++ Query
        result = self.cluster.query(self.query)
        for row in result:
            metadata = {field: row[field] for field in metadata_fields}

            page_content = "\n".join(
                f"{k}: {v}"
                for k, v in row.items()
                if include_all_columns or k in page_content_fields
            )

            yield Document(page_content=page_content, metadata=metadata)
|
Reference in New Issue
Block a user