mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-25 08:03:39 +00:00
Ability to specify credentials when using Google BigQuery as a data loader (#5466)
# Adds ability to specify credentials when using Google BigQuery as a data loader Fixes #5465. Adds ability to set credentials which must be of the `google.auth.credentials.Credentials` type. This argument is optional and will default to `None`. Co-authored-by: Dev 2049 <dev.dev2049@gmail.com>
This commit is contained in:
parent
eab4b4ccd7
commit
199cc700a3
@ -1,8 +1,13 @@
|
|||||||
from typing import List, Optional
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import TYPE_CHECKING, List, Optional
|
||||||
|
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
from langchain.document_loaders.base import BaseLoader
|
from langchain.document_loaders.base import BaseLoader
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from google.auth.credentials import Credentials
|
||||||
|
|
||||||
|
|
||||||
class BigQueryLoader(BaseLoader):
|
class BigQueryLoader(BaseLoader):
|
||||||
"""Loads a query result from BigQuery into a list of documents.
|
"""Loads a query result from BigQuery into a list of documents.
|
||||||
@ -11,6 +16,7 @@ class BigQueryLoader(BaseLoader):
|
|||||||
are written into the `page_content` of the document. The `metadata_columns`
|
are written into the `page_content` of the document. The `metadata_columns`
|
||||||
are written into the `metadata` of the document. By default, all columns
|
are written into the `metadata` of the document. By default, all columns
|
||||||
are written into the `page_content` and none into the `metadata`.
|
are written into the `page_content` and none into the `metadata`.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
@ -19,11 +25,28 @@ class BigQueryLoader(BaseLoader):
|
|||||||
project: Optional[str] = None,
|
project: Optional[str] = None,
|
||||||
page_content_columns: Optional[List[str]] = None,
|
page_content_columns: Optional[List[str]] = None,
|
||||||
metadata_columns: Optional[List[str]] = None,
|
metadata_columns: Optional[List[str]] = None,
|
||||||
|
credentials: Optional[Credentials] = None,
|
||||||
):
|
):
|
||||||
|
"""Initialize BigQuery document loader.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: The query to run in BigQuery.
|
||||||
|
project: Optional. The project to run the query in.
|
||||||
|
page_content_columns: Optional. The columns to write into the `page_content`
|
||||||
|
of the document.
|
||||||
|
metadata_columns: Optional. The columns to write into the `metadata` of the
|
||||||
|
document.
|
||||||
|
credentials : google.auth.credentials.Credentials, optional
|
||||||
|
Credentials for accessing Google APIs. Use this parameter to override
|
||||||
|
default credentials, such as to use Compute Engine
|
||||||
|
(`google.auth.compute_engine.Credentials`) or Service Account
|
||||||
|
(`google.oauth2.service_account.Credentials`) credentials directly.
|
||||||
|
"""
|
||||||
self.query = query
|
self.query = query
|
||||||
self.project = project
|
self.project = project
|
||||||
self.page_content_columns = page_content_columns
|
self.page_content_columns = page_content_columns
|
||||||
self.metadata_columns = metadata_columns
|
self.metadata_columns = metadata_columns
|
||||||
|
self.credentials = credentials
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def load(self) -> List[Document]:
|
||||||
try:
|
try:
|
||||||
@ -34,7 +57,7 @@ class BigQueryLoader(BaseLoader):
|
|||||||
"Please install it with `pip install google-cloud-bigquery`."
|
"Please install it with `pip install google-cloud-bigquery`."
|
||||||
) from ex
|
) from ex
|
||||||
|
|
||||||
bq_client = bigquery.Client(self.project)
|
bq_client = bigquery.Client(credentials=self.credentials, project=self.project)
|
||||||
query_result = bq_client.query(self.query).result()
|
query_result = bq_client.query(self.query).result()
|
||||||
docs: List[Document] = []
|
docs: List[Document] = []
|
||||||
|
|
||||||
|
7
poetry.lock
generated
7
poetry.lock
generated
@ -1,4 +1,4 @@
|
|||||||
# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand.
|
# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand.
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "absl-py"
|
name = "absl-py"
|
||||||
@ -6950,7 +6950,6 @@ files = [
|
|||||||
{file = "pylance-0.4.12-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:2b86fb8dccc03094c0db37bef0d91bda60e8eb0d1eddf245c6971450c8d8a53f"},
|
{file = "pylance-0.4.12-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:2b86fb8dccc03094c0db37bef0d91bda60e8eb0d1eddf245c6971450c8d8a53f"},
|
||||||
{file = "pylance-0.4.12-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:0bc82914b13204187d673b5f3d45f93219c38a0e9d0542ba251074f639669789"},
|
{file = "pylance-0.4.12-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:0bc82914b13204187d673b5f3d45f93219c38a0e9d0542ba251074f639669789"},
|
||||||
{file = "pylance-0.4.12-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a4bcce77f99ecd4cbebbadb01e58d5d8138d40eb56bdcdbc3b20b0475e7a472"},
|
{file = "pylance-0.4.12-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a4bcce77f99ecd4cbebbadb01e58d5d8138d40eb56bdcdbc3b20b0475e7a472"},
|
||||||
{file = "pylance-0.4.12-cp38-abi3-win_amd64.whl", hash = "sha256:9616931c5300030adb9626d22515710a127d1e46a46737a7a0f980b52f13627c"},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@ -10949,7 +10948,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
|
|||||||
cffi = ["cffi (>=1.11)"]
|
cffi = ["cffi (>=1.11)"]
|
||||||
|
|
||||||
[extras]
|
[extras]
|
||||||
all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "langkit", "lark", "lxml", "manifest-ml", "momento", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
|
all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "langkit", "lark", "lxml", "manifest-ml", "momento", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
|
||||||
azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "openai"]
|
azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "openai"]
|
||||||
cohere = ["cohere"]
|
cohere = ["cohere"]
|
||||||
docarray = ["docarray"]
|
docarray = ["docarray"]
|
||||||
@ -10963,4 +10962,4 @@ text-helpers = ["chardet"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.8.1,<4.0"
|
python-versions = ">=3.8.1,<4.0"
|
||||||
content-hash = "6a28a31679ae3bdb156121ff7c09bfb1f691345f445196eb0384f08e031c84d3"
|
content-hash = "379bfcf130acc24f2f8408e2bb7e3ae9d769070e6bf5f66868491bddb1b2fc53"
|
||||||
|
@ -40,6 +40,7 @@ pymongo = {version = "^4.3.3", optional = true}
|
|||||||
clickhouse-connect = {version="^0.5.14", optional=true}
|
clickhouse-connect = {version="^0.5.14", optional=true}
|
||||||
weaviate-client = {version = "^3", optional = true}
|
weaviate-client = {version = "^3", optional = true}
|
||||||
google-api-python-client = {version = "2.70.0", optional = true}
|
google-api-python-client = {version = "2.70.0", optional = true}
|
||||||
|
google-auth = {version = "^2.18.1", optional = true}
|
||||||
wolframalpha = {version = "5.0.0", optional = true}
|
wolframalpha = {version = "5.0.0", optional = true}
|
||||||
anthropic = {version = "^0.2.6", optional = true}
|
anthropic = {version = "^0.2.6", optional = true}
|
||||||
qdrant-client = {version = "^1.1.2", optional = true, python = ">=3.8.1,<3.12"}
|
qdrant-client = {version = "^1.1.2", optional = true, python = ">=3.8.1,<3.12"}
|
||||||
@ -239,6 +240,7 @@ all = [
|
|||||||
"weaviate-client",
|
"weaviate-client",
|
||||||
"redis",
|
"redis",
|
||||||
"google-api-python-client",
|
"google-api-python-client",
|
||||||
|
"google-auth",
|
||||||
"wolframalpha",
|
"wolframalpha",
|
||||||
"qdrant-client",
|
"qdrant-client",
|
||||||
"tensorflow-text",
|
"tensorflow-text",
|
||||||
|
Loading…
Reference in New Issue
Block a user