diff --git a/docs/modules/agents/toolkits/examples/csv.ipynb b/docs/modules/agents/toolkits/examples/csv.ipynb index 1377d20054f..7e38e7817e4 100644 --- a/docs/modules/agents/toolkits/examples/csv.ipynb +++ b/docs/modules/agents/toolkits/examples/csv.ipynb @@ -116,7 +116,7 @@ } ], "source": [ - "agent.run(\"how many people have more than 3 sibligngs\")" + "agent.run(\"how many people have more than 3 siblings\")" ] }, { diff --git a/docs/modules/agents/toolkits/examples/pandas.ipynb b/docs/modules/agents/toolkits/examples/pandas.ipynb index 35c977e2169..542cb0b019c 100644 --- a/docs/modules/agents/toolkits/examples/pandas.ipynb +++ b/docs/modules/agents/toolkits/examples/pandas.ipynb @@ -118,7 +118,7 @@ } ], "source": [ - "agent.run(\"how many people have more than 3 sibligngs\")" + "agent.run(\"how many people have more than 3 siblings\")" ] }, { diff --git a/docs/modules/agents/toolkits/examples/spark.ipynb b/docs/modules/agents/toolkits/examples/spark.ipynb index c3ea9010f6b..8874826df50 100644 --- a/docs/modules/agents/toolkits/examples/spark.ipynb +++ b/docs/modules/agents/toolkits/examples/spark.ipynb @@ -151,7 +151,7 @@ } ], "source": [ - "agent.run(\"how many people have more than 3 sibligngs\")" + "agent.run(\"how many people have more than 3 siblings\")" ] }, { diff --git a/docs/modules/indexes/retrievers/examples/chroma_self_query_retriever.ipynb b/docs/modules/indexes/retrievers/examples/chroma_self_query_retriever.ipynb new file mode 100644 index 00000000000..b54746a27bd --- /dev/null +++ b/docs/modules/indexes/retrievers/examples/chroma_self_query_retriever.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "13afcae7", + "metadata": {}, + "source": [ + "# Self-querying retriever with Chroma\n", + "In the notebook we'll demo the `SelfQueryRetriever` wrapped around a Chroma vector store. 
" + ] + }, + { + "cell_type": "markdown", + "id": "68e75fb9", + "metadata": {}, + "source": [ + "## Creating a Chroma vectorstore\n", + "First we'll want to create a Chroma VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n", + "\n", + "NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "63a8af5b", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install lark" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cb4a5787", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.schema import Document\n", + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.vectorstores import Chroma\n", + "\n", + "embeddings = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "bcbe04d9", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using embedded DuckDB without persistence: data will be transient\n" + ] + } + ], + "source": [ + "docs = [\n", + " Document(page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\", metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"}),\n", + " Document(page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\", metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2}),\n", + " Document(page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\", metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6}),\n", + " Document(page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\", metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3}),\n", + " 
Document(page_content=\"Toys come alive and have a blast doing so\", metadata={\"year\": 1995, \"genre\": \"animated\"}),\n", + "    Document(page_content=\"Three men walk into the Zone, three men walk out of the Zone\", metadata={\"year\": 1979, \"rating\": 9.9, \"director\": \"Andrei Tarkovsky\", \"genre\": \"science fiction\"})\n", + "]\n", + "vectorstore = Chroma.from_documents(\n", + "    docs, embeddings\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5ecaab6d", + "metadata": {}, + "source": [ + "## Creating our self-querying retriever\n", + "Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "86e34dbf", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import OpenAI\n", + "from langchain.retrievers.self_query.base import SelfQueryRetriever\n", + "from langchain.chains.query_constructor.base import AttributeInfo\n", + "\n", + "metadata_field_info=[\n", + "    AttributeInfo(\n", + "        name=\"genre\",\n", + "        description=\"The genre of the movie\", \n", + "        type=\"string or list[string]\", \n", + "    ),\n", + "    AttributeInfo(\n", + "        name=\"year\",\n", + "        description=\"The year the movie was released\", \n", + "        type=\"integer\", \n", + "    ),\n", + "    AttributeInfo(\n", + "        name=\"director\",\n", + "        description=\"The name of the movie director\", \n", + "        type=\"string\", \n", + "    ),\n", + "    AttributeInfo(\n", + "        name=\"rating\",\n", + "        description=\"A 1-10 rating for the movie\",\n", + "        type=\"float\"\n", + "    ),\n", + "]\n", + "document_content_description = \"Brief summary of a movie\"\n", + "llm = OpenAI(temperature=0)\n", + "retriever = SelfQueryRetriever.from_llm(llm, vectorstore, document_content_description, metadata_field_info, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "id": "ea9df8d4", 
+ "metadata": {}, + "source": [ + "## Testing it out\n", + "And now we can try actually using our retriever!" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "38a126e9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query='dinosaur' filter=None\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'year': 1993, 'rating': 7.7, 'genre': 'science fiction'}),\n", + " Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'}),\n", + " Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}),\n", + " Document(page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...', metadata={'year': 2010, 'director': 'Christopher Nolan', 'rating': 8.2})]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example only specifies a relevant query\n", + "retriever.get_relevant_documents(\"What are some movies about dinosaurs\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "fc3f1e6e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query=' ' filter=Comparison(comparator=, attribute='rating', value=8.5)\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'year': 2006, 'director': 'Satoshi Kon', 'rating': 8.6}),\n", + " Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science 
fiction'})]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example only specifies a filter\n", + "retriever.get_relevant_documents(\"I want to watch a movie rated higher than 8.5\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b19d4da0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query='women' filter=Comparison(comparator=, attribute='director', value='Greta Gerwig')\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='A bunch of normal-sized women are supremely wholesome and some men pine after them', metadata={'year': 2019, 'director': 'Greta Gerwig', 'rating': 8.3})]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example specifies a query and a filter\n", + "retriever.get_relevant_documents(\"Has Greta Gerwig directed any movies about women\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f900e40e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "query=' ' filter=Operation(operator=, arguments=[Comparison(comparator=, attribute='genre', value='science fiction'), Comparison(comparator=, attribute='rating', value=8.5)])\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'year': 1979, 'rating': 9.9, 'director': 'Andrei Tarkovsky', 'genre': 'science fiction'})]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example specifies a composite filter\n", + "retriever.get_relevant_documents(\"What's a highly rated (above 8.5) science fiction film?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "12a51522", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "query='toys' filter=Operation(operator=, arguments=[Comparison(comparator=, attribute='year', value=1990), Comparison(comparator=, attribute='year', value=2005), Comparison(comparator=, attribute='genre', value='animated')])\n" + ] + }, + { + "data": { + "text/plain": [ + "[Document(page_content='Toys come alive and have a blast doing so', metadata={'year': 1995, 'genre': 'animated'})]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# This example specifies a query and composite filter\n", + "retriever.get_relevant_documents(\"What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "60110338", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/modules/indexes/retrievers/examples/contextual-compression.ipynb b/docs/modules/indexes/retrievers/examples/contextual-compression.ipynb index 52858dd8349..9f299c6b0ac 100644 --- a/docs/modules/indexes/retrievers/examples/contextual-compression.ipynb +++ b/docs/modules/indexes/retrievers/examples/contextual-compression.ipynb @@ -363,7 +363,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.3" + "version": "3.9.1" } }, "nbformat": 4, diff --git a/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb b/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb index 665adf9fe21..7668bf34edb 100644 
--- a/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb +++ b/docs/modules/indexes/retrievers/examples/self_query_retriever.ipynb @@ -17,8 +17,6 @@ "## Creating a Pinecone index\n", "First we'll want to create a Pinecone VectorStore and seed it with some data. We've created a small demo set of documents that contain summaries of movies.\n", "\n", - "NOTE: The self-query retriever currently only has built-in support for Pinecone VectorStore.\n", - "\n", "NOTE: The self-query retriever requires you to have `lark` installed (`pip install lark`)" ] }, @@ -97,7 +95,7 @@ "id": "5ecaab6d", "metadata": {}, "source": [ - "# Creating our self-querying retriever\n", + "## Creating our self-querying retriever\n", "Now we can instantiate our retriever. To do this we'll need to provide some information upfront about the metadata fields that our documents support and a short description of the document contents." ] }, @@ -144,7 +142,7 @@ "id": "ea9df8d4", "metadata": {}, "source": [ - "# Testing it out\n", + "## Testing it out\n", "And now we can try actually using our retriever!" 
] }, diff --git a/langchain/callbacks/tracers/langchain.py b/langchain/callbacks/tracers/langchain.py index 5ffacd93b30..76e1f75f615 100644 --- a/langchain/callbacks/tracers/langchain.py +++ b/langchain/callbacks/tracers/langchain.py @@ -12,7 +12,7 @@ from langchain.callbacks.tracers.base import BaseTracer from langchain.callbacks.tracers.schemas import ( ChainRun, LLMRun, - Run, + RunCreate, ToolRun, TracerSession, TracerSessionBase, @@ -190,7 +190,7 @@ class LangChainTracerV2(LangChainTracer): """Load the default tracing session and set it as the Tracer's session.""" return self.load_session("default") - def _convert_run(self, run: Union[LLMRun, ChainRun, ToolRun]) -> Run: + def _convert_run(self, run: Union[LLMRun, ChainRun, ToolRun]) -> RunCreate: """Convert a run to a Run.""" session = self.session or self.load_default_session() inputs: Dict[str, Any] = {} @@ -220,9 +220,9 @@ class LangChainTracerV2(LangChainTracer): *run.child_tool_runs, ] - return Run( + return RunCreate( id=run.uuid, - name=run.serialized.get("name", f"{run_type}-{run.uuid}"), + name=run.serialized.get("name"), start_time=run.start_time, end_time=run.end_time, extra=run.extra or {}, @@ -233,7 +233,6 @@ class LangChainTracerV2(LangChainTracer): outputs=outputs, session_id=session.id, run_type=run_type, - parent_run_id=run.parent_uuid, reference_example_id=self.example_id, child_runs=[self._convert_run(child) for child in child_runs], ) diff --git a/langchain/callbacks/tracers/schemas.py b/langchain/callbacks/tracers/schemas.py index c9495c1aba0..f38094ae273 100644 --- a/langchain/callbacks/tracers/schemas.py +++ b/langchain/callbacks/tracers/schemas.py @@ -102,11 +102,10 @@ class RunTypeEnum(str, Enum): llm = "llm" -class Run(BaseModel): - """Run schema.""" +class RunBase(BaseModel): + """Base Run schema.""" id: Optional[UUID] - name: str start_time: datetime.datetime = Field(default_factory=datetime.datetime.utcnow) end_time: datetime.datetime = 
Field(default_factory=datetime.datetime.utcnow) extra: dict @@ -116,10 +115,23 @@ class Run(BaseModel): inputs: dict outputs: Optional[dict] session_id: UUID - parent_run_id: Optional[UUID] reference_example_id: Optional[UUID] run_type: RunTypeEnum +class RunCreate(RunBase): + """Schema to create a run in the DB.""" + + name: Optional[str] + child_runs: List[RunCreate] = Field(default_factory=list) + + +class Run(RunBase): + """Run schema when loading from the DB.""" + + name: str + parent_run_id: Optional[UUID] + + ChainRun.update_forward_refs() ToolRun.update_forward_refs() diff --git a/langchain/document_loaders/confluence.py b/langchain/document_loaders/confluence.py index f20ed5f366e..7920ff2c94d 100644 --- a/langchain/document_loaders/confluence.py +++ b/langchain/document_loaders/confluence.py @@ -217,7 +217,6 @@ class ConfluenceLoader(BaseLoader): label=label, limit=limit, max_pages=max_pages, - expand="body.storage.value", ) ids_by_label = [page["id"] for page in pages] if page_ids: diff --git a/langchain/document_loaders/csv_loader.py b/langchain/document_loaders/csv_loader.py index 54c0d8f566a..a844f94b1db 100644 --- a/langchain/document_loaders/csv_loader.py +++ b/langchain/document_loaders/csv_loader.py @@ -36,13 +36,7 @@ class CSVLoader(BaseLoader): self.file_path = file_path self.source_column = source_column self.encoding = encoding - if csv_args is None: - self.csv_args = { - "delimiter": csv.Dialect.delimiter, - "quotechar": csv.Dialect.quotechar, - } - else: - self.csv_args = csv_args + self.csv_args = csv_args or {} def load(self) -> List[Document]: """Load data into document objects.""" diff --git a/langchain/retrievers/__init__.py b/langchain/retrievers/__init__.py index a56c9f96b0c..6fc0c8063c4 100644 --- a/langchain/retrievers/__init__.py +++ b/langchain/retrievers/__init__.py @@ -6,6 +6,7 @@ from langchain.retrievers.knn import KNNRetriever from langchain.retrievers.metal import MetalRetriever from 
langchain.retrievers.pinecone_hybrid_search import PineconeHybridSearchRetriever from langchain.retrievers.remote_retriever import RemoteLangChainRetriever +from langchain.retrievers.self_query.base import SelfQueryRetriever from langchain.retrievers.svm import SVMRetriever from langchain.retrievers.tfidf import TFIDFRetriever from langchain.retrievers.time_weighted_retriever import ( @@ -28,4 +29,5 @@ __all__ = [ "SVMRetriever", "KNNRetriever", "VespaRetriever", + "SelfQueryRetriever", ] diff --git a/langchain/retrievers/document_compressors/cohere_rerank.py b/langchain/retrievers/document_compressors/cohere_rerank.py index 43b084d770c..41513c656c2 100644 --- a/langchain/retrievers/document_compressors/cohere_rerank.py +++ b/langchain/retrievers/document_compressors/cohere_rerank.py @@ -2,7 +2,7 @@ from __future__ import annotations from typing import TYPE_CHECKING, Dict, Sequence -from pydantic import root_validator +from pydantic import Extra, root_validator from langchain.retrievers.document_compressors.base import BaseDocumentCompressor from langchain.schema import Document @@ -10,6 +10,13 @@ from langchain.utils import get_from_dict_or_env if TYPE_CHECKING: from cohere import Client +else: + # We do this to avoid pydantic annotation issues when actually instantiating + # while keeping this import optional + try: + from cohere import Client + except ImportError: + pass class CohereRerank(BaseDocumentCompressor): @@ -17,7 +24,13 @@ class CohereRerank(BaseDocumentCompressor): top_n: int = 3 model: str = "rerank-english-v2.0" - @root_validator() + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @root_validator(pre=True) def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" cohere_api_key = get_from_dict_or_env( diff --git a/langchain/retrievers/self_query/base.py b/langchain/retrievers/self_query/base.py index 
b74dfacadc2..bf5ad303aee 100644 --- a/langchain/retrievers/self_query/base.py +++ b/langchain/retrievers/self_query/base.py @@ -8,15 +8,17 @@ from langchain.base_language import BaseLanguageModel from langchain.chains.query_constructor.base import load_query_constructor_chain from langchain.chains.query_constructor.ir import StructuredQuery, Visitor from langchain.chains.query_constructor.schema import AttributeInfo +from langchain.retrievers.self_query.chroma import ChromaTranslator from langchain.retrievers.self_query.pinecone import PineconeTranslator from langchain.schema import BaseRetriever, Document -from langchain.vectorstores import Pinecone, VectorStore +from langchain.vectorstores import Chroma, Pinecone, VectorStore def _get_builtin_translator(vectorstore_cls: Type[VectorStore]) -> Visitor: """Get the translator class corresponding to the vector store class.""" BUILTIN_TRANSLATORS: Dict[Type[VectorStore], Type[Visitor]] = { - Pinecone: PineconeTranslator + Pinecone: PineconeTranslator, + Chroma: ChromaTranslator, } if vectorstore_cls not in BUILTIN_TRANSLATORS: raise ValueError( diff --git a/langchain/retrievers/self_query/chroma.py b/langchain/retrievers/self_query/chroma.py new file mode 100644 index 00000000000..02457de3300 --- /dev/null +++ b/langchain/retrievers/self_query/chroma.py @@ -0,0 +1,53 @@ +"""Logic for converting internal query language to a valid Chroma query.""" +from typing import Dict, Tuple, Union + +from langchain.chains.query_constructor.ir import ( + Comparator, + Comparison, + Operation, + Operator, + StructuredQuery, + Visitor, +) + + +class ChromaTranslator(Visitor): + """Logic for converting internal query language elements to valid filters.""" + + allowed_operators = [Operator.AND, Operator.OR] + """Subset of allowed logical operators.""" + + def _format_func(self, func: Union[Operator, Comparator]) -> str: + if isinstance(func, Operator) and self.allowed_operators is not None: + if func not in self.allowed_operators: + 
raise ValueError( + f"Received disallowed operator {func}. Allowed " + f"operators are {self.allowed_operators}" + ) + if isinstance(func, Comparator) and self.allowed_comparators is not None: + if func not in self.allowed_comparators: + raise ValueError( + f"Received disallowed comparator {func}. Allowed " + f"comparators are {self.allowed_comparators}" + ) + return f"${func.value}" + + def visit_operation(self, operation: Operation) -> Dict: + args = [arg.accept(self) for arg in operation.arguments] + return {self._format_func(operation.operator): args} + + def visit_comparison(self, comparison: Comparison) -> Dict: + return { + comparison.attribute: { + self._format_func(comparison.comparator): comparison.value + } + } + + def visit_structured_query( + self, structured_query: StructuredQuery + ) -> Tuple[str, dict]: + if structured_query.filter is None: + kwargs = {} + else: + kwargs = {"filter": structured_query.filter.accept(self)} + return structured_query.query, kwargs diff --git a/langchain/utilities/powerbi.py b/langchain/utilities/powerbi.py index 605fdd8c462..1e095f13236 100644 --- a/langchain/utilities/powerbi.py +++ b/langchain/utilities/powerbi.py @@ -4,6 +4,7 @@ from __future__ import annotations import logging import os +from copy import deepcopy from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union import aiohttp @@ -12,8 +13,6 @@ from aiohttp import ServerTimeoutError from pydantic import BaseModel, Field, root_validator from requests.exceptions import Timeout -from langchain.tools.powerbi.prompt import SCHEMA_ERROR_RESPONSE, UNAUTHORIZED_RESPONSE - _LOGGER = logging.getLogger(__name__) BASE_URL = os.getenv("POWERBI_BASE_URL", "https://api.powerbi.com/v1.0/myorg") @@ -63,27 +62,29 @@ class PowerBIDataset(BaseModel): @property def headers(self) -> Dict[str, str]: """Get the token.""" - from azure.core.exceptions import ClientAuthenticationError - - token = None if self.token: - token = self.token + return { + 
"Content-Type": "application/json", + "Authorization": "Bearer " + self.token, + } + from azure.core.exceptions import ( # pylint: disable=import-outside-toplevel + ClientAuthenticationError, + ) + if self.credential: try: token = self.credential.get_token( "https://analysis.windows.net/powerbi/api/.default" ).token + return { + "Content-Type": "application/json", + "Authorization": "Bearer " + token, + } except Exception as exc: # pylint: disable=broad-exception-caught raise ClientAuthenticationError( "Could not get a token from the supplied credentials." ) from exc - if not token: - raise ClientAuthenticationError("No credential or token supplied.") - - return { - "Content-Type": "application/json", - "Authorization": "Bearer " + token, - } + raise ClientAuthenticationError("No credential or token supplied.") def get_table_names(self) -> Iterable[str]: """Get names of tables available.""" @@ -116,10 +117,12 @@ class PowerBIDataset(BaseModel): return self.table_names def _get_tables_todo(self, tables_todo: List[str]) -> List[str]: - for table in tables_todo: + """Get the tables that still need to be queried.""" + todo = deepcopy(tables_todo) + for table in todo: if table in self.schemas: - tables_todo.remove(table) - return tables_todo + todo.remove(table) + return todo def _get_schema_for_tables(self, table_names: List[str]) -> str: """Create a string of the table schemas for the supplied tables.""" @@ -135,19 +138,20 @@ class PowerBIDataset(BaseModel): tables_requested = self._get_tables_to_query(table_names) tables_todo = self._get_tables_todo(tables_requested) for table in tables_todo: + if " " in table and not table.startswith("'") and not table.endswith("'"): + table = f"'{table}'" try: result = self.run( f"EVALUATE TOPN({self.sample_rows_in_table_info}, {table})" ) except Timeout: _LOGGER.warning("Timeout while getting table info for %s", table) + self.schemas[table] = "unknown" continue except Exception as exc: # pylint: disable=broad-exception-caught - if 
"bad request" in str(exc).lower(): - return SCHEMA_ERROR_RESPONSE - if "unauthorized" in str(exc).lower(): - return UNAUTHORIZED_RESPONSE - return str(exc) + _LOGGER.warning("Error while getting table info for %s: %s", table, exc) + self.schemas[table] = "unknown" + continue self.schemas[table] = json_to_md(result["results"][0]["tables"][0]["rows"]) return self._get_schema_for_tables(tables_requested) @@ -158,19 +162,20 @@ class PowerBIDataset(BaseModel): tables_requested = self._get_tables_to_query(table_names) tables_todo = self._get_tables_todo(tables_requested) for table in tables_todo: + if " " in table and not table.startswith("'") and not table.endswith("'"): + table = f"'{table}'" try: result = await self.arun( f"EVALUATE TOPN({self.sample_rows_in_table_info}, {table})" ) except ServerTimeoutError: _LOGGER.warning("Timeout while getting table info for %s", table) + self.schemas[table] = "unknown" continue except Exception as exc: # pylint: disable=broad-exception-caught - if "bad request" in str(exc).lower(): - return SCHEMA_ERROR_RESPONSE - if "unauthorized" in str(exc).lower(): - return UNAUTHORIZED_RESPONSE - return str(exc) + _LOGGER.warning("Error while getting table info for %s: %s", table, exc) + self.schemas[table] = "unknown" + continue self.schemas[table] = json_to_md(result["results"][0]["tables"][0]["rows"]) return self._get_schema_for_tables(tables_requested) diff --git a/pyproject.toml b/pyproject.toml index f66b83c4d79..918b0365792 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "langchain" -version = "0.0.158" +version = "0.0.159" description = "Building applications with LLMs through composability" authors = [] license = "MIT" diff --git a/tests/integration_tests/retrievers/document_compressors/test_cohere_reranker.py b/tests/integration_tests/retrievers/document_compressors/test_cohere_reranker.py new file mode 100644 index 00000000000..667452041b8 --- /dev/null +++ 
b/tests/integration_tests/retrievers/document_compressors/test_cohere_reranker.py @@ -0,0 +1,8 @@ +"""Test the cohere reranker.""" + +from langchain.retrievers.document_compressors.cohere_rerank import CohereRerank + + +def test_cohere_reranker_init() -> None: + """Test the cohere reranker initializes correctly.""" + CohereRerank()