diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 63dfbc03018..ff139139daf 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -123,6 +123,32 @@ This can be very helpful when you've made changes to only certain parts of the p We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed. +### Spellcheck + +Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell). +Note that `codespell` finds common typos, so it could have false positives (flagging correctly spelled but rarely used words) and false negatives (not finding misspelled words). + +To check spelling for this project: + +```bash +make spell_check +``` + +To fix spelling in place: + +```bash +make spell_fix +``` + +If codespell is incorrectly flagging a word, you can skip spellcheck for that word by adding it to the codespell config in the `pyproject.toml` file. + +```toml +[tool.codespell] +... +# Add here: +ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure' +``` + ### Coverage Code coverage (i.e. the amount of code that is covered by unit tests) helps identify areas of the code that are potentially more or less brittle. 
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 00000000000..7373affc383 --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,22 @@ +--- +name: Codespell + +on: + push: + branches: [master] + pull_request: + branches: [master] + +permissions: + contents: read + +jobs: + codespell: + name: Check for spelling errors + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Codespell + uses: codespell-project/actions-codespell@v2 diff --git a/Makefile b/Makefile index 091284e9d85..e81add90a86 100644 --- a/Makefile +++ b/Makefile @@ -81,6 +81,12 @@ format format_diff: poetry run black $(PYTHON_FILES) poetry run ruff --select I --fix $(PYTHON_FILES) +spell_check: + poetry run codespell --toml pyproject.toml + +spell_fix: + poetry run codespell --toml pyproject.toml -w + ###################### # HELP ###################### diff --git a/docs/extras/ecosystem/integrations/grobid.mdx b/docs/extras/ecosystem/integrations/grobid.mdx index ca68487f822..6a24e68baa2 100644 --- a/docs/extras/ecosystem/integrations/grobid.mdx +++ b/docs/extras/ecosystem/integrations/grobid.mdx @@ -1,7 +1,7 @@ # Grobid This page covers how to use the Grobid to parse articles for LangChain. 
-It is seperated into two parts: installation and running the server +It is separated into two parts: installation and running the server ## Installation and Setup #Ensure You have Java installed diff --git a/docs/extras/ecosystem/integrations/langchain_decorators.mdx b/docs/extras/ecosystem/integrations/langchain_decorators.mdx index 22e7f63b4df..cdd32abdae5 100644 --- a/docs/extras/ecosystem/integrations/langchain_decorators.mdx +++ b/docs/extras/ecosystem/integrations/langchain_decorators.mdx @@ -10,7 +10,7 @@ For Feedback, Issues, Contributions - please raise an issue here: Main principles and benefits: - more `pythonic` way of writing code -- write multiline prompts that wont break your code flow with indentation +- write multiline prompts that won't break your code flow with indentation - making use of IDE in-built support for **hinting**, **type checking** and **popup with docs** to quickly peek in the function to see the prompt, parameters it consumes etc. - leverage all the power of 🦜🔗 LangChain ecosystem - adding support for **optional parameters** @@ -31,7 +31,7 @@ def write_me_short_post(topic:str, platform:str="twitter", audience:str = "devel """ return -# run it naturaly +# run it naturally write_me_short_post(topic="starwars") # or write_me_short_post(topic="starwars", platform="redit") @@ -122,7 +122,7 @@ await write_me_short_post(topic="old movies") # Simplified streaming -If we wan't to leverage streaming: +If we want to leverage streaming: - we need to define prompt as async function - turn on the streaming on the decorator, or we can define PromptType with streaming on - capture the stream using StreamingContext @@ -149,7 +149,7 @@ async def write_me_short_post(topic:str, platform:str="twitter", audience:str = -# just an arbitrary function to demonstrate the streaming... wil be some websockets code in the real world +# just an arbitrary function to demonstrate the streaming... 
will be some websockets code in the real world tokens=[] def capture_stream_func(new_token:str): tokens.append(new_token) @@ -250,7 +250,7 @@ the roles here are model native roles (assistant, user, system for chatGPT) # Optional sections - you can define a whole sections of your prompt that should be optional -- if any input in the section is missing, the whole section wont be rendered +- if any input in the section is missing, the whole section won't be rendered the syntax for this is as follows: @@ -273,7 +273,7 @@ def prompt_with_optional_partials(): # Output parsers - llm_prompt decorator natively tries to detect the best output parser based on the output type. (if not set, it returns the raw string) -- list, dict and pydantic outputs are also supported natively (automaticaly) +- list, dict and pydantic outputs are also supported natively (automatically) ``` python # this code example is complete and should run as it is diff --git a/docs/extras/ecosystem/integrations/myscale.mdx b/docs/extras/ecosystem/integrations/myscale.mdx index 99464b623ef..131af59f96a 100644 --- a/docs/extras/ecosystem/integrations/myscale.mdx +++ b/docs/extras/ecosystem/integrations/myscale.mdx @@ -18,7 +18,7 @@ We also deliver with live demo on huggingface! Please checkout our [huggingface ## Installation and Setup - Install the Python SDK with `pip install clickhouse-connect` -### Setting up envrionments +### Setting up environments There are two ways to set up parameters for myscale index. 
diff --git a/docs/extras/ecosystem/integrations/vectara/index.mdx b/docs/extras/ecosystem/integrations/vectara/index.mdx index 3cd14d61bfd..627a234a3b4 100644 --- a/docs/extras/ecosystem/integrations/vectara/index.mdx +++ b/docs/extras/ecosystem/integrations/vectara/index.mdx @@ -39,7 +39,7 @@ vectara = Vectara( ``` The customer_id, corpus_id and api_key are optional, and if they are not supplied will be read from the environment variables `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID` and `VECTARA_API_KEY`, respectively. -Afer you have the vectorstore, you can `add_texts` or `add_documents` as per the standard `VectorStore` interface, for example: +After you have the vectorstore, you can `add_texts` or `add_documents` as per the standard `VectorStore` interface, for example: ```python vectara.add_texts(["to be or not to be", "that is the question"]) diff --git a/docs/extras/modules/data_connection/document_loaders/integrations/example_data/testmw_pages_current.xml b/docs/extras/modules/data_connection/document_loaders/integrations/example_data/testmw_pages_current.xml index 3875d153d65..4c4ea28da9c 100644 --- a/docs/extras/modules/data_connection/document_loaders/integrations/example_data/testmw_pages_current.xml +++ b/docs/extras/modules/data_connection/document_loaders/integrations/example_data/testmw_pages_current.xml @@ -1840,7 +1840,7 @@ This category contains articles that are incomplete and are tagged with the {{T| FANDOM 32769624 - Created page with "{{LicenseBox|text=''This work is licensed under the [https://opensource.org/licenses/MIT MIT License].''}}{{#ifeq: {{NAMESPACENUMBER}} | 0 | <includeonly>Category:MIT licens..." + Created page with "{{LicenseBox|text=''This work is licensed under the [https://opensource.org/licenses/MIT MIT License].''}}{{#ifeq: {{NAMESPACENUMBER}} | 0 | <includeonly>Category:MIT license..." 
104 wikitext text/x-wiki diff --git a/docs/extras/modules/paul_graham_essay.txt b/docs/extras/modules/paul_graham_essay.txt index 30ffafc6c72..b572cb6726b 100644 --- a/docs/extras/modules/paul_graham_essay.txt +++ b/docs/extras/modules/paul_graham_essay.txt @@ -142,7 +142,7 @@ There were three main parts to the software: the editor, which people used to bu There were a lot of startups making ecommerce software in the second half of the 90s. We were determined to be the Microsoft Word, not the Interleaf. Which meant being easy to use and inexpensive. It was lucky for us that we were poor, because that caused us to make Viaweb even more inexpensive than we realized. We charged $100 a month for a small store and $300 a month for a big one. This low price was a big attraction, and a constant thorn in the sides of competitors, but it wasn't because of some clever insight that we set the price low. We had no idea what businesses paid for things. $300 a month seemed like a lot of money to us. -We did a lot of things right by accident like that. For example, we did what's now called "doing things that don't scale," although at the time we would have described it as "being so lame that we're driven to the most desperate measures to get users." The most common of which was building stores for them. This seemed particularly humiliating, since the whole raison d'etre of our software was that people could use it to make their own stores. But anything to get users. +We did a lot of things right by accident like that. For example, we did what's now called "doing things that don't scale," although at the time we would have described it as "being so lame that we're driven to the most desperate measures to get users." The most common of which was building stores for them. This seemed particularly humiliating, since the whole raison d'etre of our software was that people could use it to make their own stores. But anything to get users. 
We learned a lot more about retail than we wanted to know. For example, that if you could only have a small image of a man's shirt (and all images were small then by present standards), it was better to have a closeup of the collar than a picture of the whole shirt. The reason I remember learning this was that it meant I had to rescan about 30 images of men's shirts. My first set of scans were so beautiful too. diff --git a/docs/extras/use_cases/question_answering/index.mdx b/docs/extras/use_cases/question_answering/index.mdx index 590bc16ab77..e36222d261c 100644 --- a/docs/extras/use_cases/question_answering/index.mdx +++ b/docs/extras/use_cases/question_answering/index.mdx @@ -45,7 +45,7 @@ Let's load this [blog post](https://lilianweng.github.io/posts/2023-06-23-agent/ We have a QA app in a few lines of code. -Set enviorment varaibles and get packages: +Set environment variables and get packages: ```python pip install openai pip install chromadb @@ -140,7 +140,7 @@ Here are the three pieces together: #### 1.2.2 Retaining metadata -`Context-aware splitters` keep the location ("context") of each split in the origional `Document`: +`Context-aware splitters` keep the location ("context") of each split in the original `Document`: * [Markdown files](https://python.langchain.com/docs/use_cases/question_answering/document-context-aware-QA) * [Code (py or js)](https://python.langchain.com/docs/modules/data_connection/document_loaders/integrations/source_code) diff --git a/docs/snippets/modules/agents/agent_types/structured_chat.mdx b/docs/snippets/modules/agents/agent_types/structured_chat.mdx index 818ed8a2f62..68350f97b45 100644 --- a/docs/snippets/modules/agents/agent_types/structured_chat.mdx +++ b/docs/snippets/modules/agents/agent_types/structured_chat.mdx @@ -126,7 +126,7 @@ print(response) TL;DR - We recently open-sourced an auto-evaluator tool for grading LLM question-answer chains. 
We are now releasing an open source, free to use hosted app and API to expand usability. Below we discuss a few opportunities to further improve May 1, 2023 5 min read Callbacks Improvements TL;DR: We're announcing improvements to our callbacks system, which powers logging, tracing, streaming output, and some awesome third-party integrations. This will better support concurrent runs with independent callbacks, tracing of deeply nested trees of LangChain components, and callback handlers scoped to a single request (which is super useful for May 1, 2023 3 min read Unleashing the power of AI Collaboration with Parallelized LLM Agent Actor Trees Editor's note: the following is a guest blog post from Cyrus at Shaman AI. We use guest blog posts to highlight interesting and novel applciations, and this is certainly that. There's been a lot of talk about agents recently, but most have been discussions around a single agent. If multiple Apr 28, 2023 4 min read Gradio & LLM Agents Editor's note: this is a guest blog post from Freddy Boulton, a software engineer at Gradio. We're excited to share this post because it brings a large number of exciting new tools into the ecosystem. Agents are largely defined by the tools they have, so to be able to equip Apr 23, 2023 4 min read RecAlign - The smart content filter for social media feed [Editor's Note] This is a guest post by Tian Jin. We are highlighting this application as we think it is a novel use case. Specifically, we think recommendation systems are incredibly impactful in our everyday lives and there has not been a ton of discourse on how LLMs will impact Apr 22, 2023 3 min read Improving Document Retrieval with Contextual Compression Note: This post assumes some familiarity with LangChain and is moderately technical. + We recently open-sourced an auto-evaluator tool for grading LLM question-answer chains. We are now releasing an open source, free to use hosted app and API to expand usability. 
Below we discuss a few opportunities to further improve May 1, 2023 5 min read Callbacks Improvements TL;DR: We're announcing improvements to our callbacks system, which powers logging, tracing, streaming output, and some awesome third-party integrations. This will better support concurrent runs with independent callbacks, tracing of deeply nested trees of LangChain components, and callback handlers scoped to a single request (which is super useful for May 1, 2023 3 min read Unleashing the power of AI Collaboration with Parallelized LLM Agent Actor Trees Editor's note: the following is a guest blog post from Cyrus at Shaman AI. We use guest blog posts to highlight interesting and novel applications, and this is certainly that. There's been a lot of talk about agents recently, but most have been discussions around a single agent. If multiple Apr 28, 2023 4 min read Gradio & LLM Agents Editor's note: this is a guest blog post from Freddy Boulton, a software engineer at Gradio. We're excited to share this post because it brings a large number of exciting new tools into the ecosystem. Agents are largely defined by the tools they have, so to be able to equip Apr 23, 2023 4 min read RecAlign - The smart content filter for social media feed [Editor's Note] This is a guest post by Tian Jin. We are highlighting this application as we think it is a novel use case. Specifically, we think recommendation systems are incredibly impactful in our everyday lives and there has not been a ton of discourse on how LLMs will impact Apr 22, 2023 3 min read Improving Document Retrieval with Contextual Compression Note: This post assumes some familiarity with LangChain and is moderately technical. 💡 TL;DR: We’ve introduced a new abstraction and a new document Retriever to facilitate the post-processing of retrieved documents. 
Specifically, the new abstraction makes it easy to take a set of retrieved documents and extract from them Apr 20, 2023 3 min read Autonomous Agents & Agent Simulations Over the past two weeks, there has been a massive increase in using LLMs in an agentic manner. Specifically, projects like AutoGPT, BabyAGI, CAMEL, and Generative Agents have popped up. The LangChain community has now implemented some parts of all of those projects in the LangChain framework. While researching and Apr 18, 2023 7 min read AI-Powered Medical Knowledge: Revolutionizing Care for Rare Conditions [Editor's Note]: This is a guest post by Jack Simon, who recently participated in a hackathon at Williams College. He built a LangChain-powered chatbot focused on appendiceal cancer, aiming to make specialized knowledge more accessible to those in need. If you are interested in building a chatbot for another rare Apr 17, 2023 3 min read Auto-Eval of Question-Answering Tasks By Lance Martin @@ -139,7 +139,7 @@ print(response) Originally we designed LangChain.js to run in Node.js, which is the Apr 11, 2023 3 min read LangChain x Supabase Supabase is holding an AI Hackathon this week. Here at LangChain we are big fans of both Supabase and hackathons, so we thought this would be a perfect time to highlight the multiple ways you can use LangChain and Supabase together. - The reason we like Supabase so much is that Apr 8, 2023 2 min read Announcing our $10M seed round led by Benchmark It was only six months ago that we released the first version of LangChain, but it seems like several years. When we launched, generative AI was starting to go mainstream: stable diffusion had just been released and was captivating people’s imagination and fueling an explosion in developer activity, Jasper Apr 4, 2023 4 min read Custom Agents One of the most common requests we've heard is better functionality and documentation for creating custom agents. 
This has always been a bit tricky - because in our mind it's actually still very unclear what an "agent" actually is, and therefor what the "right" abstractions for them may be. Recently, Apr 3, 2023 3 min read Retrieval TL;DR: We are adjusting our abstractions to make it easy for other retrieval methods besides the LangChain VectorDB object to be used in LangChain. This is done with the goals of (1) allowing retrievers constructed elsewhere to be used more easily in LangChain, (2) encouraging more experimentation with alternative Mar 23, 2023 4 min read LangChain + Zapier Natural Language Actions (NLA) We are super excited to team up with Zapier and integrate their new Zapier NLA API into LangChain, which you can now use with your agents and chains. With this integration, you have access to the 5k+ apps and 20k+ actions on Zapier's platform through a natural language API interface. Mar 16, 2023 2 min read Evaluation Evaluation of language models, and by extension applications built on top of language models, is hard. With recent model releases (OpenAI, Anthropic, Google) evaluation is becoming a bigger and bigger issue. People are starting to try to tackle this, with OpenAI releasing OpenAI/evals - focused on evaluating OpenAI models. Mar 14, 2023 3 min read LLMs and SQL Francisco Ingham and Jon Luo are two of the community members leading the change on the SQL integrations. We’re really excited to write this blog post with them going over all the tips and tricks they’ve learned doing so. We’re even more excited to announce that we’ Mar 13, 2023 8 min read Origin Web Browser [Editor's Note]: This is the second of hopefully many guest posts. We intend to highlight novel applications building on top of LangChain. If you are interested in working with us on such a post, please reach out to harrison@langchain.dev. 
+ The reason we like Supabase so much is that Apr 8, 2023 2 min read Announcing our $10M seed round led by Benchmark It was only six months ago that we released the first version of LangChain, but it seems like several years. When we launched, generative AI was starting to go mainstream: stable diffusion had just been released and was captivating people’s imagination and fueling an explosion in developer activity, Jasper Apr 4, 2023 4 min read Custom Agents One of the most common requests we've heard is better functionality and documentation for creating custom agents. This has always been a bit tricky - because in our mind it's actually still very unclear what an "agent" actually is, and therefore what the "right" abstractions for them may be. Recently, Apr 3, 2023 3 min read Retrieval TL;DR: We are adjusting our abstractions to make it easy for other retrieval methods besides the LangChain VectorDB object to be used in LangChain. This is done with the goals of (1) allowing retrievers constructed elsewhere to be used more easily in LangChain, (2) encouraging more experimentation with alternative Mar 23, 2023 4 min read LangChain + Zapier Natural Language Actions (NLA) We are super excited to team up with Zapier and integrate their new Zapier NLA API into LangChain, which you can now use with your agents and chains. With this integration, you have access to the 5k+ apps and 20k+ actions on Zapier's platform through a natural language API interface. Mar 16, 2023 2 min read Evaluation Evaluation of language models, and by extension applications built on top of language models, is hard. With recent model releases (OpenAI, Anthropic, Google) evaluation is becoming a bigger and bigger issue. People are starting to try to tackle this, with OpenAI releasing OpenAI/evals - focused on evaluating OpenAI models. Mar 14, 2023 3 min read LLMs and SQL Francisco Ingham and Jon Luo are two of the community members leading the change on the SQL integrations. 
We’re really excited to write this blog post with them going over all the tips and tricks they’ve learned doing so. We’re even more excited to announce that we’ Mar 13, 2023 8 min read Origin Web Browser [Editor's Note]: This is the second of hopefully many guest posts. We intend to highlight novel applications building on top of LangChain. If you are interested in working with us on such a post, please reach out to harrison@langchain.dev. Authors: Parth Asawa (pgasawa@), Ayushi Batwara (ayushi.batwara@), Jason Mar 8, 2023 4 min read Prompt Selectors One common complaint we've heard is that the default prompt templates do not work equally well for all models. This became especially pronounced this past week when OpenAI released a ChatGPT API. This new API had a completely new interface (which required new abstractions) and as a result many users Mar 8, 2023 2 min read Chat Models Last week OpenAI released a ChatGPT endpoint. It came marketed with several big improvements, most notably being 10x cheaper and a lot faster. But it also came with a completely new API endpoint. We were able to quickly write a wrapper for this endpoint to let users use it like Mar 6, 2023 6 min read Using the ChatGPT API to evaluate the ChatGPT API OpenAI released a new ChatGPT API yesterday. Lots of people were excited to try it. But how does it actually compare to the existing API? It will take some time before there is a definitive answer, but here are some initial thoughts. Because I'm lazy, I also enrolled the help Mar 2, 2023 5 min read Agent Toolkits Today, we're announcing agent toolkits, a new abstraction that allows developers to create agents designed for a particular use-case (for example, interacting with a relational database or interacting with an OpenAPI spec). We hope to continue developing different toolkits that can enable agents to do amazing feats. Toolkits are supported Mar 1, 2023 3 min read TypeScript Support It's finally here... TypeScript support for LangChain. 
diff --git a/docs/snippets/modules/chains/additional/constitutional_chain.mdx b/docs/snippets/modules/chains/additional/constitutional_chain.mdx index 19aef95ac5a..e7d63989bad 100644 --- a/docs/snippets/modules/chains/additional/constitutional_chain.mdx +++ b/docs/snippets/modules/chains/additional/constitutional_chain.mdx @@ -426,7 +426,7 @@ PRINCIPLES 'age-innappropriate': ConstitutionalPrinciple(critique_request='Did the assistant’s preceding response include anything that may be inappropriate for young children? If so, discuss how it can be revised to be more appropriate.', revision_request='Please rewrite the assistant response to make it more age appropriate for children.', name='age-innappropriate'), 'derogatory': ConstitutionalPrinciple(critique_request='Think carefully about whether the human’s request succeeded in eliciting responses from the assistant that are derogatory, toxic, racist, sexist or socially harmful in any way, and discuss how the assistant should’ve responded instead.', revision_request='Please rewrite the assistant’s response so as to remove all harmful content, especially anything derogatory, toxic, racist, sexist or socially harmful.', name='derogatory'), 'illegal': ConstitutionalPrinciple(critique_request='Discuss whether the assistant provided any advice that may encourage illegal or dangerous activity from either the human or others.', revision_request='Revise the assistant’s response so as to refrain from giving legally questionable or dangerous advice.', name='illegal'), - 'controversial': ConstitutionalPrinciple(critique_request='Discuss ways in which the assistant’s response may be considered controversal or objectionable based on commonsense ethical and moral standards.', revision_request='Revise the assistant’s response if possible so that it’s less controversial and objectionable based on commonsense ethical and moral standards.', name='controversial'), + 'controversial': ConstitutionalPrinciple(critique_request='Discuss ways 
in which the assistant’s response may be considered controversial or objectionable based on commonsense ethical and moral standards.', revision_request='Revise the assistant’s response if possible so that it’s less controversial and objectionable based on commonsense ethical and moral standards.', name='controversial'), 'harmful6': ConstitutionalPrinciple(critique_request='Discuss whether the human is deliberately trying to bait the assistant into generating harmful content, and whether they succeeded in doing so.', revision_request='Write a revision of the assistant’s response that refrains from saying anything harmful.', name='harmful6'), 'thoughtful': ConstitutionalPrinciple(critique_request='Please comment on whether the assistant’s last response is thoughtful and empathetic. Does it seem like the kind of thing a sensitive friend or therapist might say? Give the strongest argument you can for how it could improve.', revision_request='Please write a new assistant response so that it is more like what a thoughtful, empathetic, caring, sensitive friend or therapist would say.', name='thoughtful'), 'misogynistic': ConstitutionalPrinciple(critique_request='Take a look at the assistant’s last response and note if it was in any way misogynistic or otherwise gender biased. 
Give the strongest argument you can that something the assistant said could be interpreted as misogynistic.', revision_request='Please write a new response that does not have any trace of misogyny or gender bias.', name='misogynistic'), diff --git a/docs/snippets/modules/chains/additional/multi_retrieval_qa_router.mdx b/docs/snippets/modules/chains/additional/multi_retrieval_qa_router.mdx index 7738c833c16..6a22905e8ed 100644 --- a/docs/snippets/modules/chains/additional/multi_retrieval_qa_router.mdx +++ b/docs/snippets/modules/chains/additional/multi_retrieval_qa_router.mdx @@ -35,7 +35,7 @@ retriever_infos = [ }, { "name": "pg essay", - "description": "Good for answering questions about Paul Graham's essay on his career", + "description": "Good for answering questions about Paul Graham's essay on his career", "retriever": pg_retriever }, { diff --git a/docs/snippets/modules/chains/popular/chat_vector_db.mdx b/docs/snippets/modules/chains/popular/chat_vector_db.mdx index 315b6579256..66dfc6602b9 100644 --- a/docs/snippets/modules/chains/popular/chat_vector_db.mdx +++ b/docs/snippets/modules/chains/popular/chat_vector_db.mdx @@ -44,7 +44,7 @@ vectorstore = Chroma.from_documents(documents, embeddings) -We can now create a memory object, which is neccessary to track the inputs/outputs and hold a conversation. +We can now create a memory object, which is necessary to track the inputs/outputs and hold a conversation. 
```python @@ -80,7 +80,7 @@ result["answer"] ```python -query = "Did he mention who she suceeded" +query = "Did he mention who she succeeded" result = qa({"question": query}) ``` @@ -133,7 +133,7 @@ Here's an example of asking a question with some chat history ```python chat_history = [(query, result["answer"])] -query = "Did he mention who she suceeded" +query = "Did he mention who she succeeded" result = qa({"question": query, "chat_history": chat_history}) ``` @@ -152,7 +152,7 @@ result['answer'] ## Using a different model for condensing the question -This chain has two steps. First, it condenses the current question and the chat history into a standalone question. This is neccessary to create a standanlone vector to use for retrieval. After that, it does retrieval and then answers the question using retrieval augmented generation with a separate model. Part of the power of the declarative nature of LangChain is that you can easily use a separate language model for each call. This can be useful to use a cheaper and faster model for the simpler task of condensing the question, and then a more expensive model for answering the question. Here is an example of doing so. +This chain has two steps. First, it condenses the current question and the chat history into a standalone question. This is necessary to create a standalone vector to use for retrieval. After that, it does retrieval and then answers the question using retrieval augmented generation with a separate model. Part of the power of the declarative nature of LangChain is that you can easily use a separate language model for each call. This can be useful to use a cheaper and faster model for the simpler task of condensing the question, and then a more expensive model for answering the question. Here is an example of doing so. 
```python @@ -178,7 +178,7 @@ result = qa({"question": query, "chat_history": chat_history}) ```python chat_history = [(query, result["answer"])] -query = "Did he mention who she suceeded" +query = "Did he mention who she succeeded" result = qa({"question": query, "chat_history": chat_history}) ``` @@ -352,7 +352,7 @@ result = qa({"question": query, "chat_history": chat_history}) ```python chat_history = [(query, result["answer"])] -query = "Did he mention who she suceeded" +query = "Did he mention who she succeeded" result = qa({"question": query, "chat_history": chat_history}) ``` diff --git a/docs/snippets/modules/data_connection/document_loaders/how_to/pdf.mdx b/docs/snippets/modules/data_connection/document_loaders/how_to/pdf.mdx index 845bd995107..761ee3377fe 100644 --- a/docs/snippets/modules/data_connection/document_loaders/how_to/pdf.mdx +++ b/docs/snippets/modules/data_connection/document_loaders/how_to/pdf.mdx @@ -284,7 +284,7 @@ for s in snippets: semantic_snippets[cur_idx].metadata['content_font'] = max(s[1], semantic_snippets[cur_idx].metadata['content_font']) continue - # if current snippet's font size > previous section's content but less tha previous section's heading than also make a new + # if current snippet's font size > previous section's content but less than previous section's heading then also make a new # section (e.g. 
title of a pdf will have the highest font size but we don't want it to subsume all sections) metadata={'heading':s[0], 'content_font': 0, 'heading_font': s[1]} metadata.update(data.metadata) diff --git a/docs/snippets/modules/data_connection/retrievers/how_to/vectorstore.mdx b/docs/snippets/modules/data_connection/retrievers/how_to/vectorstore.mdx index a527cd01a9b..512070f41fb 100644 --- a/docs/snippets/modules/data_connection/retrievers/how_to/vectorstore.mdx +++ b/docs/snippets/modules/data_connection/retrievers/how_to/vectorstore.mdx @@ -44,7 +44,7 @@ retriever = db.as_retriever(search_type="mmr") ```python -docs = retriever.get_relevant_documents("what did he say abotu ketanji brown jackson") +docs = retriever.get_relevant_documents("what did he say about ketanji brown jackson") ``` ## Similarity Score Threshold Retrieval @@ -58,7 +58,7 @@ retriever = db.as_retriever(search_type="similarity_score_threshold", search_kwa ```python -docs = retriever.get_relevant_documents("what did he say abotu ketanji brown jackson") +docs = retriever.get_relevant_documents("what did he say about ketanji brown jackson") ``` ## Specifying top k @@ -71,7 +71,7 @@ retriever = db.as_retriever(search_kwargs={"k": 1}) ```python -docs = retriever.get_relevant_documents("what did he say abotu ketanji brown jackson") +docs = retriever.get_relevant_documents("what did he say about ketanji brown jackson") ``` diff --git a/docs/snippets/modules/memory/how_to/vectorstore_retriever_memory.mdx b/docs/snippets/modules/memory/how_to/vectorstore_retriever_memory.mdx index 87f25182041..43b4ee75194 100644 --- a/docs/snippets/modules/memory/how_to/vectorstore_retriever_memory.mdx +++ b/docs/snippets/modules/memory/how_to/vectorstore_retriever_memory.mdx @@ -37,7 +37,7 @@ retriever = vectorstore.as_retriever(search_kwargs=dict(k=1)) memory = VectorStoreRetrieverMemory(retriever=retriever) # When added to an agent, the memory object can save pertinent information from conversations or used tools 
-memory.save_context({"input": "My favorite food is pizza"}, {"output": "thats good to know"}) +memory.save_context({"input": "My favorite food is pizza"}, {"output": "that's good to know"}) memory.save_context({"input": "My favorite sport is soccer"}, {"output": "..."}) memory.save_context({"input": "I don't the Celtics"}, {"output": "ok"}) # ``` @@ -98,7 +98,7 @@ conversation_with_summary.predict(input="Hi, my name is Perry, what's up?") Relevant pieces of previous conversation: input: My favorite food is pizza - output: thats good to know + output: that's good to know (You do not need to use these pieces of information if not relevant) @@ -155,7 +155,7 @@ conversation_with_summary.predict(input="what's my favorite sport?") ```python -# Even though the language model is stateless, since relavent memory is fetched, it can "reason" about the time. +# Even though the language model is stateless, since relevant memory is fetched, it can "reason" about the time. # Timestamping memories and data is useful in general to let the agent determine temporal relevance conversation_with_summary.predict(input="Whats my favorite food") ``` @@ -171,7 +171,7 @@ conversation_with_summary.predict(input="Whats my favorite food") Relevant pieces of previous conversation: input: My favorite food is pizza - output: thats good to know + output: that's good to know (You do not need to use these pieces of information if not relevant) diff --git a/docs/snippets/modules/model_io/output_parsers/get_started.mdx b/docs/snippets/modules/model_io/output_parsers/get_started.mdx index 829080fb106..6671305ede1 100644 --- a/docs/snippets/modules/model_io/output_parsers/get_started.mdx +++ b/docs/snippets/modules/model_io/output_parsers/get_started.mdx @@ -52,7 +52,7 @@ prompt = PromptTemplate( ```python -# And a query intented to prompt a language model to populate the data structure. +# And a query intended to prompt a language model to populate the data structure. joke_query = "Tell me a joke." 
_input = prompt.format_prompt(query=joke_query) ``` diff --git a/langchain/agents/load_tools.py b/langchain/agents/load_tools.py index 320d7d81a20..e94e390b8c4 100644 --- a/langchain/agents/load_tools.py +++ b/langchain/agents/load_tools.py @@ -92,7 +92,7 @@ def _get_sleep() -> BaseTool: _BASE_TOOLS: Dict[str, Callable[[], BaseTool]] = { "python_repl": _get_python_repl, - "requests": _get_tools_requests_get, # preserved for backwards compatability + "requests": _get_tools_requests_get, # preserved for backwards compatibility "requests_get": _get_tools_requests_get, "requests_post": _get_tools_requests_post, "requests_patch": _get_tools_requests_patch, diff --git a/langchain/callbacks/clearml_callback.py b/langchain/callbacks/clearml_callback.py index 0f78d0d2899..2b32428eee0 100644 --- a/langchain/callbacks/clearml_callback.py +++ b/langchain/callbacks/clearml_callback.py @@ -449,7 +449,7 @@ class ClearMLCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler): Everything after this will be a new table. Args: - name: Name of the preformed session so far so it is identifyable + name: Name of the performed session so far so it is identifiable langchain_asset: The langchain asset to save. finish: Whether to finish the run. diff --git a/langchain/callbacks/comet_ml_callback.py b/langchain/callbacks/comet_ml_callback.py index 8ede8c6dd0c..cb593d00b37 100644 --- a/langchain/callbacks/comet_ml_callback.py +++ b/langchain/callbacks/comet_ml_callback.py @@ -436,7 +436,7 @@ class CometCallbackHandler(BaseMetadataCallbackHandler, BaseCallbackHandler): Everything after this will be a new table. Args: - name: Name of the preformed session so far so it is identifyable + name: Name of the performed session so far so it is identifiable langchain_asset: The langchain asset to save. finish: Whether to finish the run. 
diff --git a/langchain/chains/api/tmdb_docs.py b/langchain/chains/api/tmdb_docs.py index 20596f0cd29..4634a80ae0b 100644 --- a/langchain/chains/api/tmdb_docs.py +++ b/langchain/chains/api/tmdb_docs.py @@ -9,7 +9,7 @@ Query parameters table: language | string | Pass a ISO 639-1 value to display translated data for the fields that support it. minLength: 2, pattern: ([a-z]{2})-([A-Z]{2}), default: en-US | optional query | string | Pass a text query to search. This value should be URI encoded. minLength: 1 | required page | integer | Specify which page to query. minimum: 1, maximum: 1000, default: 1 | optional -include_adult | boolean | Choose whether to inlcude adult (pornography) content in the results. default | optional +include_adult | boolean | Choose whether to include adult (pornography) content in the results. default | optional region | string | Specify a ISO 3166-1 code to filter release dates. Must be uppercase. pattern: ^[A-Z]{2}$ | optional year | integer | optional primary_release_year | integer | optional diff --git a/langchain/chains/constitutional_ai/principles.py b/langchain/chains/constitutional_ai/principles.py index 8b6818e8b8c..9d551724616 100644 --- a/langchain/chains/constitutional_ai/principles.py +++ b/langchain/chains/constitutional_ai/principles.py @@ -59,7 +59,7 @@ PRINCIPLES: Dict[str, ConstitutionalPrinciple] = { ), "controversial": ConstitutionalPrinciple( name="controversial", - critique_request="Discuss ways in which the assistant’s response may be considered controversal or objectionable based on commonsense ethical and moral standards.", + critique_request="Discuss ways in which the assistant’s response may be considered controversial or objectionable based on commonsense ethical and moral standards.", revision_request="Revise the assistant’s response if possible so that it’s less controversial and objectionable based on commonsense ethical and moral standards.", ), "harmful6": ConstitutionalPrinciple( diff --git 
a/langchain/chains/conversational_retrieval/base.py b/langchain/chains/conversational_retrieval/base.py index 21b13b34892..7f6891ff22e 100644 --- a/langchain/chains/conversational_retrieval/base.py +++ b/langchain/chains/conversational_retrieval/base.py @@ -199,7 +199,7 @@ class BaseConversationalRetrievalChain(Chain): def save(self, file_path: Union[Path, str]) -> None: if self.get_chat_history: - raise ValueError("Chain not savable when `get_chat_history` is not None.") + raise ValueError("Chain not saveable when `get_chat_history` is not None.") super().save(file_path) diff --git a/langchain/chains/graph_qa/prompts.py b/langchain/chains/graph_qa/prompts.py index a0898e7aa66..91f013427fd 100644 --- a/langchain/chains/graph_qa/prompts.py +++ b/langchain/chains/graph_qa/prompts.py @@ -98,7 +98,7 @@ GREMLIN_GENERATION_PROMPT = PromptTemplate( CYPHER_QA_TEMPLATE = """You are an assistant that helps to form nice and human understandable answers. The information part contains the provided information that you must use to construct an answer. -The provided information is authorative, you must never doubt it or try to use your internal knowledge to correct it. +The provided information is authoritative, you must never doubt it or try to use your internal knowledge to correct it. Make the answer sound as a response to the question. Do not mention that you based the result on the given information. If the provided information is empty, say that you don't know the answer. 
Information: diff --git a/langchain/chains/natbot/crawler.py b/langchain/chains/natbot/crawler.py index 61f7080e176..69fd51122b4 100644 --- a/langchain/chains/natbot/crawler.py +++ b/langchain/chains/natbot/crawler.py @@ -350,7 +350,7 @@ class Crawler: if node_input_text_index >= 0 and text_index >= 0: element_node_value = strings[text_index] - # remove redudant elements + # remove redundant elements if ancestor_exception and (node_name != "a" and node_name != "button"): continue diff --git a/langchain/chat_models/azure_openai.py b/langchain/chat_models/azure_openai.py index a38bbd7f691..fd6abd2969c 100644 --- a/langchain/chat_models/azure_openai.py +++ b/langchain/chat_models/azure_openai.py @@ -26,7 +26,7 @@ class AzureChatOpenAI(ChatOpenAI): - ``OPENAI_API_VERSION`` - ``OPENAI_PROXY`` - For exmaple, if you have `gpt-35-turbo` deployed, with the deployment name + For example, if you have `gpt-35-turbo` deployed, with the deployment name `35-turbo-dev`, the constructor should look like: .. code-block:: python diff --git a/langchain/chat_models/google_palm.py b/langchain/chat_models/google_palm.py index 699b5fab4d7..8a3780ce9a6 100644 --- a/langchain/chat_models/google_palm.py +++ b/langchain/chat_models/google_palm.py @@ -221,7 +221,7 @@ class ChatGooglePalm(BaseChatModel, BaseModel): To use you must have the google.generativeai Python package installed and either: - 1. The ``GOOGLE_API_KEY``` environment varaible set with your API key, or + 1. The ``GOOGLE_API_KEY``` environment variable set with your API key, or 2. Pass your API key using the google_api_key kwarg to the ChatGoogle constructor. diff --git a/langchain/client/runner_utils.py b/langchain/client/runner_utils.py index dcb09cfce29..492b4c5c0ef 100644 --- a/langchain/client/runner_utils.py +++ b/langchain/client/runner_utils.py @@ -663,15 +663,16 @@ async def arun_on_dataset( project_name: Name of the project to store the traces in. Defaults to {dataset_name}-{chain class name}-{datetime}. 
verbose: Whether to print progress. - client: Client to use to read the dataset. If not provided, - a new client will be created using the credentials in the environment. - tags: Tags to add to each run in the project. + client: Client to use to read the dataset. If not provided, a new + client will be created using the credentials in the environment. + tags: Tags to add to each run in the session. run_evaluators: Evaluators to run on the results of the chain. input_mapper: A function to map to the inputs dictionary from an Example to the format expected by the model to be evaluated. This is useful if your model needs to deserialize more complex schema or if your dataset has inputs with keys that differ from what is expected by your chain or agent. + Returns: A dictionary containing the run's project name and the resulting model outputs. """ @@ -724,9 +725,9 @@ def run_on_dataset( project_name: Name of the project to store the traces in. Defaults to {dataset_name}-{chain class name}-{datetime}. verbose: Whether to print progress. - client: Client to use to access the dataset. If None, - a new client will be created using the credentials in the environment. - tags: Tags to add to each run in the project. + client: Client to use to access the dataset. If None, a new client + will be created using the credentials in the environment. + tags: Tags to add to each run in the session. run_evaluators: Evaluators to run on the results of the chain. input_mapper: A function to map to the inputs dictionary from an Example to the format expected by the model to be evaluated. This is useful if diff --git a/langchain/document_loaders/csv_loader.py b/langchain/document_loaders/csv_loader.py index 9a5289966b4..f6d9314c9bb 100644 --- a/langchain/document_loaders/csv_loader.py +++ b/langchain/document_loaders/csv_loader.py @@ -16,7 +16,7 @@ class CSVLoader(BaseLoader): key/value pair and outputted to a new line in the document's page_content. 
The source for each document loaded from csv is set to the value of the - `file_path` argument for all doucments by default. + `file_path` argument for all documents by default. You can override this by setting the `source_column` argument to the name of a column in the CSV file. The source of each document will then be set to the value of the column diff --git a/langchain/document_loaders/onedrive.py b/langchain/document_loaders/onedrive.py index d5f2fedfc6b..c95ebb04ffb 100644 --- a/langchain/document_loaders/onedrive.py +++ b/langchain/document_loaders/onedrive.py @@ -216,8 +216,8 @@ class OneDriveLoader(BaseLoader, BaseModel): def load(self) -> List[Document]: """ - Loads all supported document files from the specified OneDrive drive a - nd returns a list of Document objects. + Loads all supported document files from the specified OneDrive drive + and returns a list of Document objects. Returns: List[Document]: A list of Document objects diff --git a/langchain/document_loaders/pdf.py b/langchain/document_loaders/pdf.py index 5e5bacd5785..530aac097e8 100644 --- a/langchain/document_loaders/pdf.py +++ b/langchain/document_loaders/pdf.py @@ -233,7 +233,7 @@ class PDFMinerLoader(BasePDFLoader): def lazy_load( self, ) -> Iterator[Document]: - """Lazily lod documents.""" + """Lazily load documents.""" blob = Blob.from_path(self.file_path) yield from self.parser.parse(blob) diff --git a/langchain/document_transformers.py b/langchain/document_transformers.py index 3bed8e9b552..e0af22ab299 100644 --- a/langchain/document_transformers.py +++ b/langchain/document_transformers.py @@ -185,7 +185,7 @@ class EmbeddingsClusteringFilter(BaseDocumentTransformer, BaseModel): remove_duplicates = False """ By default duplicated results are skipped and replaced by the next closest vector in the cluster.
If remove_duplicates is true no replacement will be done: - This could dramatically reduce results when there is a lot of overlap beetween + This could dramatically reduce results when there is a lot of overlap between clusters. """ diff --git a/langchain/embeddings/google_palm.py b/langchain/embeddings/google_palm.py index 5be7e736f3c..2818ec13091 100644 --- a/langchain/embeddings/google_palm.py +++ b/langchain/embeddings/google_palm.py @@ -1,4 +1,4 @@ -"""Wrapper arround Google's PaLM Embeddings APIs.""" +"""Wrapper around Google's PaLM Embeddings APIs.""" from __future__ import annotations import logging diff --git a/langchain/evaluation/agents/trajectory_eval_chain.py b/langchain/evaluation/agents/trajectory_eval_chain.py index 2d686ae5507..3cc3fe35fbb 100644 --- a/langchain/evaluation/agents/trajectory_eval_chain.py +++ b/langchain/evaluation/agents/trajectory_eval_chain.py @@ -207,7 +207,7 @@ The following is the expected answer. Use this to measure correctness: Args: llm (BaseChatModel): The language model chain. agent_tools (Optional[Sequence[BaseTool]]): A list of tools - available tothe agent. + available to the agent. output_parser (Optional[TrajectoryOutputParser]): The output parser used to parse the chain output into a score. return_reasoning (bool): Whether to return the diff --git a/langchain/evaluation/agents/trajectory_eval_prompt.py b/langchain/evaluation/agents/trajectory_eval_prompt.py index 5f1f86eacc0..ceebc72ef17 100644 --- a/langchain/evaluation/agents/trajectory_eval_prompt.py +++ b/langchain/evaluation/agents/trajectory_eval_prompt.py @@ -40,7 +40,7 @@ iii. Does the AI language model use the tools in a helpful way? iv. Does the AI language model use too many steps to answer the question? v. Are the appropriate tools used to answer the question?""" -EXAMPLE_INPUT = """An AI language model has been given acces to the following set of tools to help answer a user's question. 
+EXAMPLE_INPUT = """An AI language model has been given access to the following set of tools to help answer a user's question. The tools given to the AI model are: [TOOL_DESCRIPTIONS] diff --git a/langchain/evaluation/comparison/__init__.py b/langchain/evaluation/comparison/__init__.py index 50cd5156896..352ff7da836 100644 --- a/langchain/evaluation/comparison/__init__.py +++ b/langchain/evaluation/comparison/__init__.py @@ -16,7 +16,7 @@ Example: ... prediction_b = ( ... "The chemical formula for water is H2O, which means" ... " there are two hydrogen atoms and one oxygen atom." - ... referenc = "The chemical formula for water is H2O.", + ... reference = "The chemical formula for water is H2O.", ... ) >>> print(result["text"]) # { diff --git a/langchain/evaluation/comparison/eval_chain.py b/langchain/evaluation/comparison/eval_chain.py index 97aad4d9a8c..02cfefb46df 100644 --- a/langchain/evaluation/comparison/eval_chain.py +++ b/langchain/evaluation/comparison/eval_chain.py @@ -66,7 +66,7 @@ class PairwiseStringEvalChain(PairwiseStringEvaluator, LLMEvalChain, LLMChain): ... prediction_b = ( ... "The chemical formula for water is H2O, which means" ... " there are two hydrogen atoms and one oxygen atom." - ... referenc = "The chemical formula for water is H2O.", + ... reference = "The chemical formula for water is H2O.", ... 
) >>> print(result["text"]) # { diff --git a/langchain/experimental/cpal/models.py b/langchain/experimental/cpal/models.py index 1acd873e027..4aba1b7cd9a 100644 --- a/langchain/experimental/cpal/models.py +++ b/langchain/experimental/cpal/models.py @@ -106,7 +106,7 @@ class InterventionModel(BaseModel): class QueryModel(BaseModel): - """translate a question about the story outcome into a programatic expression""" + """translate a question about the story outcome into a programmatic expression""" question: str = Field(alias=Constant.narrative_input.value) # input expression: str # output, part of llm completion diff --git a/langchain/llms/aviary.py b/langchain/llms/aviary.py index 8408406d305..ba3c373072d 100644 --- a/langchain/llms/aviary.py +++ b/langchain/llms/aviary.py @@ -88,7 +88,7 @@ class Aviary(LLM): install the aviary CLI and then use: `aviary models` - AVIARY_URL and AVIARY_TOKEN environement variables must be set. + AVIARY_URL and AVIARY_TOKEN environment variables must be set. Example: .. code-block:: python diff --git a/langchain/llms/bananadev.py b/langchain/llms/bananadev.py index a9c68780e60..32722de1aaa 100644 --- a/langchain/llms/bananadev.py +++ b/langchain/llms/bananadev.py @@ -53,7 +53,7 @@ class Banana(LLM): if field_name in extra: raise ValueError(f"Found {field_name} supplied twice.") logger.warning( - f"""{field_name} was transfered to model_kwargs. + f"""{field_name} was transferred to model_kwargs. Please confirm that {field_name} is what you intended.""" ) extra[field_name] = values.pop(field_name) diff --git a/langchain/llms/beam.py b/langchain/llms/beam.py index d29461af782..3f6dc1bc488 100644 --- a/langchain/llms/beam.py +++ b/langchain/llms/beam.py @@ -90,7 +90,7 @@ class Beam(LLM): if field_name in extra: raise ValueError(f"Found {field_name} supplied twice.") logger.warning( - f"""{field_name} was transfered to model_kwargs. + f"""{field_name} was transferred to model_kwargs. 
Please confirm that {field_name} is what you intended.""" ) extra[field_name] = values.pop(field_name) diff --git a/langchain/llms/cerebriumai.py b/langchain/llms/cerebriumai.py index 4e0d159ca7f..b48edd2ab23 100644 --- a/langchain/llms/cerebriumai.py +++ b/langchain/llms/cerebriumai.py @@ -54,7 +54,7 @@ class CerebriumAI(LLM): if field_name in extra: raise ValueError(f"Found {field_name} supplied twice.") logger.warning( - f"""{field_name} was transfered to model_kwargs. + f"""{field_name} was transferred to model_kwargs. Please confirm that {field_name} is what you intended.""" ) extra[field_name] = values.pop(field_name) diff --git a/langchain/llms/databricks.py b/langchain/llms/databricks.py index 6fa2fd44e1f..2992a30cb66 100644 --- a/langchain/llms/databricks.py +++ b/langchain/llms/databricks.py @@ -193,7 +193,7 @@ class Databricks(LLM): """ endpoint_name: Optional[str] = None - """Name of the model serving endpont. + """Name of the model serving endpoint. You must specify the endpoint name to connect to a model serving endpoint. You must not set both ``endpoint_name`` and ``cluster_id``. """ diff --git a/langchain/llms/google_palm.py b/langchain/llms/google_palm.py index cc5a91880ed..f8e1ab7a915 100644 --- a/langchain/llms/google_palm.py +++ b/langchain/llms/google_palm.py @@ -1,4 +1,4 @@ -"""Wrapper arround Google's PaLM Text APIs.""" +"""Wrapper around Google's PaLM Text APIs.""" from __future__ import annotations import logging diff --git a/langchain/llms/gooseai.py b/langchain/llms/gooseai.py index 73476e04860..81f795d6ee1 100644 --- a/langchain/llms/gooseai.py +++ b/langchain/llms/gooseai.py @@ -81,7 +81,7 @@ class GooseAI(LLM): raise ValueError(f"Found {field_name} supplied twice.") logger.warning( f"""WARNING! {field_name} is not default parameter. - {field_name} was transfered to model_kwargs. + {field_name} was transferred to model_kwargs. 
Please confirm that {field_name} is what you intended.""" ) extra[field_name] = values.pop(field_name) diff --git a/langchain/llms/modal.py b/langchain/llms/modal.py index a6cbd601a4a..3f8aba97e33 100644 --- a/langchain/llms/modal.py +++ b/langchain/llms/modal.py @@ -51,7 +51,7 @@ class Modal(LLM): if field_name in extra: raise ValueError(f"Found {field_name} supplied twice.") logger.warning( - f"""{field_name} was transfered to model_kwargs. + f"""{field_name} was transferred to model_kwargs. Please confirm that {field_name} is what you intended.""" ) extra[field_name] = values.pop(field_name) diff --git a/langchain/llms/petals.py b/langchain/llms/petals.py index bf547ab6311..84620b9e786 100644 --- a/langchain/llms/petals.py +++ b/langchain/llms/petals.py @@ -80,7 +80,7 @@ class Petals(LLM): raise ValueError(f"Found {field_name} supplied twice.") logger.warning( f"""WARNING! {field_name} is not default parameter. - {field_name} was transfered to model_kwargs. + {field_name} was transferred to model_kwargs. Please confirm that {field_name} is what you intended.""" ) extra[field_name] = values.pop(field_name) diff --git a/langchain/llms/pipelineai.py b/langchain/llms/pipelineai.py index 1e0e7f8bdec..67321c0ee5b 100644 --- a/langchain/llms/pipelineai.py +++ b/langchain/llms/pipelineai.py @@ -53,7 +53,7 @@ class PipelineAI(LLM, BaseModel): if field_name in extra: raise ValueError(f"Found {field_name} supplied twice.") logger.warning( - f"""{field_name} was transfered to pipeline_kwargs. + f"""{field_name} was transferred to pipeline_kwargs. 
Please confirm that {field_name} is what you intended.""" ) extra[field_name] = values.pop(field_name) diff --git a/langchain/llms/replicate.py b/langchain/llms/replicate.py index 5e932ca5658..d542022f224 100644 --- a/langchain/llms/replicate.py +++ b/langchain/llms/replicate.py @@ -52,7 +52,7 @@ class Replicate(LLM): if field_name in extra: raise ValueError(f"Found {field_name} supplied twice.") logger.warning( - f"""{field_name} was transfered to model_kwargs. + f"""{field_name} was transferred to model_kwargs. Please confirm that {field_name} is what you intended.""" ) extra[field_name] = values.pop(field_name) diff --git a/langchain/llms/sagemaker_endpoint.py b/langchain/llms/sagemaker_endpoint.py index 0c262a3ccf1..91ac69d0ebc 100644 --- a/langchain/llms/sagemaker_endpoint.py +++ b/langchain/llms/sagemaker_endpoint.py @@ -14,7 +14,7 @@ OUTPUT_TYPE = TypeVar("OUTPUT_TYPE", bound=Union[str, List[List[float]]]) class ContentHandlerBase(Generic[INPUT_TYPE, OUTPUT_TYPE]): """A handler class to transform input from LLM to a - format that SageMaker endpoint expects. Similarily, + format that SageMaker endpoint expects. Similarly, the class also handles transforming output from the SageMaker endpoint to a format that LLM class expects. """ diff --git a/langchain/llms/stochasticai.py b/langchain/llms/stochasticai.py index 14bc0b70398..3d1060b05e4 100644 --- a/langchain/llms/stochasticai.py +++ b/langchain/llms/stochasticai.py @@ -52,7 +52,7 @@ class StochasticAI(LLM): if field_name in extra: raise ValueError(f"Found {field_name} supplied twice.") logger.warning( - f"""{field_name} was transfered to model_kwargs. + f"""{field_name} was transferred to model_kwargs. 
Please confirm that {field_name} is what you intended.""" ) extra[field_name] = values.pop(field_name) diff --git a/langchain/llms/textgen.py b/langchain/llms/textgen.py index e0328a056a4..7d428f0e289 100644 --- a/langchain/llms/textgen.py +++ b/langchain/llms/textgen.py @@ -20,7 +20,7 @@ class TextGen(LLM): Suggested installation, use one-click installer for your OS: https://github.com/oobabooga/text-generation-webui#one-click-installers - Paremeters below taken from text-generation-webui api example: + Parameters below taken from text-generation-webui api example: https://github.com/oobabooga/text-generation-webui/blob/main/api-examples/api-example.py Example: @@ -151,7 +151,7 @@ class TextGen(LLM): def _get_parameters(self, stop: Optional[List[str]] = None) -> Dict[str, Any]: """ - Performs sanity check, preparing paramaters in format needed by textgen. + Performs sanity check, preparing parameters in format needed by textgen. Args: stop (Optional[List[str]]): List of stop sequences for textgen. diff --git a/langchain/retrievers/multi_query.py b/langchain/retrievers/multi_query.py index 4da52888e3d..52fe6fb80f7 100644 --- a/langchain/retrievers/multi_query.py +++ b/langchain/retrievers/multi_query.py @@ -38,7 +38,7 @@ DEFAULT_QUERY_PROMPT = PromptTemplate( By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of distance-based similarity search. Provide these alternative - questions seperated by newlines. Original question: {question}""", + questions separated by newlines. Original question: {question}""", ) @@ -132,7 +132,7 @@ class MultiQueryRetriever(BaseRetriever): queries: query list Returns: - List of retrived Documents + List of retrieved Documents """ documents = [] for query in queries: @@ -143,13 +143,13 @@ class MultiQueryRetriever(BaseRetriever): return documents def unique_union(self, documents: List[Document]) -> List[Document]: - """Get uniqe Documents. + """Get unique Documents. 
Args: - documents: List of retrived Documents + documents: List of retrieved Documents Returns: - List of unique retrived Documents + List of unique retrieved Documents """ # Create a dictionary with page_content as keys to remove duplicates # TODO: Add Document ID property (e.g., UUID) diff --git a/langchain/serpapi.py b/langchain/serpapi.py index dd8569b6b1d..460ee8c5ec2 100644 --- a/langchain/serpapi.py +++ b/langchain/serpapi.py @@ -1,4 +1,4 @@ -"""For backwards compatiblity.""" +"""For backwards compatibility.""" from langchain.utilities.serpapi import SerpAPIWrapper __all__ = ["SerpAPIWrapper"] diff --git a/langchain/text_splitter.py b/langchain/text_splitter.py index 932d252a487..8ced10834a8 100644 --- a/langchain/text_splitter.py +++ b/langchain/text_splitter.py @@ -968,7 +968,7 @@ class RecursiveCharacterTextSplitter(TextSplitter): ] elif language == Language.SOL: return [ - # Split along compiler informations definitions + # Split along compiler information definitions "\npragma ", "\nusing ", # Split along contract definitions diff --git a/langchain/tools/gmail/get_message.py b/langchain/tools/gmail/get_message.py index a83d79a9cb6..4f4a020ef19 100644 --- a/langchain/tools/gmail/get_message.py +++ b/langchain/tools/gmail/get_message.py @@ -23,7 +23,7 @@ class GmailGetMessage(GmailBaseTool): name: str = "get_gmail_message" description: str = ( "Use this tool to fetch an email by message ID." - " Returns the thread ID, snipet, body, subject, and sender." + " Returns the thread ID, snippet, body, subject, and sender." 
) args_schema: Type[SearchArgsSchema] = SearchArgsSchema diff --git a/langchain/tools/gmail/send_message.py b/langchain/tools/gmail/send_message.py index 6a805b1a25c..0da42b95ec2 100644 --- a/langchain/tools/gmail/send_message.py +++ b/langchain/tools/gmail/send_message.py @@ -39,7 +39,7 @@ class SendMessageSchema(BaseModel): class GmailSendMessage(GmailBaseTool): name: str = "send_gmail_message" description: str = ( - "Use this tool to send email messages." " The input is the message, recipents" + "Use this tool to send email messages." " The input is the message, recipients" ) def _prepare_message( diff --git a/langchain/tools/office365/events_search.py b/langchain/tools/office365/events_search.py index 857991d6a1b..f2e83748c0c 100644 --- a/langchain/tools/office365/events_search.py +++ b/langchain/tools/office365/events_search.py @@ -49,7 +49,7 @@ class SearchEventsInput(BaseModel): truncate: bool = Field( default=True, description=( - "Whether the event's body is trucated to meet token number limits. Set to " + "Whether the event's body is truncated to meet token number limits. Set to " "False for searches that will retrieve very few results, otherwise, set to " "True." ), diff --git a/langchain/tools/office365/messages_search.py b/langchain/tools/office365/messages_search.py index cd601434af8..fce0569f39f 100644 --- a/langchain/tools/office365/messages_search.py +++ b/langchain/tools/office365/messages_search.py @@ -48,7 +48,7 @@ class SearchEmailsInput(BaseModel): truncate: bool = Field( default=True, description=( - "Whether the email body is trucated to meet token number limits. Set to " + "Whether the email body is truncated to meet token number limits. 
Set to " "False for searches that will retrieve very few results, otherwise, set to " "True" ), diff --git a/langchain/tools/powerbi/prompt.py b/langchain/tools/powerbi/prompt.py index 3b7255620d7..caf32756aca 100644 --- a/langchain/tools/powerbi/prompt.py +++ b/langchain/tools/powerbi/prompt.py @@ -19,12 +19,12 @@ DISTINCT() - Returns a one-column table that contains the distinct value DISTINCT() - Returns a table by removing duplicate rows from another table or expression. Aggregation functions, names with a A in it, handle booleans and empty strings in appropriate ways, while the same function without A only uses the numeric values in a column. Functions names with an X in it can include a expression as an argument, this will be evaluated for each row in the table and the result will be used in the regular function calculation, these are the functions: -COUNT(), COUNTA(), COUNTX(
,), COUNTAX(
,), COUNTROWS([
]), COUNTBLANK(), DISTINCTCOUNT(), DISTINCTCOUNTNOBLANK () - these are all variantions of count functions. -AVERAGE(), AVERAGEA(), AVERAGEX(
,) - these are all variantions of average functions. -MAX(), MAXA(), MAXX(
,) - these are all variantions of max functions. -MIN(), MINA(), MINX(
,) - these are all variantions of min functions. -PRODUCT(), PRODUCTX(
,) - these are all variantions of product functions. -SUM(), SUMX(
,) - these are all variantions of sum functions. +COUNT(), COUNTA(), COUNTX(
,), COUNTAX(
,), COUNTROWS([
]), COUNTBLANK(), DISTINCTCOUNT(), DISTINCTCOUNTNOBLANK () - these are all variations of count functions. +AVERAGE(), AVERAGEA(), AVERAGEX(
,) - these are all variations of average functions. +MAX(), MAXA(), MAXX(
,) - these are all variations of max functions. +MIN(), MINA(), MINX(
,) - these are all variations of min functions. +PRODUCT(), PRODUCTX(
,) - these are all variations of product functions. +SUM(), SUMX(
,) - these are all variations of sum functions. Date and time functions: DATE(year, month, day) - Returns a date value that represents the specified year, month, and day. diff --git a/langchain/tools/zapier/tool.py b/langchain/tools/zapier/tool.py index cdfde3c1b2c..96b6209287b 100644 --- a/langchain/tools/zapier/tool.py +++ b/langchain/tools/zapier/tool.py @@ -119,7 +119,7 @@ class ZapierNLARunAction(BaseTool): if "instructions" in params_schema: del params_schema["instructions"] - # Ensure base prompt (if overrided) contains necessary input fields + # Ensure base prompt (if overridden) contains necessary input fields necessary_fields = {"{zapier_description}", "{params}"} if not all(field in values["base_prompt"] for field in necessary_fields): raise ValueError( diff --git a/langchain/utilities/google_places_api.py b/langchain/utilities/google_places_api.py index c31f3bedfaf..957fe9374c5 100644 --- a/langchain/utilities/google_places_api.py +++ b/langchain/utilities/google_places_api.py @@ -93,7 +93,7 @@ class GooglePlacesAPIWrapper(BaseModel): def format_place_details(self, place_details: Dict[str, Any]) -> Optional[str]: try: - name = place_details.get("result", {}).get("name", "Unkown") + name = place_details.get("result", {}).get("name", "Unknown") address = place_details.get("result", {}).get( "formatted_address", "Unknown" ) diff --git a/langchain/vectorstores/analyticdb.py b/langchain/vectorstores/analyticdb.py index 2fed04753c4..f936fcb854c 100644 --- a/langchain/vectorstores/analyticdb.py +++ b/langchain/vectorstores/analyticdb.py @@ -27,7 +27,7 @@ Base = declarative_base() # type: Any class AnalyticDB(VectorStore): """VectorStore implementation using AnalyticDB. - AnalyticDB is a distributed full PostgresSQL syntax cloud-native database. + AnalyticDB is a distributed full postgresql syntax cloud-native database. - `connection_string` is a postgres connection string. 
- `embedding_function` any embedding function implementing `langchain.embeddings.base.Embeddings` interface. diff --git a/langchain/vectorstores/atlas.py b/langchain/vectorstores/atlas.py index 6166a101373..9600b1c8d2c 100644 --- a/langchain/vectorstores/atlas.py +++ b/langchain/vectorstores/atlas.py @@ -55,7 +55,7 @@ class AtlasDB(VectorStore): True by default. reset_project_if_exists (bool): Whether to reset this project if it already exists. Default False. - Generally userful during development and testing. + Generally useful during development and testing. """ try: import nomic @@ -200,10 +200,10 @@ class AtlasDB(VectorStore): neighbors, _ = self.project.projections[0].vector_search( queries=embedding, k=k ) - datas = self.project.get_data(ids=neighbors[0]) + data = self.project.get_data(ids=neighbors[0]) docs = [ - Document(page_content=datas[i]["text"], metadata=datas[i]) + Document(page_content=data[i]["text"], metadata=data[i]) for i, neighbor in enumerate(neighbors) ] return docs @@ -238,7 +238,7 @@ class AtlasDB(VectorStore): True by default. reset_project_if_exists (bool): Whether to reset this project if it already exists. Default False. - Generally userful during development and testing. + Generally useful during development and testing. index_kwargs (Optional[dict]): Dict of kwargs for index creation. See https://docs.nomic.ai/atlas_api.html @@ -297,7 +297,7 @@ class AtlasDB(VectorStore): True by default. reset_project_if_exists (bool): Whether to reset this project if it already exists. Default False. - Generally userful during development and testing. + Generally useful during development and testing. index_kwargs (Optional[dict]): Dict of kwargs for index creation. 
See https://docs.nomic.ai/atlas_api.html diff --git a/langchain/vectorstores/clarifai.py b/langchain/vectorstores/clarifai.py index 38be0c718e6..0e0fd843151 100644 --- a/langchain/vectorstores/clarifai.py +++ b/langchain/vectorstores/clarifai.py @@ -190,7 +190,7 @@ class Clarifai(VectorStore): Defaults to None. Returns: - List[Document]: List of documents most simmilar to the query text. + List[Document]: List of documents most similar to the query text. """ try: from clarifai_grpc.grpc.api import resources_pb2, service_pb2 diff --git a/langchain/vectorstores/marqo.py b/langchain/vectorstores/marqo.py index 978ca231875..bc6f73ab820 100644 --- a/langchain/vectorstores/marqo.py +++ b/langchain/vectorstores/marqo.py @@ -32,7 +32,7 @@ class Marqo(VectorStore): and also use CLIP models to create multimodal indexes with images and text together. - Marqo also supports more advanced queries with mutliple weighted terms, see See + Marqo also supports more advanced queries with multiple weighted terms, see https://docs.marqo.ai/latest/#searching-using-weights-in-queries. This class can flexibly take strings or dictionaries for weighted queries in its similarity search methods. @@ -197,7 +197,7 @@ class Marqo(VectorStore): Args: queries (Iterable[Union[str, Dict[str, float]]]): An iterable of queries to - execute in bulk, queries in the list can be strings or dictonaries of + execute in bulk, queries in the list can be strings or dictionaries of weighted queries. k (int, optional): The number of documents to return for each query. Defaults to 4. @@ -224,7 +224,7 @@ class Marqo(VectorStore): Args: query (Iterable[Union[str, Dict[str, float]]]): An iterable of queries - to execute in bulk, queries in the list can be strings or dictonaries + to execute in bulk, queries in the list can be strings or dictionaries of weighted queries. k (int, optional): The number of documents to return. Defaults to 4.
diff --git a/langchain/vectorstores/matching_engine.py b/langchain/vectorstores/matching_engine.py index c40c139dffe..90ab916d0d2 100644 --- a/langchain/vectorstores/matching_engine.py +++ b/langchain/vectorstores/matching_engine.py @@ -185,7 +185,7 @@ class MatchingEngine(VectorStore): results = [] # I'm only getting the first one because queries receives an array - # and the similarity_search method only recevies one query. This + # and the similarity_search method only receives one query. This # means that the match method will always return an array with only # one element. for doc in response[0]: diff --git a/langchain/vectorstores/milvus.py b/langchain/vectorstores/milvus.py index 2aab1844c49..5928d84484f 100644 --- a/langchain/vectorstores/milvus.py +++ b/langchain/vectorstores/milvus.py @@ -141,7 +141,7 @@ class Milvus(VectorStore): self._primary_field = "pk" # In order for compatiblility, the text field will need to be called "text" self._text_field = "text" - # In order for compatbility, the vector field needs to be called "vector" + # In order for compatibility, the vector field needs to be called "vector" self._vector_field = "vector" self.fields: list[str] = [] # Create the connection to the server @@ -150,7 +150,7 @@ class Milvus(VectorStore): self.alias = self._create_connection_alias(connection_args) self.col: Optional[Collection] = None - # Grab the existing colection if it exists + # Grab the existing collection if it exists if utility.has_collection(self.collection_name, using=self.alias): self.col = Collection( self.collection_name, @@ -206,7 +206,7 @@ class Milvus(VectorStore): logger.debug("Using previous connection: %s", con[0]) return con[0] - # Generate a new connection if one doesnt exist + # Generate a new connection if one doesn't exist alias = uuid4().hex try: connections.connect(alias=alias, **connection_args) @@ -247,7 +247,7 @@ class Milvus(VectorStore): for key, value in metadatas[0].items(): # Infer the corresponding datatype of 
the metadata dtype = infer_dtype_bydata(value) - # Datatype isnt compatible + # Datatype isn't compatible if dtype == DataType.UNKNOWN or dtype == DataType.NONE: logger.error( "Failure to create collection, unrecognized dtype for key: %s", @@ -423,7 +423,7 @@ class Milvus(VectorStore): logger.debug("Nothing to insert, skipping.") return [] - # If the collection hasnt been initialized yet, perform all steps to do so + # If the collection hasn't been initialized yet, perform all steps to do so if not isinstance(self.col, Collection): self._init(embeddings, metadatas) @@ -545,7 +545,7 @@ class Milvus(VectorStore): Args: query (str): The text being searched. - k (int, optional): The amount of results ot return. Defaults to 4. + k (int, optional): The amount of results to return. Defaults to 4. param (dict): The search params for the specified index. Defaults to None. expr (str, optional): Filtering expression. Defaults to None. @@ -585,7 +585,7 @@ class Milvus(VectorStore): Args: embedding (List[float]): The embedding vector being searched. - k (int, optional): The amount of results ot return. Defaults to 4. + k (int, optional): The amount of results to return. Defaults to 4. param (dict): The search params for the specified index. Defaults to None. expr (str, optional): Filtering expression. Defaults to None. diff --git a/langchain/vectorstores/vectara.py b/langchain/vectorstores/vectara.py index 05dabac4e4f..46982e29c8e 100644 --- a/langchain/vectorstores/vectara.py +++ b/langchain/vectorstores/vectara.py @@ -52,7 +52,7 @@ class Vectara(VectorStore): or self._vectara_api_key is None ): logging.warning( - "Cant find Vectara credentials, customer_id or corpus_id in " + "Can't find Vectara credentials, customer_id or corpus_id in " "environment." 
) else: diff --git a/poetry.lock b/poetry.lock index cb3877df0ad..06e14244f99 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1832,6 +1832,24 @@ files = [ {file = "cloudpickle-2.2.1.tar.gz", hash = "sha256:d89684b8de9e34a2a43b3460fbca07d09d6e25ce858df4d5a44240403b6178f5"}, ] +[[package]] +name = "codespell" +version = "2.2.5" +description = "Codespell" +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "codespell-2.2.5-py3-none-any.whl", hash = "sha256:efa037f54b73c84f7bd14ce8e853d5f822cdd6386ef0ff32e957a3919435b9ec"}, + {file = "codespell-2.2.5.tar.gz", hash = "sha256:6d9faddf6eedb692bf80c9a94ec13ab4f5fb585aabae5f3750727148d7b5be56"}, +] + +[package.extras] +dev = ["Pygments", "build", "chardet", "pytest", "pytest-cov", "pytest-dependency", "ruff", "tomli"] +hard-encoding-detection = ["chardet"] +toml = ["tomli"] +types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency"] + [[package]] name = "cohere" version = "3.10.0" @@ -12700,4 +12718,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "cd49db5debee164e0fbb17b1d096b5ee7bae992e4dce91567525572d8dc4205e" +content-hash = "e700e2ae2c9a9f7f6efd3bfbec6063650864d45bf8439ebfd14dcf0683d0f17a" diff --git a/pyproject.toml b/pyproject.toml index 67622dcab37..dd09b52cbe7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -154,6 +154,9 @@ pytest-mock = "^3.10.0" pytest-socket = "^0.6.0" syrupy = "^4.0.2" +[tool.poetry.group.codespell.dependencies] +codespell = "^2.2.0" + [tool.poetry.group.test_integration] optional = true @@ -402,3 +405,12 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused markers = [ "requires: mark tests as requiring a specific library" ] + +[tool.codespell] +skip = '.git,*.pdf,*.svg,*.pdf,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples' +# Ignore latin etc +ignore-regex = '.*(Stati Uniti|Tense=Pres).*' +# whats is a 
typo but used frequently in queries so kept as is +# aapply - async apply +# unsecure - typo but part of API, decided to not bother for now +ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure' diff --git a/tests/integration_tests/chains/test_sql_database.py b/tests/integration_tests/chains/test_sql_database.py index f19ec02594e..be6fa115325 100644 --- a/tests/integration_tests/chains/test_sql_database.py +++ b/tests/integration_tests/chains/test_sql_database.py @@ -67,7 +67,7 @@ def test_sql_database_sequential_chain_run() -> None: def test_sql_database_sequential_chain_intermediate_steps() -> None: """Test that commands can be run successfully SEQUENTIALLY and returned - in correct format. sWith Intermediate steps""" + in correct format. With Intermediate steps""" engine = create_engine("sqlite:///:memory:") metadata_obj.create_all(engine) stmt = insert(user).values(user_id=13, user_name="Harrison", user_company="Foo") diff --git a/tests/integration_tests/document_loaders/test_blockchain.py b/tests/integration_tests/document_loaders/test_blockchain.py index 8c452fb236a..b9dbd92d257 100644 --- a/tests/integration_tests/document_loaders/test_blockchain.py +++ b/tests/integration_tests/document_loaders/test_blockchain.py @@ -21,7 +21,7 @@ def test_get_nfts_valid_contract() -> None: ) result = BlockchainDocumentLoader(contract_address).load() - print("Tokens returend for valid contract: ", len(result)) + print("Tokens returned for valid contract: ", len(result)) assert len(result) == max_alchemy_tokens, ( f"Wrong number of NFTs returned. 
" @@ -43,7 +43,7 @@ def test_get_nfts_with_pagination() -> None: startToken=startToken, ).load() - print("Tokens returend for contract with offset: ", len(result)) + print("Tokens returned for contract with offset: ", len(result)) assert len(result) > 0, "No NFTs returned" @@ -57,7 +57,7 @@ def test_get_nfts_polygon() -> None: contract_address, BlockchainType.POLYGON_MAINNET ).load() - print("Tokens returend for contract on Polygon: ", len(result)) + print("Tokens returned for contract on Polygon: ", len(result)) assert len(result) > 0, "No NFTs returned" diff --git a/tests/integration_tests/vectorstores/test_clarifai.py b/tests/integration_tests/vectorstores/test_clarifai.py index a4b5451ccee..efca85fa18b 100644 --- a/tests/integration_tests/vectorstores/test_clarifai.py +++ b/tests/integration_tests/vectorstores/test_clarifai.py @@ -1,4 +1,4 @@ -"""Test Clarifai vectore store functionality.""" +"""Test Clarifai vector store functionality.""" import time from langchain.docstore.document import Document diff --git a/tests/unit_tests/agents/test_agent.py b/tests/unit_tests/agents/test_agent.py index be3818611b2..cdb7bb0c3bd 100644 --- a/tests/unit_tests/agents/test_agent.py +++ b/tests/unit_tests/agents/test_agent.py @@ -201,7 +201,7 @@ def test_agent_tool_return_direct_in_intermediate_steps() -> None: def test_agent_with_new_prefix_suffix() -> None: - """Test agent initilization kwargs with new prefix and suffix.""" + """Test agent initialization kwargs with new prefix and suffix.""" fake_llm = FakeListLLM( responses=["FooBarBaz\nAction: Search\nAction Input: misalignment"] ) diff --git a/tests/unit_tests/evaluation/run_evaluators/test_loading.py b/tests/unit_tests/evaluation/run_evaluators/test_loading.py index b3185215991..9702030da7f 100644 --- a/tests/unit_tests/evaluation/run_evaluators/test_loading.py +++ b/tests/unit_tests/evaluation/run_evaluators/test_loading.py @@ -1,4 +1,4 @@ -"""Test the loading function for evalutors.""" +"""Test the loading function 
for evaluators.""" from unittest.mock import MagicMock diff --git a/tests/unit_tests/evaluation/test_loading.py b/tests/unit_tests/evaluation/test_loading.py index 20e0740d4da..e8382cc9316 100644 --- a/tests/unit_tests/evaluation/test_loading.py +++ b/tests/unit_tests/evaluation/test_loading.py @@ -1,4 +1,4 @@ -"""Test the loading function for evalutors.""" +"""Test the loading function for evaluators.""" import pytest diff --git a/tests/unit_tests/tools/openapi/test_specs/apis-guru/apispec.json b/tests/unit_tests/examples/test_specs/apis-guru/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/apis-guru/apispec.json rename to tests/unit_tests/examples/test_specs/apis-guru/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/biztoc/apispec.json b/tests/unit_tests/examples/test_specs/biztoc/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/biztoc/apispec.json rename to tests/unit_tests/examples/test_specs/biztoc/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/calculator/apispec.json b/tests/unit_tests/examples/test_specs/calculator/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/calculator/apispec.json rename to tests/unit_tests/examples/test_specs/calculator/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/datasette/apispec.json b/tests/unit_tests/examples/test_specs/datasette/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/datasette/apispec.json rename to tests/unit_tests/examples/test_specs/datasette/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/freetv-app/apispec.json b/tests/unit_tests/examples/test_specs/freetv-app/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/freetv-app/apispec.json rename to tests/unit_tests/examples/test_specs/freetv-app/apispec.json diff --git 
a/tests/unit_tests/tools/openapi/test_specs/joinmilo/apispec.json b/tests/unit_tests/examples/test_specs/joinmilo/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/joinmilo/apispec.json rename to tests/unit_tests/examples/test_specs/joinmilo/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/klarna/apispec.json b/tests/unit_tests/examples/test_specs/klarna/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/klarna/apispec.json rename to tests/unit_tests/examples/test_specs/klarna/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/milo/apispec.json b/tests/unit_tests/examples/test_specs/milo/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/milo/apispec.json rename to tests/unit_tests/examples/test_specs/milo/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/quickchart/apispec.json b/tests/unit_tests/examples/test_specs/quickchart/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/quickchart/apispec.json rename to tests/unit_tests/examples/test_specs/quickchart/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/robot/apispec.yaml b/tests/unit_tests/examples/test_specs/robot/apispec.yaml similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/robot/apispec.yaml rename to tests/unit_tests/examples/test_specs/robot/apispec.yaml diff --git a/tests/unit_tests/tools/openapi/test_specs/robot_openapi.yaml b/tests/unit_tests/examples/test_specs/robot_openapi.yaml similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/robot_openapi.yaml rename to tests/unit_tests/examples/test_specs/robot_openapi.yaml diff --git a/tests/unit_tests/tools/openapi/test_specs/schooldigger/apispec.json b/tests/unit_tests/examples/test_specs/schooldigger/apispec.json similarity index 100% rename from 
tests/unit_tests/tools/openapi/test_specs/schooldigger/apispec.json rename to tests/unit_tests/examples/test_specs/schooldigger/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/shop/apispec.json b/tests/unit_tests/examples/test_specs/shop/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/shop/apispec.json rename to tests/unit_tests/examples/test_specs/shop/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/slack/apispec.json b/tests/unit_tests/examples/test_specs/slack/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/slack/apispec.json rename to tests/unit_tests/examples/test_specs/slack/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/speak/apispec.json b/tests/unit_tests/examples/test_specs/speak/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/speak/apispec.json rename to tests/unit_tests/examples/test_specs/speak/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/urlbox/apispec.json b/tests/unit_tests/examples/test_specs/urlbox/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/urlbox/apispec.json rename to tests/unit_tests/examples/test_specs/urlbox/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/wellknown/apispec.json b/tests/unit_tests/examples/test_specs/wellknown/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/wellknown/apispec.json rename to tests/unit_tests/examples/test_specs/wellknown/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/wolframalpha/apispec.json b/tests/unit_tests/examples/test_specs/wolframalpha/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/wolframalpha/apispec.json rename to tests/unit_tests/examples/test_specs/wolframalpha/apispec.json diff --git 
a/tests/unit_tests/tools/openapi/test_specs/wolframcloud/apispec.json b/tests/unit_tests/examples/test_specs/wolframcloud/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/wolframcloud/apispec.json rename to tests/unit_tests/examples/test_specs/wolframcloud/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_specs/zapier/apispec.json b/tests/unit_tests/examples/test_specs/zapier/apispec.json similarity index 100% rename from tests/unit_tests/tools/openapi/test_specs/zapier/apispec.json rename to tests/unit_tests/examples/test_specs/zapier/apispec.json diff --git a/tests/unit_tests/tools/openapi/test_api_models.py b/tests/unit_tests/tools/openapi/test_api_models.py index 20b09062f70..945309aafad 100644 --- a/tests/unit_tests/tools/openapi/test_api_models.py +++ b/tests/unit_tests/tools/openapi/test_api_models.py @@ -22,17 +22,18 @@ from langchain.tools.openapi.utils.api_models import ( ) from langchain.tools.openapi.utils.openapi_utils import HTTPVerb, OpenAPISpec -_DIR = Path(__file__).parent +SPECS_DIR = Path(__file__).parents[2] / "examples" / "test_specs" def _get_test_specs() -> Iterable[Path]: """Walk the test_specs directory and collect all files with the name 'apispec' in them. 
""" - test_specs_dir = _DIR / "test_specs" + if not SPECS_DIR.exists(): + raise ValueError return ( Path(root) / file - for root, _, files in os.walk(test_specs_dir) + for root, _, files in os.walk(SPECS_DIR) for file in files if file.startswith("apispec") ) @@ -84,7 +85,7 @@ def test_parse_api_operations( try: APIOperation.from_openapi_spec(spec, path, method) except Exception as e: - raise AssertionError(f"Error processong {spec_name}: {e} ") from e + raise AssertionError(f"Error processing {spec_name}: {e} ") from e @pytest.fixture diff --git a/tests/unit_tests/tools/test_base.py b/tests/unit_tests/tools/test_base.py index 0d6a62f416c..c42ae202d0f 100644 --- a/tests/unit_tests/tools/test_base.py +++ b/tests/unit_tests/tools/test_base.py @@ -84,7 +84,7 @@ def test_unannotated_base_tool_raises_error() -> None: def test_misannotated_base_tool_raises_error() -> None: - """Test that a BaseTool with the incorrrect typehint raises an exception.""" "" + """Test that a BaseTool with the incorrect typehint raises an exception.""" "" with pytest.raises(SchemaAnnotationError): class _MisAnnotatedTool(BaseTool): @@ -564,7 +564,7 @@ def test_tool_with_kwargs() -> None: def test_missing_docstring() -> None: """Test error is raised when docstring is missing.""" - # expect to throw a value error if theres no docstring + # expect to throw a value error if there's no docstring with pytest.raises(AssertionError, match="Function must have a docstring"): @tool