more

2026-02-04 00:00:34 +00:00 · 2023-12-06 20:19:32 -08:00
parent 8226b81fb3
commit a66df25a89
34 changed files with 1183 additions and 0 deletions
--- a/.scripts/community_split/libs/partners/openai/tests/integration_tests/chat_models/test_azure.py
+++ b/.scripts/community_split/libs/partners/openai/tests/integration_tests/chat_models/test_azure.py
@@ -0,0 +1,145 @@
+"""Test AzureChatOpenAI wrapper."""
+import os
+from typing import Any
+
+import pytest
+from langchain_core.messages import BaseMessage, HumanMessage
+from langchain_core.outputs import ChatGeneration, ChatResult, LLMResult
+
+from langchain_openai.chat_models import AzureChatOpenAI
+
+OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION", "")
+OPENAI_API_BASE = os.environ.get("AZURE_OPENAI_API_BASE", "")
+OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", "")
+DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME", "")
+
+
+def _get_llm(**kwargs: Any) -> AzureChatOpenAI:
+    return AzureChatOpenAI(
+        deployment_name=DEPLOYMENT_NAME,
+        openai_api_version=OPENAI_API_VERSION,
+        openai_api_base=OPENAI_API_BASE,
+        openai_api_key=OPENAI_API_KEY,
+        **kwargs,
+    )
+
+
+@pytest.fixture
+def llm() -> AzureChatOpenAI:
+    """Return an AzureChatOpenAI client configured with max_tokens=10."""
+    # Marks have no effect on fixtures, so "scheduled" lives on the tests.
+    return _get_llm(max_tokens=10)
+
+
+@pytest.mark.scheduled
+def test_chat_openai(llm: AzureChatOpenAI) -> None:
+    """Test AzureChatOpenAI wrapper."""
+    message = HumanMessage(content="Hello")
+    response = llm([message])
+    assert isinstance(response, BaseMessage)
+    assert isinstance(response.content, str)
+
+
+@pytest.mark.scheduled
+def test_chat_openai_generate() -> None:
+    """Test AzureChatOpenAI wrapper with generate."""
+    chat = _get_llm(max_tokens=10, n=2)
+    message = HumanMessage(content="Hello")
+    response = chat.generate([[message], [message]])
+    assert isinstance(response, LLMResult)
+    assert len(response.generations) == 2
+    for generations in response.generations:
+        assert len(generations) == 2
+        for generation in generations:
+            assert isinstance(generation, ChatGeneration)
+            assert isinstance(generation.text, str)
+            assert generation.text == generation.message.content
+
+
+@pytest.mark.scheduled
+def test_chat_openai_multiple_completions() -> None:
+    """Test AzureChatOpenAI wrapper with multiple completions."""
+    chat = _get_llm(max_tokens=10, n=5)
+    message = HumanMessage(content="Hello")
+    response = chat._generate([message])
+    assert isinstance(response, ChatResult)
+    assert len(response.generations) == 5
+    for generation in response.generations:
+        assert isinstance(generation.message, BaseMessage)
+        assert isinstance(generation.message.content, str)
+
+
+@pytest.mark.scheduled
+async def test_async_chat_openai() -> None:
+    """Test async generation."""
+    chat = _get_llm(max_tokens=10, n=2)
+    message = HumanMessage(content="Hello")
+    response = await chat.agenerate([[message], [message]])
+    assert isinstance(response, LLMResult)
+    assert len(response.generations) == 2
+    for generations in response.generations:
+        assert len(generations) == 2
+        for generation in generations:
+            assert isinstance(generation, ChatGeneration)
+            assert isinstance(generation.text, str)
+            assert generation.text == generation.message.content
+
+
+@pytest.mark.scheduled
+def test_openai_streaming(llm: AzureChatOpenAI) -> None:
+    """Test streaming tokens from AzureChatOpenAI."""
+
+    for token in llm.stream("I'm Pickle Rick"):
+        assert isinstance(token.content, str)
+
+
+@pytest.mark.scheduled
+async def test_openai_astream(llm: AzureChatOpenAI) -> None:
+    """Test async streaming tokens from AzureChatOpenAI."""
+    async for token in llm.astream("I'm Pickle Rick"):
+        assert isinstance(token.content, str)
+
+
+@pytest.mark.scheduled
+async def test_openai_abatch(llm: AzureChatOpenAI) -> None:
+    """Test async batch tokens from AzureChatOpenAI."""
+
+    result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
+    for token in result:
+        assert isinstance(token.content, str)
+
+
+@pytest.mark.scheduled
+async def test_openai_abatch_tags(llm: AzureChatOpenAI) -> None:
+    """Test async batch tokens from AzureChatOpenAI with tags."""
+
+    result = await llm.abatch(
+        ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]}
+    )
+    for token in result:
+        assert isinstance(token.content, str)
+
+
+@pytest.mark.scheduled
+def test_openai_batch(llm: AzureChatOpenAI) -> None:
+    """Test batch tokens from AzureChatOpenAI."""
+
+    result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])
+    for token in result:
+        assert isinstance(token.content, str)
+
+
+@pytest.mark.scheduled
+async def test_openai_ainvoke(llm: AzureChatOpenAI) -> None:
+    """Test async invoke tokens from AzureChatOpenAI."""
+
+    result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
+    assert isinstance(result.content, str)
+
+
+@pytest.mark.scheduled
+def test_openai_invoke(llm: AzureChatOpenAI) -> None:
+    """Test invoke tokens from AzureChatOpenAI."""
+
+    result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"]))
+    assert isinstance(result.content, str)
--- a/.scripts/community_split/libs/partners/openai/tests/integration_tests/llms/test_azure.py
+++ b/.scripts/community_split/libs/partners/openai/tests/integration_tests/llms/test_azure.py
@@ -0,0 +1,141 @@
+"""Test AzureOpenAI wrapper."""
+import os
+from typing import Any, Generator
+
+import pytest
+from langchain_core.outputs import LLMResult
+
+from langchain_openai.llms import AzureOpenAI
+
+OPENAI_API_VERSION = os.environ.get("AZURE_OPENAI_API_VERSION", "")
+OPENAI_API_BASE = os.environ.get("AZURE_OPENAI_API_BASE", "")
+OPENAI_API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", "")
+DEPLOYMENT_NAME = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME", "")
+
+
+def _get_llm(**kwargs: Any) -> AzureOpenAI:
+    return AzureOpenAI(
+        deployment_name=DEPLOYMENT_NAME,
+        openai_api_version=OPENAI_API_VERSION,
+        openai_api_base=OPENAI_API_BASE,
+        openai_api_key=OPENAI_API_KEY,
+        **kwargs,
+    )
+
+
+@pytest.fixture
+def llm() -> AzureOpenAI:
+    """Return an AzureOpenAI client configured with max_tokens=10."""
+    # Marks have no effect on fixtures, so "scheduled" lives on the tests.
+    return _get_llm(max_tokens=10)
+
+
+@pytest.mark.scheduled
+def test_openai_call(llm: AzureOpenAI) -> None:
+    """Test valid call to openai."""
+    output = llm("Say something nice:")
+    assert isinstance(output, str)
+
+
+@pytest.mark.scheduled
+def test_openai_streaming(llm: AzureOpenAI) -> None:
+    """Test streaming tokens from AzureOpenAI."""
+    generator = llm.stream("I'm Pickle Rick")
+
+    assert isinstance(generator, Generator)
+
+    full_response = ""
+    for token in generator:
+        assert isinstance(token, str)
+        full_response += token
+    assert full_response
+
+
+@pytest.mark.scheduled
+async def test_openai_astream(llm: AzureOpenAI) -> None:
+    """Test async streaming tokens from AzureOpenAI."""
+    async for token in llm.astream("I'm Pickle Rick"):
+        assert isinstance(token, str)
+
+
+@pytest.mark.scheduled
+async def test_openai_abatch(llm: AzureOpenAI) -> None:
+    """Test async batch tokens from AzureOpenAI."""
+    result = await llm.abatch(["I'm Pickle Rick", "I'm not Pickle Rick"])
+    for token in result:
+        assert isinstance(token, str)
+
+
+@pytest.mark.scheduled
+async def test_openai_abatch_tags(llm: AzureOpenAI) -> None:
+    """Test async batch tokens from AzureOpenAI with tags."""
+    result = await llm.abatch(
+        ["I'm Pickle Rick", "I'm not Pickle Rick"], config={"tags": ["foo"]}
+    )
+    for token in result:
+        assert isinstance(token, str)
+
+
+@pytest.mark.scheduled
+def test_openai_batch(llm: AzureOpenAI) -> None:
+    """Test batch tokens from AzureOpenAI."""
+    result = llm.batch(["I'm Pickle Rick", "I'm not Pickle Rick"])
+    for token in result:
+        assert isinstance(token, str)
+
+
+@pytest.mark.scheduled
+async def test_openai_ainvoke(llm: AzureOpenAI) -> None:
+    """Test async invoke tokens from AzureOpenAI."""
+    result = await llm.ainvoke("I'm Pickle Rick", config={"tags": ["foo"]})
+    assert isinstance(result, str)
+
+
+@pytest.mark.scheduled
+def test_openai_invoke(llm: AzureOpenAI) -> None:
+    """Test invoke tokens from AzureOpenAI."""
+    result = llm.invoke("I'm Pickle Rick", config=dict(tags=["foo"]))
+    assert isinstance(result, str)
+
+
+@pytest.mark.scheduled
+def test_openai_multiple_prompts(llm: AzureOpenAI) -> None:
+    """Test completion with multiple prompts."""
+    output = llm.generate(["I'm Pickle Rick", "I'm Pickle Rick"])
+    assert isinstance(output, LLMResult)
+    assert isinstance(output.generations, list)
+    assert len(output.generations) == 2
+
+
+def test_openai_streaming_best_of_error() -> None:
+    """Test validation for streaming fails if best_of is not 1."""
+    with pytest.raises(ValueError):
+        _get_llm(best_of=2, streaming=True)
+
+
+def test_openai_streaming_n_error() -> None:
+    """Test validation for streaming fails if n is not 1."""
+    with pytest.raises(ValueError):
+        _get_llm(n=2, streaming=True)
+
+
+def test_openai_streaming_multiple_prompts_error() -> None:
+    """Test validation for streaming fails if multiple prompts are given."""
+    with pytest.raises(ValueError):
+        _get_llm(streaming=True).generate(["I'm Pickle Rick", "I'm Pickle Rick"])
+
+
+@pytest.mark.scheduled
+def test_openai_streaming_call() -> None:
+    """Test valid call to AzureOpenAI with streaming enabled."""
+    llm = _get_llm(max_tokens=10, streaming=True)
+    output = llm("Say foo:")
+    assert isinstance(output, str)
+
+
+@pytest.mark.scheduled
+async def test_openai_async_generate() -> None:
+    """Test async generation."""
+    llm = _get_llm(max_tokens=10)
+    output = await llm.agenerate(["Hello, how are you?"])
+    assert isinstance(output, LLMResult)
--- a/libs/community/langchain_community/agent_toolkits/amadeus/init.py
+++ b/libs/community/langchain_community/agent_toolkits/amadeus/init.py
--- a/libs/community/tests/examples/README.org
+++ b/libs/community/tests/examples/README.org
@@ -0,0 +1,27 @@
+* Example Docs
+
+The sample docs directory contains the following files:
+
+-  ~example-10k.html~ - A 10-K SEC filing in HTML format
+-  ~layout-parser-paper.pdf~ - A PDF copy of the layout parser paper
+-  ~factbook.xml~ / ~factbook.xsl~ - Example XML/XSL files that you
+   can use to test stylesheets
+
+These documents can be used to test out the parsers in the library. In
+addition, here are instructions for pulling in some sample docs that are
+too big to store in the repo.
+
+** XBRL 10-K
+
+You can get an example 10-K in inline XBRL format using the following
+~curl~. Note, you need to have the user agent set in the header or the
+SEC site will reject your request.
+
+#+BEGIN_SRC bash
+
+   curl -O \
+     -A '${organization} ${email}' \
+     https://www.sec.gov/Archives/edgar/data/311094/000117184321001344/0001171843-21-001344.txt
+#+END_SRC
+
+You can parse this document using the HTML parser.
--- a/libs/community/tests/examples/README.rst
+++ b/libs/community/tests/examples/README.rst
@@ -0,0 +1,28 @@
+Example Docs
+------------
+
+The sample docs directory contains the following files:
+
+-  ``example-10k.html`` - A 10-K SEC filing in HTML format
+-  ``layout-parser-paper.pdf`` - A PDF copy of the layout parser paper
+-  ``factbook.xml``/``factbook.xsl`` - Example XML/XSL files that you
+   can use to test stylesheets
+
+These documents can be used to test out the parsers in the library. In
+addition, here are instructions for pulling in some sample docs that are
+too big to store in the repo.
+
+XBRL 10-K
+^^^^^^^^^
+
+You can get an example 10-K in inline XBRL format using the following
+``curl``. Note, you need to have the user agent set in the header or the
+SEC site will reject your request.
+
+.. code:: bash
+
+   curl -O \
+     -A '${organization} ${email}' \
+     https://www.sec.gov/Archives/edgar/data/311094/000117184321001344/0001171843-21-001344.txt
+
+You can parse this document using the HTML parser.
--- a/libs/community/tests/examples/brandfetch-brandfetch-2.0.0-resolved.json
+++ b/libs/community/tests/examples/brandfetch-brandfetch-2.0.0-resolved.json
@@ -0,0 +1,282 @@
+{
+  "openapi": "3.0.1",
+  "info": {
+    "title": "Brandfetch API",
+    "description": "Brandfetch API (v2) for retrieving brand information.\n\nSee our [documentation](https://docs.brandfetch.com/) for further details.                   ",
+    "termsOfService": "https://brandfetch.com/terms",
+    "contact": {
+      "url": "https://brandfetch.com/developers"
+    },
+    "version": "2.0.0"
+  },
+  "externalDocs": {
+    "description": "Documentation",
+    "url": "https://docs.brandfetch.com/"
+  },
+  "servers": [
+    {
+      "url": "https://api.brandfetch.io/v2"
+    }
+  ],
+  "paths": {
+    "/brands/{domainOrId}": {
+      "get": {
+        "summary": "Retrieve a brand",
+        "description": "Fetch brand information by domain or ID\n\nFurther details here: https://docs.brandfetch.com/reference/retrieve-brand\n",
+        "parameters": [
+          {
+            "name": "domainOrId",
+            "in": "path",
+            "description": "Domain or ID of the brand",
+            "required": true,
+            "style": "simple",
+            "explode": false,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "Brand data",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Brand"
+                },
+                "examples": {
+                  "brandfetch.com": {
+                    "value": "{\"name\":\"Brandfetch\",\"domain\":\"brandfetch.com\",\"claimed\":true,\"description\":\"All brands. In one place\",\"links\":[{\"name\":\"twitter\",\"url\":\"https://twitter.com/brandfetch\"},{\"name\":\"linkedin\",\"url\":\"https://linkedin.com/company/brandfetch\"}],\"logos\":[{\"type\":\"logo\",\"theme\":\"light\",\"formats\":[{\"src\":\"https://asset.brandfetch.io/idL0iThUh6/id9WE9j86h.svg\",\"background\":\"transparent\",\"format\":\"svg\",\"size\":15555}]},{\"type\":\"logo\",\"theme\":\"dark\",\"formats\":[{\"src\":\"https://asset.brandfetch.io/idL0iThUh6/idWbsK1VCy.png\",\"background\":\"transparent\",\"format\":\"png\",\"height\":215,\"width\":800,\"size\":33937},{\"src\":\"https://asset.brandfetch.io/idL0iThUh6/idtCMfbWO0.svg\",\"background\":\"transparent\",\"format\":\"svg\",\"height\":null,\"width\":null,\"size\":15567}]},{\"type\":\"symbol\",\"theme\":\"light\",\"formats\":[{\"src\":\"https://asset.brandfetch.io/idL0iThUh6/idXGq6SIu2.svg\",\"background\":\"transparent\",\"format\":\"svg\",\"size\":2215}]},{\"type\":\"symbol\",\"theme\":\"dark\",\"formats\":[{\"src\":\"https://asset.brandfetch.io/idL0iThUh6/iddCQ52AR5.svg\",\"background\":\"transparent\",\"format\":\"svg\",\"size\":2215}]},{\"type\":\"icon\",\"theme\":\"dark\",\"formats\":[{\"src\":\"https://asset.brandfetch.io/idL0iThUh6/idls3LaPPQ.png\",\"background\":null,\"format\":\"png\",\"height\":400,\"width\":400,\"size\":2565}]}],\"colors\":[{\"hex\":\"#0084ff\",\"type\":\"accent\",\"brightness\":113},{\"hex\":\"#00193E\",\"type\":\"brand\",\"brightness\":22},{\"hex\":\"#F03063\",\"type\":\"brand\",\"brightness\":93},{\"hex\":\"#7B0095\",\"type\":\"brand\",\"brightness\":37},{\"hex\":\"#76CC4B\",\"type\":\"brand\",\"brightness\":176},{\"hex\":\"#FFDA00\",\"type\":\"brand\",\"brightness\":210},{\"hex\":\"#000000\",\"type\":\"dark\",\"brightness\":0},{\"hex\":\"#ffffff\",\"type\":\"light\",\"brightness\":255}],\"fonts\":[{\"name\":\"Poppins\",\"type\":\"title\",\"
origin\":\"google\",\"originId\":\"Poppins\",\"weights\":[]},{\"name\":\"Inter\",\"type\":\"body\",\"origin\":\"google\",\"originId\":\"Inter\",\"weights\":[]}],\"images\":[{\"type\":\"banner\",\"formats\":[{\"src\":\"https://asset.brandfetch.io/idL0iThUh6/idUuia5imo.png\",\"background\":\"transparent\",\"format\":\"png\",\"height\":500,\"width\":1500,\"size\":5539}]}]}"
+                  }
+                }
+              }
+            }
+          },
+          "400": {
+            "description": "Invalid domain or ID supplied"
+          },
+          "404": {
+            "description": "The brand does not exist or the domain can't be resolved."
+          }
+        },
+        "security": [
+          {
+            "bearerAuth": []
+          }
+        ]
+      }
+    }
+  },
+  "components": {
+    "schemas": {
+      "Brand": {
+        "required": [
+          "claimed",
+          "colors",
+          "description",
+          "domain",
+          "fonts",
+          "images",
+          "links",
+          "logos",
+          "name"
+        ],
+        "type": "object",
+        "properties": {
+          "images": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ImageAsset"
+            }
+          },
+          "fonts": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/FontAsset"
+            }
+          },
+          "domain": {
+            "type": "string"
+          },
+          "claimed": {
+            "type": "boolean"
+          },
+          "name": {
+            "type": "string"
+          },
+          "description": {
+            "type": "string"
+          },
+          "links": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Brand_links"
+            }
+          },
+          "logos": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ImageAsset"
+            }
+          },
+          "colors": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ColorAsset"
+            }
+          }
+        },
+        "description": "Object representing a brand"
+      },
+      "ColorAsset": {
+        "required": [
+          "brightness",
+          "hex",
+          "type"
+        ],
+        "type": "object",
+        "properties": {
+          "brightness": {
+            "type": "integer"
+          },
+          "hex": {
+            "type": "string"
+          },
+          "type": {
+            "type": "string",
+            "enum": [
+              "accent",
+              "brand",
+              "customizable",
+              "dark",
+              "light",
+              "vibrant"
+            ]
+          }
+        },
+        "description": "Brand color asset"
+      },
+      "FontAsset": {
+        "type": "object",
+        "properties": {
+          "originId": {
+            "type": "string"
+          },
+          "origin": {
+            "type": "string",
+            "enum": [
+              "adobe",
+              "custom",
+              "google",
+              "system"
+            ]
+          },
+          "name": {
+            "type": "string"
+          },
+          "type": {
+            "type": "string"
+          },
+          "weights": {
+            "type": "array",
+            "items": {
+              "type": "number"
+            }
+          },
+          "items": {
+            "type": "string"
+          }
+        },
+        "description": "Brand font asset"
+      },
+      "ImageAsset": {
+        "required": [
+          "formats",
+          "theme",
+          "type"
+        ],
+        "type": "object",
+        "properties": {
+          "formats": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ImageFormat"
+            }
+          },
+          "theme": {
+            "type": "string",
+            "enum": [
+              "light",
+              "dark"
+            ]
+          },
+          "type": {
+            "type": "string",
+            "enum": [
+              "logo",
+              "icon",
+              "symbol",
+              "banner"
+            ]
+          }
+        },
+        "description": "Brand image asset"
+      },
+      "ImageFormat": {
+        "required": [
+          "background",
+          "format",
+          "size",
+          "src"
+        ],
+        "type": "object",
+        "properties": {
+          "size": {
+            "type": "integer"
+          },
+          "src": {
+            "type": "string"
+          },
+          "background": {
+            "type": "string",
+            "enum": [
+              "transparent"
+            ]
+          },
+          "format": {
+            "type": "string"
+          },
+          "width": {
+            "type": "integer"
+          },
+          "height": {
+            "type": "integer"
+          }
+        },
+        "description": "Brand image asset image format"
+      },
+      "Brand_links": {
+        "required": [
+          "name",
+          "url"
+        ],
+        "type": "object",
+        "properties": {
+          "name": {
+            "type": "string"
+          },
+          "url": {
+            "type": "string"
+          }
+        }
+      }
+    },
+    "securitySchemes": {
+      "bearerAuth": {
+        "type": "http",
+        "scheme": "bearer",
+        "bearerFormat": "API Key"
+      }
+    }
+  }
+}
--- a/libs/community/tests/examples/default-encoding.py
+++ b/libs/community/tests/examples/default-encoding.py
@@ -0,0 +1 @@
+u = "🦜🔗"
--- a/libs/community/tests/examples/docusaurus-sitemap.xml
+++ b/libs/community/tests/examples/docusaurus-sitemap.xml
@@ -0,0 +1,42 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+  xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"
+  xmlns:xhtml="http://www.w3.org/1999/xhtml"
+  xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"
+  xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
+  <url>
+    <loc>https://python.langchain.com/docs/integrations/document_loaders/sitemap</loc>
+    <changefreq>weekly</changefreq>
+    <priority>0.5</priority>
+  </url>
+  <url>
+    <loc>https://python.langchain.com/cookbook</loc>
+    <changefreq>weekly</changefreq>
+    <priority>0.5</priority>
+  </url>
+  <url>
+    <loc>https://python.langchain.com/docs/additional_resources</loc>
+    <changefreq>weekly</changefreq>
+    <priority>0.5</priority>
+  </url>
+  <url>
+    <loc>https://python.langchain.com/docs/modules/chains/how_to/</loc>
+    <changefreq>weekly</changefreq>
+    <priority>0.5</priority>
+  </url>
+  <url>
+    <loc>https://python.langchain.com/docs/use_cases/question_answering/local_retrieval_qa</loc>
+    <changefreq>weekly</changefreq>
+    <priority>0.5</priority>
+  </url>
+  <url>
+    <loc>https://python.langchain.com/docs/use_cases/summarization</loc>
+    <changefreq>weekly</changefreq>
+    <priority>0.5</priority>
+  </url>
+  <url>
+    <loc>https://python.langchain.com/</loc>
+    <changefreq>weekly</changefreq>
+    <priority>0.5</priority>
+  </url>
+</urlset>
--- a/libs/community/tests/examples/duplicate-chars.pdf
+++ b/libs/community/tests/examples/duplicate-chars.pdf
--- a/libs/community/tests/examples/example-utf8.html
+++ b/libs/community/tests/examples/example-utf8.html
@@ -0,0 +1,25 @@
+<html>
+  <head>
+    <title>Chew dad's slippers</title>
+  </head>
+  <body>
+    <h1>
+      Instead of drinking water from the cat bowl, make sure to steal water from
+      the toilet
+    </h1>
+    <h2>Chase the red dot</h2>
+    <p>
+      Munch, munch, chomp, chomp hate dogs. Spill litter box, scratch at owner,
+      destroy all furniture, especially couch get scared by sudden appearance of
+      cucumber cat is love, cat is life fat baby cat best buddy little guy for
+      catch eat throw up catch eat throw up bad birds jump on fridge. Purr like
+      a car engine oh yes, there is my human woman she does best pats ever that
+      all i like about her hiss meow .
+    </p>
+    <p>
+      Dead stare with ears cocked when “owners” are asleep, cry for no apparent
+      reason meow all night. Plop down in the middle where everybody walks favor
+      packaging over toy. Sit on the laptop kitty pounce, trip, faceplant.
+    </p>
+  </body>
+</html>
--- a/libs/community/tests/examples/example.html
+++ b/libs/community/tests/examples/example.html
@@ -0,0 +1,25 @@
+<html>
+  <head>
+    <title>Chew dad's slippers</title>
+  </head>
+  <body>
+    <h1>
+      Instead of drinking water from the cat bowl, make sure to steal water from
+      the toilet
+    </h1>
+    <h2>Chase the red dot</h2>
+    <p>
+      Munch, munch, chomp, chomp hate dogs. Spill litter box, scratch at owner,
+      destroy all furniture, especially couch get scared by sudden appearance of
+      cucumber cat is love, cat is life fat baby cat best buddy little guy for
+      catch eat throw up catch eat throw up bad birds jump on fridge. Purr like
+      a car engine oh yes, there is my human woman she does best pats ever that
+      all i like about her hiss meow .
+    </p>
+    <p>
+      Dead stare with ears cocked when owners are asleep, cry for no apparent
+      reason meow all night. Plop down in the middle where everybody walks favor
+      packaging over toy. Sit on the laptop kitty pounce, trip, faceplant.
+    </p>
+  </body>
+</html>
--- a/libs/community/tests/examples/example.json
+++ b/libs/community/tests/examples/example.json
@@ -0,0 +1,25 @@
+{
+    "messages": [
+        {
+            "sender_name": "User 2",
+            "timestamp_ms": 1675597571851,
+            "content": "Bye!"
+        },
+        {
+            "sender_name": "User 1",
+            "timestamp_ms": 1675597435669,
+            "content": "Oh no worries! Bye"
+        },
+        {
+            "sender_name": "User 2",
+            "timestamp_ms": 1675595060730,
+            "photos": [
+                {
+                    "uri": "url_of_some_picture.jpg",
+                    "creation_timestamp": 1675595059
+                }
+            ]
+        }
+    ],
+    "title": "User 1 and User 2 chat"
+}
--- a/libs/community/tests/examples/example.mht
+++ b/libs/community/tests/examples/example.mht
@@ -0,0 +1,108 @@
+From: <Saved by Blink>
+Snapshot-Content-Location: https://langchain.com/
+Subject: 
+Date: Fri, 16 Jun 2023 19:32:59 -0000
+MIME-Version: 1.0
+Content-Type: multipart/related;
+	type="text/html";
+	boundary="----MultipartBoundary--dYaUgeoeP18TqraaeOwkeZyu1vI09OtkFwH2rcnJMt----"
+
+
+------MultipartBoundary--dYaUgeoeP18TqraaeOwkeZyu1vI09OtkFwH2rcnJMt----
+Content-Type: text/html
+Content-ID: <frame-2F1DB31BBD26C55A7F1EEC7561350515@mhtml.blink>
+Content-Transfer-Encoding: quoted-printable
+Content-Location: https://langchain.com/
+
+<html><head><title>LangChain</title><meta http-equiv=3D"Content-Type" content=3D"text/html; charset=
+=3DUTF-8"><link rel=3D"stylesheet" type=3D"text/css" href=3D"cid:css-c9ac93=
+be-2ab2-46d8-8690-80da3a6d1832@mhtml.blink" /></head><body data-new-gr-c-s-=
+check-loaded=3D"14.1112.0" data-gr-ext-installed=3D""><p align=3D"center">
+	<b><font size=3D"6">L</font><font size=3D"4">ANG </font><font size=3D"6">C=
+</font><font size=3D"4">HAIN </font><font size=3D"2">=F0=9F=A6=9C=EF=B8=8F=
+=F0=9F=94=97</font><br>Official Home Page</b><font size=3D"1">&nbsp;</font>=
+</p>
+
+<hr>
+<center>
+<table border=3D"0" cellspacing=3D"0" width=3D"90%">
+  <tbody>
+  <tr>
+    <td height=3D"55" valign=3D"top" width=3D"50%">
+      <ul>
+        <li><a href=3D"https://langchain.com/integrations.html">Integration=
+s</a>=20
+    </li></ul></td>
+   <td height=3D"45" valign=3D"top" width=3D"50%">
+      <ul>
+        <li><a href=3D"https://langchain.com/features.html">Features</a>=20
+        </li></ul></td></tr>
+    <tr>
+    <td height=3D"55" valign=3D"top" width=3D"50%">
+      <ul>
+        <li><a href=3D"https://blog.langchain.dev/">Blog</a>=20
+    </li></ul></td>
+   <td height=3D"45" valign=3D"top" width=3D"50%">
+      <ul>
+        <li><a href=3D"https://docs.langchain.com/docs/">Conceptual Guide</=
+a>=20
+        </li></ul></td></tr>
+
+  <tr>
+    <td height=3D"45" valign=3D"top" width=3D"50%">
+      <ul>
+        <li><a href=3D"https://github.com/langchain-ai/langchain">Python Repo<=
+/a></li></ul></td>
+    <td height=3D"45" valign=3D"top" width=3D"50%">
+		  <ul>
+        <li><a href=3D"https://github.com/langchain-ai/langchainjs">JavaScript=
+ Repo</a></li></ul></td></tr>
+ =20
+=09
+  <tr>
+    <td height=3D"45" valign=3D"top" width=3D"50%">
+      <ul>
+        <li><a href=3D"https://python.langchain.com/en/latest/">Python Docu=
+mentation</a> </li></ul></td>
+    <td height=3D"45" valign=3D"top" width=3D"50%">
+      <ul>
+         <li><a href=3D"https://js.langchain.com/docs/">JavaScript Document=
+ation</a>
+					</li></ul></td></tr>
+  <tr>
+    <td height=3D"45" valign=3D"top" width=3D"50%">
+      <ul>
+        <li><a href=3D"https://github.com/langchain-ai/chat-langchain">Python =
+ChatLangChain</a> </li></ul></td>
+    <td height=3D"45" valign=3D"top" width=3D"50%">
+      <ul>
+         <li><a href=3D"https://github.com/sullivan-sean/chat-langchainjs">=
+JavaScript ChatLangChain</a>
+					</li></ul></td></tr>
+  <tr>
+    <td height=3D"45" valign=3D"top" width=3D"50%">
+      <ul>
+        <li><a href=3D"https://discord.gg/6adMQxSpJS">Discord</a> </li></ul=
+></td>
+    <td height=3D"55" valign=3D"top" width=3D"50%">
+      <ul>
+        <li><a href=3D"https://twitter.com/langchainai">Twitter</a>
+					</li></ul></td></tr>
+			=09
+
+
+</tbody></table></center>
+<hr>
+<font size=3D"2">
+<p>If you have any comments about our WEB page, you can=20
+write us at the address shown above.  However, due to=20
+the limited number of personnel in our corporate office, we are unable to=
+=20
+provide a direct response.</p></font>
+<hr>
+<p align=3D"left"><font size=3D"2">Copyright =C2=A9 2023-2023<b> LangChain =
+Inc.</b></font><font size=3D"2">=20
+</font></p>
+</body></html>
+
+------MultipartBoundary--dYaUgeoeP18TqraaeOwkeZyu1vI09OtkFwH2rcnJMt------
--- a/libs/community/tests/examples/facebook_chat.json
+++ b/libs/community/tests/examples/facebook_chat.json
@@ -0,0 +1,64 @@
+{
+    "participants": [{"name": "User 1"}, {"name": "User 2"}],
+    "messages": [
+        {"sender_name": "User 2", "timestamp_ms": 1675597571851, "content": "Bye!"},
+        {
+            "sender_name": "User 1",
+            "timestamp_ms": 1675597435669,
+            "content": "Oh no worries! Bye"
+        },
+        {
+            "sender_name": "User 2",
+            "timestamp_ms": 1675596277579,
+            "content": "No Im sorry it was my mistake, the blue one is not for sale"
+        },
+        {
+            "sender_name": "User 1",
+            "timestamp_ms": 1675595140251,
+            "content": "I thought you were selling the blue one!"
+        },
+        {
+            "sender_name": "User 1",
+            "timestamp_ms": 1675595109305,
+            "content": "Im not interested in this bag. Im interested in the blue one!"
+        },
+        {
+            "sender_name": "User 2",
+            "timestamp_ms": 1675595068468,
+            "content": "Here is $129"
+        },
+        {
+            "sender_name": "User 2",
+            "timestamp_ms": 1675595060730,
+            "photos": [
+                {"uri": "url_of_some_picture.jpg", "creation_timestamp": 1675595059}
+            ]
+        },
+        {
+            "sender_name": "User 2",
+            "timestamp_ms": 1675595045152,
+            "content": "Online is at least $100"
+        },
+        {
+            "sender_name": "User 1",
+            "timestamp_ms": 1675594799696,
+            "content": "How much do you want?"
+        },
+        {
+            "sender_name": "User 2",
+            "timestamp_ms": 1675577876645,
+            "content": "Goodmorning! $50 is too low."
+        },
+        {
+            "sender_name": "User 1",
+            "timestamp_ms": 1675549022673,
+            "content": "Hi! Im interested in your bag. Im offering $50. Let me know if you are interested. Thanks!"
+        }
+    ],
+    "title": "User 1 and User 2 chat",
+    "is_still_participant": true,
+    "thread_path": "inbox/User 1 and User 2 chat",
+    "magic_words": [],
+    "image": {"uri": "image_of_the_chat.jpg", "creation_timestamp": 1675549016},
+    "joinable_mode": {"mode": 1, "link": ""}
+}
--- a/libs/community/tests/examples/factbook.xml
+++ b/libs/community/tests/examples/factbook.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<factbook>
+  <country>
+    <name>United States</name>
+    <capital>Washington, DC</capital>
+    <leader>Joe Biden</leader>
+    <sport>Baseball</sport>
+  </country>
+  <country>
+    <name>Canada</name>
+    <capital>Ottawa</capital>
+    <leader>Justin Trudeau</leader>
+    <sport>Hockey</sport>
+  </country>
+  <country>
+    <name>France</name>
+    <capital>Paris</capital>
+    <leader>Emmanuel Macron</leader>
+    <sport>Soccer</sport>
+  </country>
+  <country>
+    <name>Trinidad &amp; Tobado</name>
+    <capital>Port of Spain</capital>
+    <leader>Keith Rowley</leader>
+    <sport>Track &amp; Field</sport>
+  </country>
+</factbook>
--- a/libs/community/tests/examples/fake-email-attachment.eml
+++ b/libs/community/tests/examples/fake-email-attachment.eml
@@ -0,0 +1,50 @@
+MIME-Version: 1.0
+Date: Fri, 23 Dec 2022 12:08:48 -0600
+Message-ID: <CAPgNNXSzLVJ-d1OCX_TjFgJU7ugtQrjFybPtAMmmYZzphxNFYg@mail.gmail.com>
+Subject: Fake email with attachment
+From: Mallori Harrell <mallori@unstructured.io>
+To: Mallori Harrell <mallori@unstructured.io>
+Content-Type: multipart/mixed; boundary="0000000000005d654405f082adb7"
+
+--0000000000005d654405f082adb7
+Content-Type: multipart/alternative; boundary="0000000000005d654205f082adb5"
+
+--0000000000005d654205f082adb5
+Content-Type: text/plain; charset="UTF-8"
+
+Hello!
+
+Here's the attachments!
+
+It includes:
+
+   - Lots of whitespace
+   - Little to no content
+   - and is a quick read
+
+Best,
+
+Mallori
+
+--0000000000005d654205f082adb5
+Content-Type: text/html; charset="UTF-8"
+Content-Transfer-Encoding: quoted-printable
+
+<div dir=3D"ltr">Hello!=C2=A0<div><br></div><div>Here&#39;s the attachments=
+!</div><div><br></div><div>It includes:</div><div><ul><li style=3D"margin-l=
+eft:15px">Lots of whitespace</li><li style=3D"margin-left:15px">Little=C2=
+=A0to no content</li><li style=3D"margin-left:15px">and is a quick read</li=
+></ul><div>Best,</div></div><div><br></div><div>Mallori</div><div dir=3D"lt=
+r" class=3D"gmail_signature" data-smartmail=3D"gmail_signature"><div dir=3D=
+"ltr"><div><div><br></div></div></div></div></div>
+
+--0000000000005d654205f082adb5--
+--0000000000005d654405f082adb7
+Content-Type: text/plain; charset="US-ASCII"; name="fake-attachment.txt"
+Content-Disposition: attachment; filename="fake-attachment.txt"
+Content-Transfer-Encoding: base64
+X-Attachment-Id: f_lc0tto5j0
+Content-ID: <f_lc0tto5j0>
+
+SGV5IHRoaXMgaXMgYSBmYWtlIGF0dGFjaG1lbnQh
+--0000000000005d654405f082adb7--
--- a/libs/community/tests/examples/fake.odt
+++ b/libs/community/tests/examples/fake.odt
--- a/libs/community/tests/examples/hello.msg
+++ b/libs/community/tests/examples/hello.msg
--- a/libs/community/tests/examples/hello.pdf
+++ b/libs/community/tests/examples/hello.pdf
--- a/libs/community/tests/examples/hello_world.js
+++ b/libs/community/tests/examples/hello_world.js
@@ -0,0 +1,12 @@
+class HelloWorld {
+  sayHello() {
+    console.log("Hello World!");
+  }
+}
+
+function main() {
+  const hello = new HelloWorld();
+  hello.sayHello();
+}
+
+main();
--- a/libs/community/tests/examples/hello_world.py
+++ b/libs/community/tests/examples/hello_world.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import sys
+
+
+def main() -> int:
+    print("Hello World!")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/libs/community/tests/examples/layout-parser-paper.pdf
+++ b/libs/community/tests/examples/layout-parser-paper.pdf
--- a/libs/community/tests/examples/multi-page-forms-sample-2-page.pdf
+++ b/libs/community/tests/examples/multi-page-forms-sample-2-page.pdf
--- a/libs/community/tests/examples/non-utf8-encoding.py
+++ b/libs/community/tests/examples/non-utf8-encoding.py
@@ -0,0 +1,3 @@
+# coding: iso-8859-5
+# <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <- Cyrillic characters
+u = "<EFBFBD><EFBFBD><EFBFBD><EFBFBD>"
--- a/libs/community/tests/examples/sample_rss_feeds.opml
+++ b/libs/community/tests/examples/sample_rss_feeds.opml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<opml version="1.0">
+    <head>
+        <title>Sample RSS feed subscriptions</title>
+    </head>
+    <body>
+        <outline text="Tech" title="Tech">
+            <outline type="rss" text="Engadget" title="Engadget" xmlUrl="http://www.engadget.com/rss-full.xml" htmlUrl="http://www.engadget.com"/>
+            <outline type="rss" text="Ars Technica - All content" title="Ars Technica - All content" xmlUrl="http://feeds.arstechnica.com/arstechnica/index/" htmlUrl="https://arstechnica.com"/>
+        </outline>
+    </body>
+</opml>
--- a/libs/community/tests/examples/sitemap.xml
+++ b/libs/community/tests/examples/sitemap.xml
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+  xmlns:xhtml="http://www.w3.org/1999/xhtml">
+
+  <url>
+    <loc>https://python.langchain.com/en/stable/</loc>
+
+
+    <lastmod>2023-05-04T16:15:31.377584+00:00</lastmod>
+
+    <changefreq>weekly</changefreq>
+    <priority>1</priority>
+  </url>
+
+  <url>
+    <loc>https://python.langchain.com/en/latest/</loc>
+
+
+    <lastmod>2023-05-05T07:52:19.633878+00:00</lastmod>
+
+    <changefreq>daily</changefreq>
+    <priority>0.9</priority>
+  </url>
+
+  <url>
+    <loc>https://python.langchain.com/en/harrison-docs-refactor-3-24/</loc>
+
+
+    <lastmod>2023-03-27T02:32:55.132916+00:00</lastmod>
+
+    <changefreq>monthly</changefreq>
+    <priority>0.8</priority>
+  </url>
+
+</urlset>
--- a/libs/community/tests/examples/slack_export.zip
+++ b/libs/community/tests/examples/slack_export.zip
--- a/libs/community/tests/examples/stanley-cups.csv
+++ b/libs/community/tests/examples/stanley-cups.csv
@@ -0,0 +1,5 @@
+Stanley Cups,,
+Team,Location,Stanley Cups
+Blues,STL,1
+Flyers,PHI,2
+Maple Leafs,TOR,13
--- a/libs/community/tests/examples/stanley-cups.tsv
+++ b/libs/community/tests/examples/stanley-cups.tsv
@@ -0,0 +1,5 @@
+Stanley Cups		
+Team	Location	Stanley Cups
+Blues	STL	1
+Flyers	PHI	2
+Maple Leafs	TOR	13
--- a/libs/community/tests/examples/stanley-cups.xlsx
+++ b/libs/community/tests/examples/stanley-cups.xlsx
--- a/libs/community/tests/examples/whatsapp_chat.txt
+++ b/libs/community/tests/examples/whatsapp_chat.txt
@@ -0,0 +1,10 @@
+[05.05.23, 15:48:11] James: Hi here
+[11/8/21, 9:41:32 AM] User name: Message 123
+1/23/23, 3:19 AM - User 2: Bye!
+1/23/23, 3:22_AM - User 1: And let me know if anything changes
+[1/24/21, 12:41:03 PM] ~ User name 2: Of course!
+[2023/5/4, 16:13:23] ~ User 2: See you!
+7/19/22, 11:32 PM - User 1: Hello
+7/20/22, 11:32 am - User 2: Goodbye
+4/20/23, 9:42 am - User 3: <Media omitted>
+6/29/23, 12:16 am - User 4: This message was deleted
--- a/libs/community/tests/integration_tests/test_compile.py
+++ b/libs/community/tests/integration_tests/test_compile.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+@pytest.mark.compile
+def test_placeholder() -> None:
+    """Placeholder that lets integration tests be compiled without running any."""
+    pass
--- a/libs/community/tests/unit_tests/conftest.py
+++ b/libs/community/tests/unit_tests/conftest.py
@@ -0,0 +1,83 @@
+"""Configuration for unit tests."""
+from importlib import util
+from typing import Dict, Sequence
+
+import pytest
+from pytest import Config, Function, Parser
+
+
+def pytest_addoption(parser: Parser) -> None:
+    """Register the `--only-extended` and `--only-core` command line flags."""
+    parser.addoption(
+        "--only-extended",
+        action="store_true",
+        help="Only run extended tests. Does not allow skipping any extended tests.",
+    )
+    parser.addoption(
+        "--only-core",
+        action="store_true",
+        help="Only run core tests. Never runs any extended tests.",
+    )
+
+
+def pytest_collection_modifyitems(config: Config, items: Sequence[Function]) -> None:
+    """Skip or fail collected tests based on the custom `requires` marker.
+
+    A `requires` marker names the packages a test needs; each one is looked up with
+    `importlib.util.find_spec`. By default the test is skipped when one is missing.
+    With `--only-core` every `requires` test is skipped. With `--only-extended` a
+    missing package calls `pytest.fail`, and tests without the marker are skipped.
+
+    Passing both `--only-extended` and `--only-core` raises `ValueError`.
+
+    .. code-block:: python
+
+        @pytest.mark.requires("package1", "package2")
+        def test_something():
+            ...
+    """
+    # Mapping from the name of a package to whether it is installed or not.
+    # Used to avoid repeated calls to `util.find_spec`
+    required_pkgs_info: Dict[str, bool] = {}
+
+    only_extended = config.getoption("--only-extended") or False
+    only_core = config.getoption("--only-core") or False
+
+    if only_extended and only_core:
+        raise ValueError("Cannot specify both `--only-extended` and `--only-core`.")
+
+    for item in items:
+        requires_marker = item.get_closest_marker("requires")
+        if requires_marker is not None:
+            if only_core:
+                item.add_marker(pytest.mark.skip(reason="Skipping not a core test."))
+                continue
+
+            # Iterate through the list of required packages
+            required_pkgs = requires_marker.args
+            for pkg in required_pkgs:
+                # If we haven't yet checked whether the pkg is installed
+                # let's check it and store the result.
+                if pkg not in required_pkgs_info:
+                    required_pkgs_info[pkg] = util.find_spec(pkg) is not None
+
+                if not required_pkgs_info[pkg]:
+                    if only_extended:
+                        pytest.fail(
+                            f"Package `{pkg}` is not installed but is required for "
+                            f"extended tests. Please install the given package and "
+                            f"try again.",
+                        )
+
+                    else:
+                        # If the package is not installed, we immediately break
+                        # and mark the test as skipped.
+                        item.add_marker(
+                            pytest.mark.skip(reason=f"Requires pkg: `{pkg}`")
+                        )
+                        break
+        else:
+            if only_extended:
+                item.add_marker(
+                    pytest.mark.skip(reason="Skipping not an extended test.")
+                )
--- a/libs/partners/openai/tests/integration_tests/test_compile.py
+++ b/libs/partners/openai/tests/integration_tests/test_compile.py
@@ -0,0 +1,7 @@
+import pytest
+
+
+@pytest.mark.compile
+def test_placeholder() -> None:
+    """Placeholder that lets integration tests be compiled without running any."""
+    pass