infra: fix notebook tests (#28722)

Bump unstructured to pick up resolution of
https://github.com/Unstructured-IO/unstructured/issues/3795
This commit is contained in:
ccurme 2024-12-14 10:13:19 -05:00 committed by GitHub
parent 387284c259
commit 23b433f683
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 72 additions and 96 deletions

165
poetry.lock generated
View File

@ -161,13 +161,13 @@ files = [
[[package]]
name = "anthropic"
version = "0.37.1"
version = "0.40.0"
description = "The official Python library for the anthropic API"
optional = false
python-versions = ">=3.7"
python-versions = ">=3.8"
files = [
{file = "anthropic-0.37.1-py3-none-any.whl", hash = "sha256:8f550f88906823752e2abf99fbe491fbc8d40bce4cb26b9663abdf7be990d721"},
{file = "anthropic-0.37.1.tar.gz", hash = "sha256:99f688265795daa7ba9256ee68eaf2f05d53cd99d7417f4a0c2dc292c106d00a"},
{file = "anthropic-0.40.0-py3-none-any.whl", hash = "sha256:442028ae8790ff9e3b6f8912043918755af1230d193904ae2ef78cc22995280c"},
{file = "anthropic-0.40.0.tar.gz", hash = "sha256:3efeca6d9e97813f93ed34322c6c7ea2279bf0824cd0aa71b59ce222665e2b87"},
]
[package.dependencies]
@ -177,7 +177,6 @@ httpx = ">=0.23.0,<1"
jiter = ">=0.4.0,<1"
pydantic = ">=1.9.0,<3"
sniffio = "*"
tokenizers = ">=0.13.0"
typing-extensions = ">=4.7,<5"
[package.extras]
@ -1934,6 +1933,27 @@ files = [
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
[[package]]
name = "html5lib"
version = "1.1"
description = "HTML parser based on the WHATWG HTML specification"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
{file = "html5lib-1.1-py2.py3-none-any.whl", hash = "sha256:0d78f8fde1c230e99fe37986a60526d7049ed4bf8a9fadbad5f00e22e58e041d"},
{file = "html5lib-1.1.tar.gz", hash = "sha256:b2e5b40261e20f354d198eae92afc10d750afb487ed5e50f9c4eaf07c184146f"},
]
[package.dependencies]
six = ">=1.9"
webencodings = "*"
[package.extras]
all = ["chardet (>=2.2)", "genshi", "lxml"]
chardet = ["chardet (>=2.2)"]
genshi = ["genshi"]
lxml = ["lxml"]
[[package]]
name = "httpcore"
version = "1.0.6"
@ -2794,7 +2814,7 @@ adal = ["adal (>=1.0.2)"]
[[package]]
name = "langchain"
version = "0.3.4"
version = "0.3.11"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.9,<4.0"
@ -2804,12 +2824,12 @@ develop = true
[package.dependencies]
aiohttp = "^3.8.3"
async-timeout = {version = "^4.0.0", markers = "python_version < \"3.11\""}
langchain-core = "^0.3.12"
langchain-core = "^0.3.24"
langchain-text-splitters = "^0.3.0"
langsmith = "^0.1.17"
langsmith = ">=0.1.17,<0.3"
numpy = [
{version = ">=1,<2", markers = "python_version < \"3.12\""},
{version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""},
{version = ">=1.22.4,<2", markers = "python_version < \"3.12\""},
{version = ">=1.26.2,<3", markers = "python_version >= \"3.12\""},
]
pydantic = "^2.7.4"
PyYAML = ">=5.3"
@ -2823,7 +2843,7 @@ url = "libs/langchain"
[[package]]
name = "langchain-anthropic"
version = "0.2.3"
version = "0.3.0"
description = "An integration package connecting AnthropicMessages and LangChain"
optional = false
python-versions = ">=3.9,<4.0"
@ -2831,8 +2851,8 @@ files = []
develop = true
[package.dependencies]
anthropic = ">=0.30.0,<1"
langchain-core = "^0.3.9"
anthropic = ">=0.39.0,<1"
langchain-core = "^0.3.17"
pydantic = "^2.7.4"
[package.source]
@ -2866,19 +2886,19 @@ subdirectory = "libs/aws"
[[package]]
name = "langchain-chroma"
version = "0.1.5"
version = "0.2.0"
description = "An integration package connecting Chroma and LangChain"
optional = false
python-versions = ">=3.8.1,<4"
python-versions = ">=3.9,<4"
files = []
develop = true
[package.dependencies]
chromadb = ">=0.4.0,<0.6.0,!=0.5.4,!=0.5.5,!=0.5.7,!=0.5.9,!=0.5.10,!=0.5.11,!=0.5.12"
langchain-core = {version = ">=0.1.40,<0.4", markers = "python_version >= \"3.9\""}
langchain-core = ">=0.2.43,<0.4.0,!=0.3.0,!=0.3.1,!=0.3.2,!=0.3.3,!=0.3.4,!=0.3.5,!=0.3.6,!=0.3.7,!=0.3.8,!=0.3.9,!=0.3.10,!=0.3.11,!=0.3.12,!=0.3.13,!=0.3.14"
numpy = [
{version = ">=1,<2", markers = "python_version < \"3.12\""},
{version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""},
{version = ">=1.22.4,<2.0.0", markers = "python_version < \"3.12\""},
{version = ">=1.26.2,<2.0.0", markers = "python_version >= \"3.12\""},
]
[package.source]
@ -2887,7 +2907,7 @@ url = "libs/partners/chroma"
[[package]]
name = "langchain-community"
version = "0.3.3"
version = "0.3.11"
description = "Community contributed LangChain integrations."
optional = false
python-versions = ">=3.9,<4.0"
@ -2898,12 +2918,12 @@ develop = true
aiohttp = "^3.8.3"
dataclasses-json = ">= 0.5.7, < 0.7"
httpx-sse = "^0.4.0"
langchain = "^0.3.4"
langchain-core = "^0.3.12"
langsmith = "^0.1.125"
langchain = "^0.3.11"
langchain-core = "^0.3.24"
langsmith = ">=0.1.125,<0.3"
numpy = [
{version = ">=1,<2", markers = "python_version < \"3.12\""},
{version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""},
{version = ">=1.22.4,<2", markers = "python_version < \"3.12\""},
{version = ">=1.26.2,<3", markers = "python_version >= \"3.12\""},
]
pydantic-settings = "^2.4.0"
PyYAML = ">=5.3"
@ -2917,7 +2937,7 @@ url = "libs/community"
[[package]]
name = "langchain-core"
version = "0.3.20"
version = "0.3.25"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.9,<4.0"
@ -2926,7 +2946,7 @@ develop = true
[package.dependencies]
jsonpatch = "^1.33"
langsmith = "^0.1.125"
langsmith = ">=0.1.125,<0.3"
packaging = ">=23.2,<25"
pydantic = [
{version = ">=2.5.2,<3.0.0", markers = "python_full_version < \"3.12.4\""},
@ -2962,7 +2982,7 @@ subdirectory = "libs/experimental"
[[package]]
name = "langchain-fireworks"
version = "0.2.1"
version = "0.2.5"
description = "An integration package connecting Fireworks and LangChain"
optional = false
python-versions = ">=3.9,<4.0"
@ -2972,7 +2992,7 @@ develop = true
[package.dependencies]
aiohttp = "^3.9.1"
fireworks-ai = ">=0.13.0"
langchain-core = "^0.3.9"
langchain-core = "^0.3.15"
openai = "^1.10.0"
requests = "^2"
@ -3010,7 +3030,7 @@ subdirectory = "libs/vertexai"
[[package]]
name = "langchain-groq"
version = "0.2.0"
version = "0.2.1"
description = "An integration package connecting Groq and LangChain"
optional = false
python-versions = ">=3.9,<4.0"
@ -3019,7 +3039,7 @@ develop = true
[package.dependencies]
groq = ">=0.4.1,<1"
langchain-core = "^0.3"
langchain-core = "^0.3.15"
[package.source]
type = "directory"
@ -3027,7 +3047,7 @@ url = "libs/partners/groq"
[[package]]
name = "langchain-mistralai"
version = "0.2.0"
version = "0.2.3"
description = "An integration package connecting Mistral and LangChain"
optional = false
python-versions = ">=3.9,<4.0"
@ -3037,7 +3057,7 @@ develop = true
[package.dependencies]
httpx = ">=0.25.2,<1"
httpx-sse = ">=0.3.1,<1"
langchain-core = "^0.3.0"
langchain-core = "^0.3.21"
pydantic = ">=2,<3"
tokenizers = ">=0.15.1,<1"
@ -3047,7 +3067,7 @@ url = "libs/partners/mistralai"
[[package]]
name = "langchain-openai"
version = "0.2.4"
version = "0.2.12"
description = "An integration package connecting OpenAI and LangChain"
optional = false
python-versions = ">=3.9,<4.0"
@ -3055,8 +3075,8 @@ files = []
develop = true
[package.dependencies]
langchain-core = "^0.3.13"
openai = "^1.52.0"
langchain-core = "^0.3.21"
openai = "^1.55.3"
tiktoken = ">=0.7,<1"
[package.source]
@ -3065,7 +3085,7 @@ url = "libs/partners/openai"
[[package]]
name = "langchain-text-splitters"
version = "0.3.0"
version = "0.3.2"
description = "LangChain text splitting utilities"
optional = false
python-versions = ">=3.9,<4.0"
@ -3073,7 +3093,7 @@ files = []
develop = true
[package.dependencies]
langchain-core = "^0.3.0"
langchain-core = "^0.3.15"
[package.source]
type = "directory"
@ -4154,13 +4174,13 @@ sympy = "*"
[[package]]
name = "openai"
version = "1.52.2"
version = "1.57.4"
description = "The official Python library for the openai API"
optional = false
python-versions = ">=3.7.1"
python-versions = ">=3.8"
files = [
{file = "openai-1.52.2-py3-none-any.whl", hash = "sha256:57e9e37bc407f39bb6ec3a27d7e8fb9728b2779936daa1fcf95df17d3edfaccc"},
{file = "openai-1.52.2.tar.gz", hash = "sha256:87b7d0f69d85f5641678d414b7ee3082363647a5c66a462ed7f3ccb59582da0d"},
{file = "openai-1.57.4-py3-none-any.whl", hash = "sha256:7def1ab2d52f196357ce31b9cfcf4181529ce00838286426bb35be81c035dafb"},
{file = "openai-1.57.4.tar.gz", hash = "sha256:a8f071a3e9198e2818f63aade68e759417b9f62c0971bdb83de82504b70b77f7"},
]
[package.dependencies]
@ -6646,13 +6666,13 @@ files = [
[[package]]
name = "unstructured"
version = "0.15.14"
version = "0.16.11"
description = "A library that prepares raw documents for downstream ML tasks."
optional = false
python-versions = "<3.13,>=3.9.0"
files = [
{file = "unstructured-0.15.14-py3-none-any.whl", hash = "sha256:502903cbcc60844c82f5351a0bc2e77f00f16a144cb884ac44d2f175470a1df8"},
{file = "unstructured-0.15.14.tar.gz", hash = "sha256:876546c308c257314865996ce15745139c9fd4f79c7b4f09ad9d719d466b5b55"},
{file = "unstructured-0.16.11-py3-none-any.whl", hash = "sha256:a92d5bc2c2b7bb23369641fb7a7f0daba1775639199306ce4cd83ca564a03763"},
{file = "unstructured-0.16.11.tar.gz", hash = "sha256:33ebf68aae11ce33c8a96335296557b5abd8ba96eaba3e5a1554c0b9eee40bb5"},
]
[package.dependencies]
@ -6662,6 +6682,7 @@ chardet = "*"
dataclasses-json = "*"
emoji = "*"
filetype = "*"
html5lib = "*"
langdetect = "*"
lxml = "*"
markdown = {version = "*", optional = true, markers = "extra == \"md\""}
@ -6673,76 +6694,30 @@ python-magic = "*"
python-oxmsg = "*"
rapidfuzz = "*"
requests = "*"
tabulate = "*"
tqdm = "*"
typing-extensions = "*"
unstructured-client = "*"
wrapt = "*"
[package.extras]
airtable = ["pyairtable"]
all-docs = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (==0.7.36)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
astradb = ["astrapy"]
azure = ["adlfs", "fsspec"]
azure-cognitive-search = ["azure-search-documents"]
bedrock = ["boto3", "langchain-community"]
biomed = ["bs4"]
box = ["boxfs", "fsspec"]
chroma = ["chromadb (>0.4.14)", "importlib-metadata (>=8.2.0)", "tenacity (==8.5.0)", "typer (<=0.9.0)"]
clarifai = ["clarifai"]
confluence = ["atlassian-python-api"]
all-docs = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (==0.8.1)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
csv = ["pandas"]
databricks-volumes = ["databricks-sdk"]
delta-table = ["deltalake (<=0.19.1)", "fsspec"]
discord = ["discord-py"]
doc = ["python-docx (>=1.1.2)"]
docx = ["python-docx (>=1.1.2)"]
dropbox = ["dropboxdrivefs", "fsspec"]
elasticsearch = ["elasticsearch[async]"]
embed-huggingface = ["langchain-huggingface"]
embed-mixedbreadai = ["mixedbread-ai"]
embed-octoai = ["openai", "tiktoken"]
embed-vertexai = ["langchain", "langchain-community", "langchain-google-vertexai"]
embed-voyageai = ["langchain", "langchain-voyageai"]
epub = ["pypandoc"]
gcs = ["bs4", "fsspec", "gcsfs"]
github = ["pygithub (>1.58.0)"]
gitlab = ["python-gitlab"]
google-drive = ["google-api-python-client"]
hubspot = ["hubspot-api-client", "urllib3"]
huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"]
image = ["effdet", "google-cloud-vision", "onnx", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (==0.7.36)", "unstructured.pytesseract (>=0.3.12)"]
jira = ["atlassian-python-api"]
kafka = ["confluent-kafka"]
local-inference = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (==0.7.36)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
image = ["effdet", "google-cloud-vision", "onnx", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (==0.8.1)", "unstructured.pytesseract (>=0.3.12)"]
local-inference = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (==0.8.1)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
md = ["markdown"]
mongodb = ["pymongo"]
notion = ["htmlBuilder", "notion-client"]
odt = ["pypandoc", "python-docx (>=1.1.2)"]
onedrive = ["Office365-REST-Python-Client", "bs4", "msal"]
openai = ["langchain-openai"]
opensearch = ["opensearch-py"]
org = ["pypandoc"]
outlook = ["Office365-REST-Python-Client", "msal"]
paddleocr = ["paddlepaddle (==3.0.0b1)", "unstructured.paddleocr (==2.8.1.0)"]
pdf = ["effdet", "google-cloud-vision", "onnx", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (==0.7.36)", "unstructured.pytesseract (>=0.3.12)"]
pinecone = ["pinecone-client (>=3.7.1)"]
postgres = ["psycopg2-binary"]
pdf = ["effdet", "google-cloud-vision", "onnx", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (==0.8.1)", "unstructured.pytesseract (>=0.3.12)"]
ppt = ["python-pptx (>=1.0.1)"]
pptx = ["python-pptx (>=1.0.1)"]
qdrant = ["qdrant-client"]
reddit = ["praw"]
rst = ["pypandoc"]
rtf = ["pypandoc"]
s3 = ["fsspec", "s3fs"]
salesforce = ["simple-salesforce"]
sftp = ["fsspec", "paramiko"]
sharepoint = ["Office365-REST-Python-Client", "msal"]
singlestore = ["singlestoredb"]
slack = ["slack-sdk"]
tsv = ["pandas"]
weaviate = ["weaviate-client"]
wikipedia = ["wikipedia"]
xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]
[[package]]
@ -7407,4 +7382,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.9,<4.0"
content-hash = "cb6b45ac7f487c6510a0eeef80c6cfd4b163f671eedb42d9ebf5315f42fa1ab1"
content-hash = "138c279994b75a02c377fd5fde3808770c9ae6259c59728b9986480d93790aa1"

View File

@ -47,7 +47,8 @@ grandalf = "^0.8"
lark = "^1.1.9"
pandas = "^2"
rank-bm25 = "^0.2.2"
unstructured = { version = "^0.15.12", extras = ["md"], python = "<3.13" }
tabulate = "^0.9.0"
unstructured = { version = "^0.16.11", extras = ["md"], python = "<3.13" }
wikipedia = "^1.4.0"
pypdf = "^5.0.0"
vcrpy = "^6.0.1"