diff --git a/docs/docs/integrations/document_loaders/rspace.ipynb b/docs/docs/integrations/document_loaders/rspace.ipynb new file mode 100644 index 00000000000..6f8dc48ecfd --- /dev/null +++ b/docs/docs/integrations/document_loaders/rspace.ipynb @@ -0,0 +1,127 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2f1572a5-9f8c-44f1-82f3-ddeee8f55145", + "metadata": {}, + "source": [ + "This notebook shows how to use the RSpace document loader to import research notes and documents from RSpace Electronic\n", + "Lab Notebook into Langchain pipelines.\n", + "\n", + "To start you'll need an RSpace account and an API key.\n", + "\n", + "You can set up a free account at [https://community.researchspace.com](https://community.researchspace.com) or use your institutional RSpace.\n", + "\n", + "You can get an RSpace API token from your account's profile page. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e5310d2-a864-4464-bdca-81f30c9d0bdb", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install rspace_client" + ] + }, + { + "cell_type": "markdown", + "id": "61b1d1b7-a28c-4fba-83a3-df64baa8b6b8", + "metadata": {}, + "source": [ + "It's best to store your RSpace API key as an environment variable. \n", + "\n", + " RSPACE_API_KEY=\n", + "\n", + "You'll also need to set the URL of your RSpace installation e.g.\n", + "\n", + " RSPACE_URL=https://community.researchspace.com\n", + "\n", + "If you use these exact environment variable names, they will be detected automatically. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "13c19ea4-100f-417e-b52f-7e8730c7c1d1", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.document_loaders.rspace import RSpaceLoader" + ] + }, + { + "cell_type": "markdown", + "id": "4fd42831-0e79-4068-a5e1-7e2cfc242789", + "metadata": {}, + "source": [ + "You can import various items from RSpace:\n", + "\n", + "* A single RSpace structured or basic document. 
This will map 1-1 to a Langchain document.\n", + "* A folder or notebook. All documents inside the notebook or folder are imported as Langchain documents. \n", + "* If you have PDF files in the RSpace Gallery, these can be imported individually as well. Under the hood, Langchain's PDF loader will be used and this creates one Langchain document per PDF page. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e614357-5eca-401b-ab98-ea55b0465009", + "metadata": {}, + "outputs": [], + "source": [ + "## replace these ids with some from your own research notes.\n", + "## Make sure to use global ids (with the 2 character prefix). This helps the loader know which API calls to make \n", + "## to RSpace API.\n", + "\n", + "rspace_ids = [\"NB1932027\", \"FL1921314\", \"SD1932029\", \"GL1932384\"]\n", + "for rs_id in rspace_ids:\n", + " loader = RSpaceLoader(global_id=rs_id)\n", + " docs = loader.load()\n", + " for doc in docs:\n", + " ## the name and ID are added to the 'source' metadata property.\n", + " print (doc.metadata)\n", + " print(doc.page_content[:500])" + ] + }, + { + "cell_type": "markdown", + "id": "1b41758d-24e0-4994-a30f-3acccc7795e4", + "metadata": {}, + "source": [ + "If you don't want to use the environment variables as above, you can pass these into the RSpaceLoader" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aa079ca6-439d-4010-9edd-cd77d8884fab", + "metadata": {}, + "outputs": [], + "source": [ + "loader = RSpaceLoader(global_id=rs_id, api_key=\"MY_API_KEY\", url=\"https://my.researchspace.com\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, 
# NOTE(review): the next two comment lines are non-Python diff text that shares these
# physical lines with the module below; it is kept verbatim so nothing is dropped.
#   + "nbformat_minor": 5 +}
#   diff --git a/libs/langchain/langchain/document_loaders/rspace.py b/libs/langchain/langchain/document_loaders/rspace.py new file mode 100644 index 00000000000..4921cb34e65 --- /dev/null +++ b/libs/langchain/langchain/document_loaders/rspace.py @@ -0,0 +1,130 @@
import os
import tempfile
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

from langchain.docstore.document import Document
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders.base import BaseLoader
from langchain.utils import get_from_dict_or_env


class RSpaceLoader(BaseLoader):
    """Load RSpace notebooks, folders, documents or Gallery PDFs as Documents.

    An RSpace document maps 1-1 to a Langchain Document. PDFs are imported
    using PyPDF.

    Requirements are rspace_client (`pip install rspace_client`) and PyPDF if
    importing PDF docs (`pip install pypdf`).
    """

    def __init__(
        self, global_id: str, api_key: Optional[str] = None, url: Optional[str] = None
    ):
        """Store the connection settings and the ID of the resource to load.

        Args:
            global_id: The global ID of the resource to load, e.g. 'SD12344'
                (a single document); 'GL12345' (a PDF file in the gallery);
                'NB4567' (a notebook); 'FL12244' (a folder).
            api_key: RSpace API key - can also be supplied as environment
                variable 'RSPACE_API_KEY'.
            url: The URL of your RSpace instance - can also be supplied as
                environment variable 'RSPACE_URL'.

        Raises:
            ValueError: If ``global_id`` is None, or if the API key or URL is
                found neither in the arguments nor in the environment.
        """
        args: Dict[str, Optional[str]] = {
            "api_key": api_key,
            "url": url,
            "global_id": global_id,
        }
        verified_args: Dict[str, str] = RSpaceLoader.validate_environment(args)
        self.api_key = verified_args["api_key"]
        self.url = verified_args["url"]
        self.global_id: str = verified_args["global_id"]

    @classmethod
    def validate_environment(cls, values: Dict) -> Dict:
        """Fill in the API key and URL from the environment and check global_id.

        ``values`` is updated in place and returned. Explicit entries win; the
        environment variables 'RSPACE_API_KEY' and 'RSPACE_URL' are the
        fallback.

        Raises:
            ValueError: If ``global_id`` is absent or None (raised here), or if
                the key/URL lookup performed by ``get_from_dict_or_env`` fails.
        """
        values["api_key"] = get_from_dict_or_env(values, "api_key", "RSPACE_API_KEY")
        values["url"] = get_from_dict_or_env(values, "url", "RSPACE_URL")
        if values.get("global_id") is None:
            raise ValueError(
                "No value supplied for global_id. Please supply an RSpace global ID"
            )
        return values

    def _create_rspace_client(self) -> Tuple[Any, Any]:
        """Create an RSpace ELN client and verify that it can reach the server.

        Returns:
            A ``(client, FieldContent)`` pair: the connected ``ELNClient`` and
            the class used to extract text from a field's content.

        Raises:
            ImportError: If ``rspace_client`` is not installed.
            Exception: If the client cannot be created or the status call
                fails; the underlying error is chained as ``__cause__``.
        """
        try:
            from rspace_client.eln import eln, field_content

        except ImportError as err:
            raise ImportError("You must run " "`pip install rspace_client`") from err

        try:
            # A distinct local name keeps the imported `eln` module usable.
            client = eln.ELNClient(self.url, self.api_key)
            client.get_status()

        except Exception as err:
            raise Exception(
                f"Unable to initialise client - is url {self.url} or "
                f"api key correct?"
            ) from err

        return client, field_content.FieldContent

    def _get_doc(self, cli: Any, field_content: Any, d_id: Union[str, int]) -> Document:
        """Fetch one RSpace document and flatten it into a single Document.

        The page content is the document name followed, for every field, by
        the field name on its own line and the field's text.
        """
        doc = cli.get_document(d_id)
        # NOTE(review): in the reviewed text this title literal spans several
        # physical lines; it is reproduced here as blank-line padding around the
        # name. Confirm against upstream whether heading markup was intended.
        parts: List[str] = [f"\n\n{doc['name']}\n\n"]
        for field in doc["fields"]:
            parts.append(f"{field['name']}\n")
            parts.append(field_content(field["content"]).get_text())
            parts.append("\n")
        return Document(
            metadata={"source": f"rspace: {doc['name']}-{doc['globalId']}"},
            page_content="".join(parts),
        )

    def _load_structured_doc(self) -> Iterator[Document]:
        """Yield the single document identified by ``self.global_id``."""
        cli, field_content = self._create_rspace_client()
        yield self._get_doc(cli, field_content, self.global_id)

    def _load_folder_tree(self) -> Iterator[Document]:
        """Yield every document listed under the folder or notebook."""
        cli, field_content = self._create_rspace_client()
        # The folder listing call is given the ID without its two-letter prefix.
        docs_in_folder = cli.list_folder_tree(
            folder_id=self.global_id[2:], typesToInclude=["document"]
        )
        doc_ids: List[int] = [d["id"] for d in docs_in_folder["records"]]
        for doc_id in doc_ids:
            yield self._get_doc(cli, field_content, doc_id)

    def _load_pdf(self) -> Iterator[Document]:
        """Download a Gallery PDF and yield the Documents PyPDFLoader produces.

        Gallery files whose name does not end in ``.pdf`` yield nothing. The
        download goes to a temporary directory that is removed once iteration
        finishes, so no file is left in the working directory.
        """
        cli, _ = self._create_rspace_client()
        file_info = cli.get_file_info(self.global_id)
        _, ext = os.path.splitext(file_info["name"])
        if ext.lower() != ".pdf":
            return
        pdf_name = f"{self.global_id}.pdf"
        with tempfile.TemporaryDirectory() as tmp_dir:
            outfile = os.path.join(tmp_dir, pdf_name)
            cli.download_file(self.global_id, outfile)
            for page in PyPDFLoader(outfile).lazy_load():
                # Presumably PyPDFLoader records the path it read under
                # "source" (TODO confirm); if so, keep the stable
                # "<global_id>.pdf" value instead of a path that is deleted
                # when the temporary directory is cleaned up.
                if "source" in page.metadata:
                    page.metadata["source"] = pdf_name
                page.metadata["rspace_src"] = self.global_id
                yield page

    def lazy_load(self) -> Iterator[Document]:
        """Yield Documents for the resource, chosen by its global ID prefix.

        'GL' loads a Gallery PDF, 'SD' a single document, and 'FL' / 'NB' a
        folder or notebook. Only the first two characters are inspected.

        Raises:
            ValueError: If the prefix is not one of the above.
        """
        prefix = self.global_id[:2] if self.global_id else ""
        if prefix == "GL":
            yield from self._load_pdf()
        elif prefix == "SD":
            yield from self._load_structured_doc()
        elif prefix in ("FL", "NB"):
            yield from self._load_folder_tree()
        else:
            raise ValueError("Unknown global ID type")

    def load(self) -> List[Document]:
        """Return every Document produced by ``lazy_load`` as a list."""
        return list(self.lazy_load())


# NOTE(review): the comment line below is the start of the poetry.lock diff that shares
# the last physical line of this range; kept verbatim so nothing is dropped.
#   diff --git a/libs/langchain/poetry.lock b/libs/langchain/poetry.lock index 2565ffb006f..b2e4d99b071 100644 --- a/libs/langchain/poetry.lock +++ b/libs/langchain/poetry.lock @@ -2884,6 +2884,7 @@ files = [ {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = 
"sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"}, {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"}, {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"}, + {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"}, {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"}, {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"}, {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"}, @@ -2892,6 +2893,7 @@ files = [ {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"}, {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"}, {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"}, + {file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"}, {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"}, {file = 
"greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"}, @@ -2921,6 +2923,7 @@ files = [ {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"}, {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"}, {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"}, + {file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"}, {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"}, @@ -2929,6 +2932,7 @@ files = [ {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"}, {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"}, {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"}, + {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"}, {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"}, {file = 
"greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"}, {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"}, @@ -3722,6 +3726,7 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" files = [ {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, ] [[package]] @@ -4529,6 +4534,16 @@ files = [ {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"}, + {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, @@ -7628,6 +7643,7 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = 
"PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -7635,8 +7651,15 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = 
"sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -7653,6 +7676,7 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -7660,6 +7684,7 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = 
"PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -8426,6 +8451,21 @@ files = [ [package.dependencies] pyasn1 = ">=0.1.3" +[[package]] +name = "rspace-client" +version = "2.5.0" +description = "A client for calling RSpace ELN and Inventory APIs" +optional = true +python-versions = ">=3.7.11,<4.0.0" +files = [ + {file = "rspace-client-2.5.0.tar.gz", hash = "sha256:101abc83d094051d2babcaa133fa1a47221b3d5953d72eef3c331ef7084071a1"}, + {file = "rspace_client-2.5.0-py3-none-any.whl", hash = "sha256:b1072df88dfa8f068f3137584d20cf135493b0521a9809c2f6ddec6b378a9cc3"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.9.3,<5.0.0" +requests = ">=2.25.1,<3.0.0" + [[package]] name = "ruff" version = "0.0.249" @@ -10843,7 +10883,7 @@ cli = ["typer"] cohere = ["cohere"] docarray = ["docarray"] embeddings = ["sentence-transformers"] -extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "gql", "html2text", "jinja2", "jq", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", 
"openai", "openai", "openapi-schema-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "xata", "xmltodict"] +extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "gql", "html2text", "jinja2", "jq", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-schema-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "xata", "xmltodict"] javascript = ["esprima"] llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"] openai = ["openai", "tiktoken"] @@ -10853,4 +10893,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "de11d7f6257615dd61f579d5137d5a6b9ee5433e0d9b3cac3feb8a759ad70393" +content-hash = "3a5bca34a60eaa9b66a4d1f9ec14de5e6a0e5ca1071a0a874499fe122cc0ee36" diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index b3549550807..8d9451e7c14 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -138,6 +138,7 @@ timescale-vector = {version = "^0.0.1", optional = true} typer = {version= "^0.9.0", optional = true} anthropic = {version = "^0.3.11", optional = true} aiosqlite = {version = "^0.19.0", optional = true} +rspace_client = {version = "^2.5.0", optional = true} 
# NOTE(review): the next two comment lines are non-Python diff text (pyproject.toml
# fragment and file header) that shares this physical line with the test module
# below; kept verbatim so nothing is dropped.
#   [tool.poetry.group.test.dependencies] @@ -366,6 +367,7 @@ extended_testing = [ "motor", "timescale-vector", "anthropic", + "rspace_client", ] [tool.ruff]
#   diff --git a/libs/langchain/tests/unit_tests/document_loaders/test_rspace_loader.py b/libs/langchain/tests/unit_tests/document_loaders/test_rspace_loader.py new file mode 100644 index 00000000000..b73ef3442a1 --- /dev/null +++ b/libs/langchain/tests/unit_tests/document_loaders/test_rspace_loader.py @@ -0,0 +1,35 @@
import os
import unittest
from unittest import mock

from langchain.document_loaders.rspace import RSpaceLoader


class TestRSpaceLoader(unittest.TestCase):
    """Constructor-level checks for RSpaceLoader argument validation."""

    url = "https://community.researchspace.com"
    api_key = "myapikey"
    global_id = "SD12345"

    def setUp(self) -> None:
        """Run every test with RSPACE_API_KEY and RSPACE_URL unset.

        The loader falls back to these variables, so a value in the
        developer's or CI environment would otherwise make the
        "missing argument" tests fail. The environment is restored afterwards.
        """
        env_patch = mock.patch.dict(os.environ)
        env_patch.start()
        self.addCleanup(env_patch.stop)
        os.environ.pop("RSPACE_API_KEY", None)
        os.environ.pop("RSPACE_URL", None)

    def test_valid_arguments(self) -> None:
        loader = RSpaceLoader(
            url=TestRSpaceLoader.url,
            api_key=TestRSpaceLoader.api_key,
            global_id=TestRSpaceLoader.global_id,
        )
        self.assertEqual(TestRSpaceLoader.url, loader.url)
        self.assertEqual(TestRSpaceLoader.api_key, loader.api_key)
        self.assertEqual(TestRSpaceLoader.global_id, loader.global_id)

    def test_missing_apikey_raises_validation_error(self) -> None:
        with self.assertRaises(ValueError) as cm:
            RSpaceLoader(url=TestRSpaceLoader.url, global_id=TestRSpaceLoader.global_id)
        e = cm.exception
        self.assertRegex(str(e), r"Did not find api_key")

    def test_missing_url_raises_validation_error(self) -> None:
        with self.assertRaises(ValueError) as cm:
            RSpaceLoader(
                api_key=TestRSpaceLoader.api_key, global_id=TestRSpaceLoader.global_id
            )
        e = cm.exception
        self.assertRegex(str(e), r"Did not find url")