mirror of
https://github.com/hwchase17/langchain.git
synced 2025-05-28 10:39:23 +00:00
Rspace doc loader (#11511)
**Description:** Add a document loader for the RSpace Electronic Lab Notebook (www.researchspace.com), so that scientific documents and research notes can be easily pulled into Langchain pipelines. **Issue** This is an new contribution, rather than an issue fix. **Dependencies:** There are no new required dependencies. In order to use the loader, clients will need to install rspace_client SDK using `pip install rspace_client` --------- Co-authored-by: richarda23 <richard.c.adams@infinityworks.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
9d1867c77f
commit
35965df20d
127
docs/docs/integrations/document_loaders/rspace.ipynb
Normal file
127
docs/docs/integrations/document_loaders/rspace.ipynb
Normal file
@ -0,0 +1,127 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2f1572a5-9f8c-44f1-82f3-ddeee8f55145",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"This notebook shows how to use the RSpace document loader to import research notes and documents from RSpace Electronic\n",
|
||||
"Lab Notebook into Langchain pipelines.\n",
|
||||
"\n",
|
||||
"To start you'll need an RSpace account and an API key.\n",
|
||||
"\n",
|
||||
"You can set up a free account at [https://community.researchspace.com](https://community.researchspace.com) or use your institutional RSpace.\n",
|
||||
"\n",
|
||||
"You can get an RSpace API token from your account's profile page. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9e5310d2-a864-4464-bdca-81f30c9d0bdb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install rspace_client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "61b1d1b7-a28c-4fba-83a3-df64baa8b6b8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It's best to store your RSpace API key as an environment variable. \n",
|
||||
"\n",
|
||||
" RSPACE_API_KEY=<YOUR_KEY>\n",
|
||||
"\n",
|
||||
"You'll also need to set the URL of your RSpace installation e.g.\n",
|
||||
"\n",
|
||||
" RSPACE_URL=https://community.researchspace.com\n",
|
||||
"\n",
|
||||
"If you use these exact environment variable names, they will be detected automatically. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "13c19ea4-100f-417e-b52f-7e8730c7c1d1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.rspace import RSpaceLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4fd42831-0e79-4068-a5e1-7e2cfc242789",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can import various items from RSpace:\n",
|
||||
"\n",
|
||||
"* A single RSpace structured or basic document. This will map 1-1 to a Langchain document.\n",
|
||||
"* A folder or noteook. All documents inside the notebook or folder are imported as Langchain documents. \n",
|
||||
"* If you have PDF files in the RSpace Gallery, these can be imported individually as well. Under the hood, Langchain's PDF loader will be used and this creates one Langchain document per PDF page. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8e614357-5eca-401b-ab98-ea55b0465009",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"## replace these ids with some from your own research notes.\n",
|
||||
"## Make sure to use global ids (with the 2 character prefix). This helps the loader know which API calls to make \n",
|
||||
"## to RSpace API.\n",
|
||||
"\n",
|
||||
"rspace_ids = [\"NB1932027\", \"FL1921314\", \"SD1932029\", \"GL1932384\"]\n",
|
||||
"for rs_id in rspace_ids:\n",
|
||||
" loader = RSpaceLoader(global_id=rs_id)\n",
|
||||
" docs = loader.load()\n",
|
||||
" for doc in docs:\n",
|
||||
" ## the name and ID are added to the 'source' metadata property.\n",
|
||||
" print (doc.metadata)\n",
|
||||
" print(doc.page_content[:500])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b41758d-24e0-4994-a30f-3acccc7795e4",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you don't want to use the environment variables as above, you can pass these into the RSpaceLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "aa079ca6-439d-4010-9edd-cd77d8884fab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = RSpaceLoader(global_id=rs_id, api_key=\"MY_API_KEY\", url=\"https://my.researchspace.com\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
130
libs/langchain/langchain/document_loaders/rspace.py
Normal file
130
libs/langchain/langchain/document_loaders/rspace.py
Normal file
@ -0,0 +1,130 @@
|
||||
import os
|
||||
from typing import Any, Dict, Iterator, List, Optional, Union
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders import PyPDFLoader
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
from langchain.utils import get_from_dict_or_env
|
||||
|
||||
|
||||
class RSpaceLoader(BaseLoader):
|
||||
"""
|
||||
Loads content from RSpace notebooks, folders, documents or PDF Gallery files into
|
||||
Langchain documents.
|
||||
|
||||
Maps RSpace document <-> Langchain Document in 1-1. PDFs are imported using PyPDF.
|
||||
|
||||
Requirements are rspace_client (`pip install rspace_client`) and PyPDF if importing
|
||||
PDF docs (`pip install pypdf`).
|
||||
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, global_id: str, api_key: Optional[str] = None, url: Optional[str] = None
|
||||
):
|
||||
"""api_key: RSpace API key - can also be supplied as environment variable
|
||||
'RSPACE_API_KEY'
|
||||
url: str
|
||||
The URL of your RSpace instance - can also be supplied as environment
|
||||
variable 'RSPACE_URL'
|
||||
global_id: str
|
||||
The global ID of the resource to load,
|
||||
e.g. 'SD12344' (a single document); 'GL12345'(A PDF file in the gallery);
|
||||
'NB4567' (a notebook); 'FL12244' (a folder)
|
||||
"""
|
||||
args: Dict[str, Optional[str]] = {
|
||||
"api_key": api_key,
|
||||
"url": url,
|
||||
"global_id": global_id,
|
||||
}
|
||||
verified_args: Dict[str, str] = RSpaceLoader.validate_environment(args)
|
||||
self.api_key = verified_args["api_key"]
|
||||
self.url = verified_args["url"]
|
||||
self.global_id: str = verified_args["global_id"]
|
||||
|
||||
@classmethod
|
||||
def validate_environment(cls, values: Dict) -> Dict:
|
||||
"""Validate that API key and URL exists in environment."""
|
||||
values["api_key"] = get_from_dict_or_env(values, "api_key", "RSPACE_API_KEY")
|
||||
values["url"] = get_from_dict_or_env(values, "url", "RSPACE_URL")
|
||||
if "global_id" not in values or values["global_id"] is None:
|
||||
raise ValueError(
|
||||
"No value supplied for global_id. Please supply an RSpace global ID"
|
||||
)
|
||||
return values
|
||||
|
||||
def _create_rspace_client(self) -> Any:
|
||||
"""Create a RSpace client."""
|
||||
try:
|
||||
from rspace_client.eln import eln, field_content
|
||||
|
||||
except ImportError:
|
||||
raise ImportError("You must run " "`pip install rspace_client`")
|
||||
|
||||
try:
|
||||
eln = eln.ELNClient(self.url, self.api_key)
|
||||
eln.get_status()
|
||||
|
||||
except Exception:
|
||||
raise Exception(
|
||||
f"Unable to initialise client - is url {self.url} or "
|
||||
f"api key correct?"
|
||||
)
|
||||
|
||||
return eln, field_content.FieldContent
|
||||
|
||||
def _get_doc(self, cli: Any, field_content: Any, d_id: Union[str, int]) -> Document:
|
||||
content = ""
|
||||
doc = cli.get_document(d_id)
|
||||
content += f"<h2>{doc['name']}<h2/>"
|
||||
for f in doc["fields"]:
|
||||
content += f"{f['name']}\n"
|
||||
fc = field_content(f["content"])
|
||||
content += fc.get_text()
|
||||
content += "\n"
|
||||
return Document(
|
||||
metadata={"source": f"rspace: {doc['name']}-{doc['globalId']}"},
|
||||
page_content=content,
|
||||
)
|
||||
|
||||
def _load_structured_doc(self) -> Iterator[Document]:
|
||||
cli, field_content = self._create_rspace_client()
|
||||
yield self._get_doc(cli, field_content, self.global_id)
|
||||
|
||||
def _load_folder_tree(self) -> Iterator[Document]:
|
||||
cli, field_content = self._create_rspace_client()
|
||||
if self.global_id:
|
||||
docs_in_folder = cli.list_folder_tree(
|
||||
folder_id=self.global_id[2:], typesToInclude=["document"]
|
||||
)
|
||||
doc_ids: List[int] = [d["id"] for d in docs_in_folder["records"]]
|
||||
for doc_id in doc_ids:
|
||||
yield self._get_doc(cli, field_content, doc_id)
|
||||
|
||||
def _load_pdf(self) -> Iterator[Document]:
|
||||
cli, field_content = self._create_rspace_client()
|
||||
file_info = cli.get_file_info(self.global_id)
|
||||
_, ext = os.path.splitext(file_info["name"])
|
||||
if ext.lower() == ".pdf":
|
||||
outfile = f"{self.global_id}.pdf"
|
||||
cli.download_file(self.global_id, outfile)
|
||||
pdf_loader = PyPDFLoader(outfile)
|
||||
for pdf in pdf_loader.lazy_load():
|
||||
pdf.metadata["rspace_src"] = self.global_id
|
||||
yield pdf
|
||||
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
if self.global_id and "GL" in self.global_id:
|
||||
for d in self._load_pdf():
|
||||
yield d
|
||||
elif self.global_id and "SD" in self.global_id:
|
||||
for d in self._load_structured_doc():
|
||||
yield d
|
||||
elif self.global_id and self.global_id[0:2] in ["FL", "NB"]:
|
||||
for d in self._load_folder_tree():
|
||||
yield d
|
||||
else:
|
||||
raise ValueError("Unknown global ID type")
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
return list(self.lazy_load())
|
44
libs/langchain/poetry.lock
generated
44
libs/langchain/poetry.lock
generated
@ -2884,6 +2884,7 @@ files = [
|
||||
{file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"},
|
||||
{file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"},
|
||||
{file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"},
|
||||
@ -2892,6 +2893,7 @@ files = [
|
||||
{file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"},
|
||||
{file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"},
|
||||
{file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"},
|
||||
@ -2921,6 +2923,7 @@ files = [
|
||||
{file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"},
|
||||
{file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"},
|
||||
@ -2929,6 +2932,7 @@ files = [
|
||||
{file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"},
|
||||
{file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"},
|
||||
{file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"},
|
||||
{file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"},
|
||||
{file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"},
|
||||
{file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"},
|
||||
@ -3722,6 +3726,7 @@ optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
|
||||
files = [
|
||||
{file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"},
|
||||
{file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -4529,6 +4534,16 @@ files = [
|
||||
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
|
||||
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
|
||||
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
|
||||
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
|
||||
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
|
||||
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
|
||||
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
|
||||
@ -7628,6 +7643,7 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
|
||||
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
|
||||
@ -7635,8 +7651,15 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
|
||||
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
|
||||
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
|
||||
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
|
||||
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
|
||||
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
|
||||
@ -7653,6 +7676,7 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
|
||||
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
|
||||
@ -7660,6 +7684,7 @@ files = [
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
|
||||
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
|
||||
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
|
||||
@ -8426,6 +8451,21 @@ files = [
|
||||
[package.dependencies]
|
||||
pyasn1 = ">=0.1.3"
|
||||
|
||||
[[package]]
|
||||
name = "rspace-client"
|
||||
version = "2.5.0"
|
||||
description = "A client for calling RSpace ELN and Inventory APIs"
|
||||
optional = true
|
||||
python-versions = ">=3.7.11,<4.0.0"
|
||||
files = [
|
||||
{file = "rspace-client-2.5.0.tar.gz", hash = "sha256:101abc83d094051d2babcaa133fa1a47221b3d5953d72eef3c331ef7084071a1"},
|
||||
{file = "rspace_client-2.5.0-py3-none-any.whl", hash = "sha256:b1072df88dfa8f068f3137584d20cf135493b0521a9809c2f6ddec6b378a9cc3"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
beautifulsoup4 = ">=4.9.3,<5.0.0"
|
||||
requests = ">=2.25.1,<3.0.0"
|
||||
|
||||
[[package]]
|
||||
name = "ruff"
|
||||
version = "0.0.249"
|
||||
@ -10843,7 +10883,7 @@ cli = ["typer"]
|
||||
cohere = ["cohere"]
|
||||
docarray = ["docarray"]
|
||||
embeddings = ["sentence-transformers"]
|
||||
extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "gql", "html2text", "jinja2", "jq", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-schema-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "xata", "xmltodict"]
|
||||
extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "gql", "html2text", "jinja2", "jq", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-schema-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "xata", "xmltodict"]
|
||||
javascript = ["esprima"]
|
||||
llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
|
||||
openai = ["openai", "tiktoken"]
|
||||
@ -10853,4 +10893,4 @@ text-helpers = ["chardet"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
content-hash = "de11d7f6257615dd61f579d5137d5a6b9ee5433e0d9b3cac3feb8a759ad70393"
|
||||
content-hash = "3a5bca34a60eaa9b66a4d1f9ec14de5e6a0e5ca1071a0a874499fe122cc0ee36"
|
||||
|
@ -138,6 +138,7 @@ timescale-vector = {version = "^0.0.1", optional = true}
|
||||
typer = {version= "^0.9.0", optional = true}
|
||||
anthropic = {version = "^0.3.11", optional = true}
|
||||
aiosqlite = {version = "^0.19.0", optional = true}
|
||||
rspace_client = {version = "^2.5.0", optional = true}
|
||||
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
@ -366,6 +367,7 @@ extended_testing = [
|
||||
"motor",
|
||||
"timescale-vector",
|
||||
"anthropic",
|
||||
"rspace_client",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
|
@ -0,0 +1,35 @@
|
||||
import unittest
|
||||
|
||||
from langchain.document_loaders.rspace import RSpaceLoader
|
||||
|
||||
|
||||
class TestRSpaceLoader(unittest.TestCase):
|
||||
url = "https://community.researchspace.com"
|
||||
api_key = "myapikey"
|
||||
global_id = "SD12345"
|
||||
|
||||
def test_valid_arguments(self) -> None:
|
||||
loader = RSpaceLoader(
|
||||
url=TestRSpaceLoader.url,
|
||||
api_key=TestRSpaceLoader.api_key,
|
||||
global_id=TestRSpaceLoader.global_id,
|
||||
)
|
||||
self.assertEqual(TestRSpaceLoader.url, loader.url) # add assertion here
|
||||
self.assertEqual(TestRSpaceLoader.api_key, loader.api_key) # add assertion here
|
||||
self.assertEqual(
|
||||
TestRSpaceLoader.global_id, loader.global_id
|
||||
) # add assertion here
|
||||
|
||||
def test_missing_apikey_raises_validation_error(self) -> None:
|
||||
with self.assertRaises(ValueError) as cm:
|
||||
RSpaceLoader(url=TestRSpaceLoader.url, global_id=TestRSpaceLoader.global_id)
|
||||
e = cm.exception
|
||||
self.assertRegex(str(e), r"Did not find api_key")
|
||||
|
||||
def test_missing_url_raises_validation_error(self) -> None:
|
||||
with self.assertRaises(ValueError) as cm:
|
||||
RSpaceLoader(
|
||||
api_key=TestRSpaceLoader.api_key, global_id=TestRSpaceLoader.global_id
|
||||
)
|
||||
e = cm.exception
|
||||
self.assertRegex(str(e), r"Did not find url")
|
Loading…
Reference in New Issue
Block a user