langchain[minor]: Updated DocugamiLoader, includes breaking changes (#13265)

This PR makes two main changes:

1. Rewrote the DocugamiLoader so that it no longer parses the DGML XML
format internally; it now delegates to the `dgml-utils` library, which we
are developing separately. This is a very lightweight dependency.
2. Added MMR (Maximal Marginal Relevance) as a search type option for the
multi-vector retriever, similar to other retrievers. MMR is especially
useful when using Docugami for RAG, since we deal with large sets of
documents in which a few might be duplicates, and plain similarity-based
search doesn't give great results in many cases.

We are @docugami on Twitter, and I am @tjaffri.

---------

Co-authored-by: Taqi Jaffri <tjaffri@docugami.com>
This commit is contained in:
Taqi Jaffri
2023-11-28 15:56:22 -08:00
committed by GitHub
parent a20e8f8bb0
commit 144710ad9a
9 changed files with 924 additions and 594 deletions

View File

@@ -1,7 +1,7 @@
import hashlib
import io
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Mapping, Optional, Sequence, Union
@@ -11,11 +11,10 @@ from langchain_core.pydantic_v1 import BaseModel, root_validator
from langchain.document_loaders.base import BaseLoader
TD_NAME = "{http://www.w3.org/1999/xhtml}td"
TABLE_NAME = "{http://www.w3.org/1999/xhtml}table"
XPATH_KEY = "xpath"
DOCUMENT_ID_KEY = "id"
ID_KEY = "id"
DOCUMENT_SOURCE_KEY = "source"
DOCUMENT_NAME_KEY = "name"
STRUCTURE_KEY = "structure"
@@ -30,7 +29,7 @@ logger = logging.getLogger(__name__)
class DocugamiLoader(BaseLoader, BaseModel):
"""Load from `Docugami`.
To use, you should have the ``lxml`` python package installed.
To use, you should have the ``dgml-utils`` python package installed.
"""
api: str = DEFAULT_API_ENDPOINT
@@ -38,14 +37,43 @@ class DocugamiLoader(BaseLoader, BaseModel):
access_token: Optional[str] = os.environ.get("DOCUGAMI_API_KEY")
"""The Docugami API access token to use."""
max_text_length = 4096
"""Max length of chunk text returned."""
min_text_length: int = 32
"""Threshold under which chunks are appended to next to avoid over-chunking."""
max_metadata_length = 512
"""Max length of metadata text returned."""
include_xml_tags: bool = False
"""Set to true for XML tags in chunk output text."""
parent_hierarchy_levels: int = 0
"""Set appropriately to get parent chunks using the chunk hierarchy."""
parent_id_key: str = "doc_id"
"""Metadata key for parent doc ID."""
sub_chunk_tables: bool = False
"""Set to True to return sub-chunks within tables."""
whitespace_normalize_text: bool = True
"""Set to False if you want to full whitespace formatting in the original
XML doc, including indentation."""
docset_id: Optional[str]
"""The Docugami API docset ID to use."""
document_ids: Optional[Sequence[str]]
"""The Docugami API document IDs to use."""
file_paths: Optional[Sequence[Union[Path, str]]]
"""The local file paths to use."""
min_chunk_size: int = 32 # appended to the next chunk to avoid over-chunking
"""The minimum chunk size to use when parsing DGML. Defaults to 32."""
include_project_metadata_in_doc_metadata: bool = True
"""Set to True if you want to include the project metadata in the doc metadata."""
@root_validator
def validate_local_or_remote(cls, values: Dict[str, Any]) -> Dict[str, Any]:
@@ -69,7 +97,10 @@ class DocugamiLoader(BaseLoader, BaseModel):
return values
def _parse_dgml(
self, document: Mapping, content: bytes, doc_metadata: Optional[Mapping] = None
self,
content: bytes,
document_name: Optional[str] = None,
additional_doc_metadata: Optional[Mapping] = None,
) -> List[Document]:
"""Parse a single DGML document into a list of Documents."""
try:
@@ -80,108 +111,65 @@ class DocugamiLoader(BaseLoader, BaseModel):
"Please install it with `pip install lxml`."
)
# helpers
def _xpath_qname_for_chunk(chunk: Any) -> str:
"""Get the xpath qname for a chunk."""
qname = f"{chunk.prefix}:{chunk.tag.split('}')[-1]}"
parent = chunk.getparent()
if parent is not None:
doppelgangers = [x for x in parent if x.tag == chunk.tag]
if len(doppelgangers) > 1:
idx_of_self = doppelgangers.index(chunk)
qname = f"{qname}[{idx_of_self + 1}]"
return qname
def _xpath_for_chunk(chunk: Any) -> str:
"""Get the xpath for a chunk."""
ancestor_chain = chunk.xpath("ancestor-or-self::*")
return "/" + "/".join(_xpath_qname_for_chunk(x) for x in ancestor_chain)
def _structure_value(node: Any) -> str:
"""Get the structure value for a node."""
structure = (
"table"
if node.tag == TABLE_NAME
else node.attrib["structure"]
if "structure" in node.attrib
else None
try:
from dgml_utils.models import Chunk
from dgml_utils.segmentation import get_chunks
except ImportError:
raise ImportError(
"Could not import from dgml-utils python package. "
"Please install it with `pip install dgml-utils`."
)
return structure
def _is_structural(node: Any) -> bool:
"""Check if a node is structural."""
return _structure_value(node) is not None
def _is_heading(node: Any) -> bool:
"""Check if a node is a heading."""
structure = _structure_value(node)
return structure is not None and structure.lower().startswith("h")
def _get_text(node: Any) -> str:
"""Get the text of a node."""
return " ".join(node.itertext()).strip()
def _has_structural_descendant(node: Any) -> bool:
"""Check if a node has a structural descendant."""
for child in node:
if _is_structural(child) or _has_structural_descendant(child):
return True
return False
def _leaf_structural_nodes(node: Any) -> List:
"""Get the leaf structural nodes of a node."""
if _is_structural(node) and not _has_structural_descendant(node):
return [node]
else:
leaf_nodes = []
for child in node:
leaf_nodes.extend(_leaf_structural_nodes(child))
return leaf_nodes
def _create_doc(node: Any, text: str) -> Document:
"""Create a Document from a node and text."""
def _build_framework_chunk(dg_chunk: Chunk) -> Document:
# Stable IDs for chunks with the same text.
_hashed_id = hashlib.md5(dg_chunk.text.encode()).hexdigest()
metadata = {
XPATH_KEY: _xpath_for_chunk(node),
DOCUMENT_ID_KEY: document[DOCUMENT_ID_KEY],
DOCUMENT_NAME_KEY: document[DOCUMENT_NAME_KEY],
DOCUMENT_SOURCE_KEY: document[DOCUMENT_NAME_KEY],
STRUCTURE_KEY: node.attrib.get("structure", ""),
TAG_KEY: re.sub(r"\{.*\}", "", node.tag),
XPATH_KEY: dg_chunk.xpath,
ID_KEY: _hashed_id,
DOCUMENT_NAME_KEY: document_name,
DOCUMENT_SOURCE_KEY: document_name,
STRUCTURE_KEY: dg_chunk.structure,
TAG_KEY: dg_chunk.tag,
}
if doc_metadata:
metadata.update(doc_metadata)
text = dg_chunk.text
if additional_doc_metadata:
if self.include_project_metadata_in_doc_metadata:
metadata.update(additional_doc_metadata)
return Document(
page_content=text,
page_content=text[: self.max_text_length],
metadata=metadata,
)
# parse the tree and return chunks
# Parse the tree and return chunks
tree = etree.parse(io.BytesIO(content))
root = tree.getroot()
chunks: List[Document] = []
prev_small_chunk_text = None
for node in _leaf_structural_nodes(root):
text = _get_text(node)
if prev_small_chunk_text:
text = prev_small_chunk_text + " " + text
prev_small_chunk_text = None
dg_chunks = get_chunks(
root,
min_text_length=self.min_text_length,
max_text_length=self.max_text_length,
whitespace_normalize_text=self.whitespace_normalize_text,
sub_chunk_tables=self.sub_chunk_tables,
include_xml_tags=self.include_xml_tags,
parent_hierarchy_levels=self.parent_hierarchy_levels,
)
if _is_heading(node) or len(text) < self.min_chunk_size:
# Save headings or other small chunks to be appended to the next chunk
prev_small_chunk_text = text
else:
chunks.append(_create_doc(node, text))
framework_chunks: Dict[str, Document] = {}
for dg_chunk in dg_chunks:
framework_chunk = _build_framework_chunk(dg_chunk)
chunk_id = framework_chunk.metadata.get(ID_KEY)
if chunk_id:
framework_chunks[chunk_id] = framework_chunk
if dg_chunk.parent:
framework_parent_chunk = _build_framework_chunk(dg_chunk.parent)
parent_id = framework_parent_chunk.metadata.get(ID_KEY)
if parent_id and framework_parent_chunk.page_content:
framework_chunk.metadata[self.parent_id_key] = parent_id
framework_chunks[parent_id] = framework_parent_chunk
if prev_small_chunk_text and len(chunks) > 0:
# small chunk at the end left over, just append to last chunk
chunks[-1].page_content += " " + prev_small_chunk_text
return chunks
return list(framework_chunks.values())
def _document_details_for_docset_id(self, docset_id: str) -> List[Dict]:
"""Gets all document details for the given docset ID"""
@@ -229,11 +217,12 @@ class DocugamiLoader(BaseLoader, BaseModel):
def _metadata_for_project(self, project: Dict) -> Dict:
"""Gets project metadata for all files"""
project_id = project.get("id")
project_id = project.get(ID_KEY)
url = f"{self.api}/projects/{project_id}/artifacts/latest"
all_artifacts = []
per_file_metadata: Dict = {}
while url:
response = requests.request(
"GET",
@@ -245,22 +234,24 @@ class DocugamiLoader(BaseLoader, BaseModel):
data = response.json()
all_artifacts.extend(data["artifacts"])
url = data.get("next", None)
elif response.status_code == 404:
# Not found is ok, just means no published projects
return per_file_metadata
else:
raise Exception(
f"Failed to download {url} (status: {response.status_code})"
)
per_file_metadata = {}
for artifact in all_artifacts:
artifact_name = artifact.get("name")
artifact_url = artifact.get("url")
artifact_doc = artifact.get("document")
if artifact_name == "report-values.xml" and artifact_url and artifact_doc:
doc_id = artifact_doc["id"]
doc_id = artifact_doc[ID_KEY]
metadata: Dict = {}
# the evaluated XML for each document is named after the project
# The evaluated XML for each document is named after the project
response = requests.request(
"GET",
f"{artifact_url}/content",
@@ -285,7 +276,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
value = " ".join(
entry.xpath("./pr:Value", namespaces=ns)[0].itertext()
).strip()
metadata[heading] = value
metadata[heading] = value[: self.max_metadata_length]
per_file_metadata[doc_id] = metadata
else:
raise Exception(
@@ -296,10 +287,13 @@ class DocugamiLoader(BaseLoader, BaseModel):
return per_file_metadata
def _load_chunks_for_document(
self, docset_id: str, document: Dict, doc_metadata: Optional[Dict] = None
self,
document_id: str,
docset_id: str,
document_name: Optional[str] = None,
additional_metadata: Optional[Mapping] = None,
) -> List[Document]:
"""Load chunks for a document."""
document_id = document["id"]
url = f"{self.api}/docsets/{docset_id}/documents/{document_id}/dgml"
response = requests.request(
@@ -310,7 +304,11 @@ class DocugamiLoader(BaseLoader, BaseModel):
)
if response.ok:
return self._parse_dgml(document, response.content, doc_metadata)
return self._parse_dgml(
content=response.content,
document_name=document_name,
additional_doc_metadata=additional_metadata,
)
else:
raise Exception(
f"Failed to download {url} (status: {response.status_code})"
@@ -321,37 +319,44 @@ class DocugamiLoader(BaseLoader, BaseModel):
chunks: List[Document] = []
if self.access_token and self.docset_id:
# remote mode
# Remote mode
_document_details = self._document_details_for_docset_id(self.docset_id)
if self.document_ids:
_document_details = [
d for d in _document_details if d["id"] in self.document_ids
d for d in _document_details if d[ID_KEY] in self.document_ids
]
_project_details = self._project_details_for_docset_id(self.docset_id)
combined_project_metadata = {}
if _project_details:
# if there are any projects for this docset, load project metadata
combined_project_metadata: Dict[str, Dict] = {}
if _project_details and self.include_project_metadata_in_doc_metadata:
# If there are any projects for this docset and the caller requested
# project metadata, load it.
for project in _project_details:
metadata = self._metadata_for_project(project)
combined_project_metadata.update(metadata)
for file_id in metadata:
if file_id not in combined_project_metadata:
combined_project_metadata[file_id] = metadata[file_id]
else:
combined_project_metadata[file_id].update(metadata[file_id])
for doc in _document_details:
doc_metadata = combined_project_metadata.get(doc["id"])
doc_id = doc[ID_KEY]
doc_name = doc.get(DOCUMENT_NAME_KEY)
doc_metadata = combined_project_metadata.get(doc_id)
chunks += self._load_chunks_for_document(
self.docset_id, doc, doc_metadata
document_id=doc_id,
docset_id=self.docset_id,
document_name=doc_name,
additional_metadata=doc_metadata,
)
elif self.file_paths:
# local mode (for integration testing, or pre-downloaded XML)
# Local mode (for integration testing, or pre-downloaded XML)
for path in self.file_paths:
path = Path(path)
with open(path, "rb") as file:
chunks += self._parse_dgml(
{
DOCUMENT_ID_KEY: path.name,
DOCUMENT_NAME_KEY: path.name,
},
file.read(),
content=file.read(),
document_name=path.name,
)
return chunks

View File

@@ -1,3 +1,4 @@
from enum import Enum
from typing import List
from langchain_core.documents import Document
@@ -9,6 +10,15 @@ from langchain_core.vectorstores import VectorStore
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
class SearchType(str, Enum):
    """Enumerator of the types of search to perform.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value (for example ``SearchType.mmr == "mmr"``).
    """

    similarity = "similarity"
    """Similarity search."""
    mmr = "mmr"
    """Maximal Marginal Relevance reranking of similarity search."""
class MultiVectorRetriever(BaseRetriever):
"""Retrieve from a set of multiple embeddings for the same document."""
@@ -20,6 +30,8 @@ class MultiVectorRetriever(BaseRetriever):
id_key: str = "doc_id"
search_kwargs: dict = Field(default_factory=dict)
"""Keyword arguments to pass to the search function."""
search_type: SearchType = SearchType.similarity
"""Type of search to perform (similarity / mmr)"""
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
@@ -31,7 +43,13 @@ class MultiVectorRetriever(BaseRetriever):
Returns:
List of relevant documents
"""
sub_docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
if self.search_type == SearchType.mmr:
sub_docs = self.vectorstore.max_marginal_relevance_search(
query, **self.search_kwargs
)
else:
sub_docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
# We do this to maintain the order of the ids that are returned
ids = []
for d in sub_docs:

View File

@@ -1973,6 +1973,21 @@ files = [
[package.dependencies]
packaging = "*"
[[package]]
name = "dgml-utils"
version = "0.3.0"
description = "Python utilities to work with the Docugami Markup Language (DGML) format."
optional = true
python-versions = ">=3.8.1,<4.0"
files = [
{file = "dgml_utils-0.3.0-py3-none-any.whl", hash = "sha256:0cb8f6fd7f5fa31919343266260c166aa53009b42a11a172e808fc707e1ac5ba"},
{file = "dgml_utils-0.3.0.tar.gz", hash = "sha256:02722e899122caedfb1e90d0be557c7e6dddf86f7f4c19d7888212efde9f78c9"},
]
[package.dependencies]
lxml = ">=4.9.3,<5.0.0"
tabulate = ">=0.9.0,<0.10.0"
[[package]]
name = "dill"
version = "0.3.7"
@@ -2952,7 +2967,7 @@ files = [
{file = "greenlet-3.0.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0b72b802496cccbd9b31acea72b6f87e7771ccfd7f7927437d592e5c92ed703c"},
{file = "greenlet-3.0.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:527cd90ba3d8d7ae7dceb06fda619895768a46a1b4e423bdb24c1969823b8362"},
{file = "greenlet-3.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:37f60b3a42d8b5499be910d1267b24355c495064f271cfe74bf28b17b099133c"},
{file = "greenlet-3.0.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1482fba7fbed96ea7842b5a7fc11d61727e8be75a077e603e8ab49d24e234383"},
{file = "greenlet-3.0.0-cp311-universal2-macosx_10_9_universal2.whl", hash = "sha256:c3692ecf3fe754c8c0f2c95ff19626584459eab110eaab66413b1e7425cd84e9"},
{file = "greenlet-3.0.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:be557119bf467d37a8099d91fbf11b2de5eb1fd5fc5b91598407574848dc910f"},
{file = "greenlet-3.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73b2f1922a39d5d59cc0e597987300df3396b148a9bd10b76a058a2f2772fc04"},
{file = "greenlet-3.0.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1e22c22f7826096ad503e9bb681b05b8c1f5a8138469b255eb91f26a76634f2"},
@@ -2962,6 +2977,7 @@ files = [
{file = "greenlet-3.0.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:952256c2bc5b4ee8df8dfc54fc4de330970bf5d79253c863fb5e6761f00dda35"},
{file = "greenlet-3.0.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:269d06fa0f9624455ce08ae0179430eea61085e3cf6457f05982b37fd2cefe17"},
{file = "greenlet-3.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9adbd8ecf097e34ada8efde9b6fec4dd2a903b1e98037adf72d12993a1c80b51"},
{file = "greenlet-3.0.0-cp312-universal2-macosx_10_9_universal2.whl", hash = "sha256:553d6fb2324e7f4f0899e5ad2c427a4579ed4873f42124beba763f16032959af"},
{file = "greenlet-3.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6b5ce7f40f0e2f8b88c28e6691ca6806814157ff05e794cdd161be928550f4c"},
{file = "greenlet-3.0.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecf94aa539e97a8411b5ea52fc6ccd8371be9550c4041011a091eb8b3ca1d810"},
{file = "greenlet-3.0.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80dcd3c938cbcac986c5c92779db8e8ce51a89a849c135172c88ecbdc8c056b7"},
@@ -4651,16 +4667,6 @@ files = [
{file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
{file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
{file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
{file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
{file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
{file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
{file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@@ -7797,7 +7803,6 @@ files = [
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
{file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
{file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
{file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
{file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
{file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@@ -7805,15 +7810,8 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
{file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
{file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
{file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
{file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
{file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
{file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@@ -7830,7 +7828,6 @@ files = [
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
{file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
{file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
{file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
{file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
{file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@@ -7838,7 +7835,6 @@ files = [
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
{file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
{file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
{file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
{file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
{file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@@ -11179,14 +11175,14 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
cffi = ["cffi (>=1.11)"]
[extras]
all = ["O365", "aleph-alpha-client", "amadeus", "arxiv", "atlassian-python-api", "awadb", "azure-ai-formrecognizer", "azure-ai-textanalytics", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clarifai", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "esprima", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jinja2", "jq", "lancedb", "langkit", "lark", "librosa", "lxml", "manifest-ml", "marqo", "momento", "nebula3-python", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "python-arango", "pyvespa", "qdrant-client", "rdflib", "redis", "requests-toolbelt", "sentence-transformers", "singlestoredb", "tensorflow-text", "tigrisdb", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
all = ["O365", "aleph-alpha-client", "amadeus", "arxiv", "atlassian-python-api", "awadb", "azure-ai-formrecognizer", "azure-ai-textanalytics", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clarifai", "clickhouse-connect", "cohere", "deeplake", "dgml-utils", "docarray", "duckduckgo-search", "elasticsearch", "esprima", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jinja2", "jq", "lancedb", "langkit", "lark", "librosa", "lxml", "manifest-ml", "marqo", "momento", "nebula3-python", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "python-arango", "pyvespa", "qdrant-client", "rdflib", "redis", "requests-toolbelt", "sentence-transformers", "singlestoredb", "tensorflow-text", "tigrisdb", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
azure = ["azure-ai-formrecognizer", "azure-ai-textanalytics", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "azure-search-documents", "openai"]
clarifai = ["clarifai"]
cli = ["typer"]
cohere = ["cohere"]
docarray = ["docarray"]
embeddings = ["sentence-transformers"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "databricks-vectorsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "databricks-vectorsearch", "dgml-utils", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
javascript = ["esprima"]
llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
openai = ["openai", "tiktoken"]
@@ -11196,4 +11192,4 @@ text-helpers = ["chardet"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "943da392f7b9f8d3677e879ef971eb50c068e0b5658e6e01f3b2589e82fa3b71"
content-hash = "ef4b14aed39d823f33de6bda543aadf208c7adedf75bf9db28a682fcc46ea792"

View File

@@ -145,7 +145,7 @@ fireworks-ai = {version = "^0.6.0", optional = true, python = ">=3.9,<4.0"}
javelin-sdk = {version = "^0.1.8", optional = true}
msal = {version = "^1.25.0", optional = true}
databricks-vectorsearch = {version = "^0.21", optional = true}
dgml-utils = {version = "^0.3.0", optional = true}
[tool.poetry.group.test.dependencies]
# The only dependencies that should be added are
@@ -167,7 +167,6 @@ syrupy = "^4.0.2"
requests-mock = "^1.11.0"
langchain-core = {path = "../core", develop = true}
[tool.poetry.group.codespell.dependencies]
codespell = "^2.2.0"
@@ -314,6 +313,7 @@ all = [
"amadeus",
"librosa",
"python-arango",
"dgml-utils",
]
cli = [
@@ -384,6 +384,7 @@ extended_testing = [
"fireworks-ai",
"javelin-sdk",
"databricks-vectorsearch",
"dgml-utils",
]
[tool.ruff]

View File

@@ -1,336 +1,379 @@
<?xml version="1.0" encoding="utf-8"?>
<docset:MUTUALNON-DISCLOSUREAGREEMENT-section xmlns:docset="http://www.docugami.com/2021/dgml/PublishTest/NDA" xmlns:addedChunks="http://www.docugami.com/2021/dgml/PublishTest/NDA/addedChunks" xmlns:dg="http://www.docugami.com/2021/dgml" xmlns:dgc="http://www.docugami.com/2021/dgml/docugami/contracts" xmlns:dgm="http://www.docugami.com/2021/dgml/docugami/medical" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xhtml="http://www.w3.org/1999/xhtml">
<docset:MutualNon-disclosure structure="h1">
<dg:chunk>MUTUAL NON-DISCLOSURE AGREEMENT </dg:chunk>
</docset:MutualNon-disclosure>
<docset:MUTUALNON-DISCLOSUREAGREEMENT structure="div">
<docset:ThisMutualNon-disclosureAgreement>
<docset:Preamble structure="p">
This
<dg:chunk>Mutual Non-Disclosure Agreement </dg:chunk>(this “
<dg:chunk>Agreement</dg:chunk>”) is entered into and made effective as of
<docset:EffectiveDate xsi:type="date">2/4/2018 </docset:EffectiveDate>between
<dgc:Org>Docugami Inc.</dgc:Org>, a
<dgc:USState>Delaware </dgc:USState>corporation, whose address is 150 Lake Street South, Suite 221, Kirkland,
<dgc:USState>Washington </dgc:USState>
<dg:chunk>Delaware corporation</dg:chunk>, whose address is
<docset:CompanyAddress>
<dgc:Street>
<dg:chunk>
<dgc:Number>150 </dgc:Number>
<dgc:StreetName>Lake Street South</dgc:StreetName>
</dg:chunk>,
<dgc:Apt>
<dg:chunk>Suite </dg:chunk>
<docset:Suite>221</docset:Suite>
</dgc:Apt>
</dgc:Street>,
<dgc:City>Kirkland</dgc:City>,
<dgc:State>Washington </dgc:State>
<dgc:Number>98033</dgc:Number>
</docset:CompanyAddress>, and
<docset:Signatory>Leonarda Hosler</docset:Signatory>, an individual, whose address is
<dgc:Address>
<dgc:Street>
<dgc:Number>374 </dgc:Number>
<dgc:StreetName>William S Canning Blvd</dgc:StreetName>
</dgc:Street>,
<dg:chunk>
<dgc:City>Fall River </dgc:City>
<dgc:State>MA </dgc:State>
</dg:chunk>
</dgc:Address>
<dgc:Number>2721</dgc:Number>
<docset:SignatoryAddress>374 William S Canning Blvd, Fall River MA 2721</docset:SignatoryAddress>.
</docset:Preamble>
<docset:Discussions structure="p">
<dg:chunk cp:version="2.10.10.0.1699162341377-69.0"
xmlns:docset="http://www.docugami.com/2021/dgml/TaqiTest20231103/NDA"
xmlns:addedChunks="http://www.docugami.com/2021/dgml/TaqiTest20231103/NDA/addedChunks"
xmlns:dg="http://www.docugami.com/2021/dgml"
xmlns:dgc="http://www.docugami.com/2021/dgml/docugami/contracts"
xmlns:dgm="http://www.docugami.com/2021/dgml/docugami/medical"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xhtml="http://www.w3.org/1999/xhtml"
xmlns:cp="http://classifyprocess.com/2018/07/">
<docset:MUTUALNON-DISCLOSUREAGREEMENT-section>
<dg:chunk structure="h1">NON-DISCLOSURE AGREEMENT </dg:chunk>
<docset:MUTUALNON-DISCLOSUREAGREEMENT structure="div"> This Non-Disclosure Agreement
("Agreement") is entered into as of <docset:EffectiveDate>November 4, 2023 </docset:EffectiveDate>("Effective
Date"), by and between: </docset:MUTUALNON-DISCLOSUREAGREEMENT>
</docset:MUTUALNON-DISCLOSUREAGREEMENT-section>
<docset:DisclosingParty-section>
<dg:chunk structure="h1">
The above named parties desire to engage in discussions regarding a potential agreement or other transaction between the parties (the “Purpose”). In connection with such discussions, it may be necessary for the parties to disclose to each other certain confidential information or materials to enable them to evaluate whether to enter into such agreement or transaction. </docset:Discussions>
<docset:Consideration>
<docset:Consideration structure="p">
In consideration of the foregoing, the parties agree as follows: </docset:Consideration>
<docset:Purposes style="list-style-type: decimal; boundingBox:{left: 266.7; top: 1175.0; width: 2012.5; height: 1858.3; page: 1;}; boundingBox:{left: 266.7; top: 245.8; width: 2012.5; height: 1737.5; page: 2;}; " structure="ol">
<docset:Purposes style="boundingBox:{left: 266.7; top: 1175.0; width: 2012.5; height: 575.0; page: 1;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 1175.0; width: 50.0; height: 50.0; page: 1;}; " structure="lim">1. </dg:chunk>
<docset:ConfidentialInformation-section>
<docset:ConfidentialInformation structure="h1">
<dg:chunk>Confidential Information</dg:chunk>.
</docset:ConfidentialInformation>
<docset:ConfidentialInformation structure="div">For purposes of this
<dg:chunk>Agreement</dg:chunk>, “
<dg:chunk>Confidential Information</dg:chunk>” means any information or materials disclosed by
<dg:chunk>
<dg:chunk>one </dg:chunk>party
</dg:chunk>to the other party that: (i) if disclosed in writing or in the form of tangible materials, is marked “confidential” or “proprietary” at the time of such disclosure; (ii) if disclosed orally or by visual presentation, is identified as “confidential” or “proprietary” at the time of such disclosure, and is summarized in a writing sent by the disclosing party to the receiving party within
<dgc:TimeDuration>
<dg:chunk>thirty </dg:chunk>(
<dg:chunk>30</dg:chunk>) days
</dgc:TimeDuration>after any such disclosure; or (iii) due to its nature or the circumstances of its disclosure, a person exercising reasonable business judgment would understand to be confidential or proprietary.
</docset:ConfidentialInformation>
</docset:ConfidentialInformation-section>
</docset:Purposes>
<docset:Obligations style="boundingBox:{left: 266.7; top: 1758.3; width: 2012.5; height: 691.7; page: 1;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 1758.3; width: 50.0; height: 50.0; page: 1;}; " structure="lim">2. </dg:chunk>
<docset:ObligationsAndRestrictions-section>
<docset:Obligations structure="h1">Obligations and
<dg:chunk>Restrictions</dg:chunk>.
</docset:Obligations>
<docset:ObligationsAndRestrictions structure="div">Each party agrees: (i) to maintain the
<dg:chunk>other party's Confidential Information </dg:chunk>in strict confidence; (ii) not to disclose
<dg:chunk>such Confidential Information </dg:chunk>to any third party; and (iii) not to use
<dg:chunk>such Confidential Information </dg:chunk>for any purpose except for the Purpose. Each party may disclose the
<dg:chunk>other partys Confidential Information </dg:chunk>to its employees and consultants who have a bona fide need to know
<dg:chunk>such Confidential Information </dg:chunk>for the Purpose, but solely to the extent necessary to pursue the
<dg:chunk>Purpose </dg:chunk>and for no other purpose; provided, that each such employee and consultant first executes a written agreement (or is otherwise already bound by a written agreement) that contains use and nondisclosure restrictions at least as protective of the
<dg:chunk>other partys Confidential Information </dg:chunk>as those set forth in this
<dg:chunk>Agreement</dg:chunk>.
</docset:ObligationsAndRestrictions>
</docset:ObligationsAndRestrictions-section>
</docset:Obligations>
<docset:Exceptions style="boundingBox:{left: 266.7; top: 2458.3; width: 2012.5; height: 108.3; page: 1;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 2458.3; width: 50.0; height: 50.0; page: 1;}; " structure="lim">3. </dg:chunk>
<docset:Exceptions-section>
<docset:Exceptions structure="h1">Exceptions. </docset:Exceptions>
<docset:Exceptions structure="div">The obligations and restrictions in Section
<dg:chunk>2 </dg:chunk>will not apply to any information or materials that:
</docset:Exceptions>
</docset:Exceptions-section>
</docset:Exceptions>
<docset:TheDate style="boundingBox:{left: 266.7; top: 2575.0; width: 2012.5; height: 166.7; page: 1;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 2575.0; width: 58.3; height: 50.0; page: 1;}; " structure="lim">(i) </dg:chunk>
<docset:TheDate structure="p">were, at the date of disclosure, or have subsequently become, generally known or available to the public through no act or failure to act by the receiving party; </docset:TheDate>
</docset:TheDate>
<docset:SuchInformation style="boundingBox:{left: 266.7; top: 2750.0; width: 2012.5; height: 108.3; page: 1;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 2750.0; width: 70.8; height: 50.0; page: 1;}; " structure="lim">(ii) </dg:chunk>
<docset:TheReceivingParty structure="p">were rightfully known by the receiving party prior to receiving such information or materials from the disclosing party; </docset:TheReceivingParty>
</docset:SuchInformation>
<docset:TheReceivingParty style="boundingBox:{left: 266.7; top: 2866.7; width: 2012.5; height: 166.7; page: 1;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 2866.7; width: 87.5; height: 50.0; page: 1;}; " structure="lim">(iii) </dg:chunk>
<docset:TheReceivingParty structure="p">are rightfully acquired by the receiving party from a third party who has the right to disclose such information or materials without breach of any confidentiality obligation to the disclosing party; or </docset:TheReceivingParty>
</docset:TheReceivingParty>
<docset:TheReceivingParty style="boundingBox:{left: 266.7; top: 245.8; width: 2012.5; height: 108.3; page: 2;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 245.8; width: 83.3; height: 50.0; page: 2;}; " structure="lim">(iv) </dg:chunk>
<docset:TheReceivingParty structure="p">are independently developed by the receiving party without access to any
<dg:chunk>Confidential Information </dg:chunk>of the disclosing party.
</docset:TheReceivingParty>
</docset:TheReceivingParty>
<docset:Disclosure style="boundingBox:{left: 266.7; top: 362.5; width: 2012.5; height: 341.7; page: 2;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 362.5; width: 50.0; height: 50.0; page: 2;}; " structure="lim">4. </dg:chunk>
<docset:CompelledDisclosure-section>
<dg:chunk structure="h1">
<dg:chunk>Compelled Disclosure</dg:chunk>.
</dg:chunk>
<docset:CompelledDisclosure structure="div">Nothing in this
<dg:chunk>Agreement </dg:chunk>will be deemed to restrict a party from disclosing the
<dg:chunk>other partys Confidential Information </dg:chunk>to the extent required by any order, subpoena, law, statute or regulation; provided, that the party required to make such a disclosure uses reasonable efforts to give the other party reasonable advance notice of such required disclosure in order to enable the other party to prevent or limit such disclosure.
</docset:CompelledDisclosure>
</docset:CompelledDisclosure-section>
</docset:Disclosure>
<docset:TheCompletion style="boundingBox:{left: 266.7; top: 712.5; width: 2012.5; height: 512.5; page: 2;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 712.5; width: 50.0; height: 50.0; page: 2;}; " structure="lim">5. </dg:chunk>
<docset:ReturnofConfidentialInformation-section>
<dg:chunk structure="h1">Return of
<dg:chunk>Confidential Information</dg:chunk>.
</dg:chunk>
<docset:ReturnofConfidentialInformation structure="div">Upon the completion or abandonment of the Purpose, and in any event upon the disclosing partys request, the receiving party will promptly return to the disclosing party all tangible items and embodiments containing or consisting of the
<dg:chunk>disclosing partys Confidential Information </dg:chunk>and all copies thereof (including electronic copies), and any notes, analyses, compilations, studies, interpretations, memoranda or other documents (regardless of the form thereof) prepared by or on behalf of the receiving party that contain or are based upon the
<dg:chunk>disclosing partys Confidential Information</dg:chunk>.
</docset:ReturnofConfidentialInformation>
</docset:ReturnofConfidentialInformation-section>
</docset:TheCompletion>
<docset:NoObligations style="boundingBox:{left: 266.7; top: 1233.3; width: 2012.5; height: 283.3; page: 2;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 1233.3; width: 50.0; height: 50.0; page: 2;}; " structure="lim">6. </dg:chunk>
<docset:NoObligations-section>
<docset:NoObligations structure="h1">No
<dg:chunk>Obligations</dg:chunk>.
</docset:NoObligations>
<docset:NoObligations structure="div">Each party retains the right, in its sole discretion, to determine whether to disclose any
<dg:chunk>Confidential Information </dg:chunk>to the other party. Neither party will be required to negotiate nor enter into any other agreements or arrangements with the other party, whether or not related to the Purpose.
</docset:NoObligations>
</docset:NoObligations-section>
</docset:NoObligations>
<docset:TheSoleAndExclusiveProperty style="boundingBox:{left: 266.7; top: 1525.0; width: 2012.5; height: 399.0; page: 2;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 1525.0; width: 50.0; height: 50.0; page: 2;}; " structure="lim">7. </dg:chunk>
<docset:NoLicense-section>
<docset:NoLicense structure="h1">No
<dg:chunk>License</dg:chunk>.
</docset:NoLicense>
<docset:NoLicense structure="div">All
<dg:chunk>Confidential Information </dg:chunk>remains the sole and exclusive property of the disclosing party. Each party acknowledges and agrees that nothing in this
<dg:chunk>Agreement </dg:chunk>will be construed as granting any rights to the receiving party, by license or otherwise, in or to any
<dg:chunk>Confidential Information </dg:chunk>of the disclosing party, or any patent, copyright or other intellectual property or proprietary rights of the disclosing party, except as specified in this
<dg:chunk>Agreement</dg:chunk>.
</docset:NoLicense>
</docset:NoLicense-section>
</docset:TheSoleAndExclusiveProperty>
<docset:NoWarranty style="boundingBox:{left: 416.7; top: 1933.3; width: 1862.5; height: 50.0; page: 2;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 1933.3; width: 50.0; height: 50.0; page: 2;}; " structure="lim">8. </dg:chunk>
<docset:NoWarranty structure="h1">No Warranty. ALL CONFIDENTIAL
<dgm:Diagnosis>INFORMATION </dgm:Diagnosis>
<dg:chunk>CONFIDENTIAL INFORMATION </dg:chunk>IS PROVIDED
</docset:NoWarranty>
</docset:NoWarranty>
<docset:Exceptions>The obligations and restrictions in Section 2 will not apply to any information or materials that:
(i) were, at the date of disclosure, or have subsequently become, generally known or available to the public through no act or failure to act by the receiving party;
(ii) were rightfully known by the receiving party prior to receiving such information or materials from the disclosing party;
(iii) are rightfully acquired by the receiving party from a third party who has the right to disclose such information or materials without breach of any confidentiality obligation to the disclosing party; or
(iv) are independently developed by the receiving party without access to any Confidential Information of the disclosing party. </docset:Exceptions>
4. Compelled Disclosure. Nothing in this Agreement will be deemed to restrict a party from disclosing the other partys Confidential Information to the extent required by any order, subpoena, law, statute or regulation; provided, that the party required to make such a disclosure uses reasonable efforts to give the other party reasonable advance notice of such required disclosure in order to enable the other party to prevent or limit such disclosure.
5. Return of Confidential Information. Upon the completion or abandonment of the Purpose, and in any event upon the disclosing partys request, the receiving party will promptly return to the disclosing party all tangible items and embodiments containing or consisting of the disclosing partys Confidential Information and all copies thereof (including electronic copies), and any notes, analyses, compilations, studies, interpretations, memoranda or other documents (regardless of the form thereof) prepared by or on behalf of the receiving party that contain or are based upon the disclosing partys Confidential Information.
6. No Obligations. Each party retains the right, in its sole discretion, to determine whether to disclose any Confidential Information to the other party. Neither party will be required to negotiate nor enter into any other agreements or arrangements with the other party, whether or not related to the Purpose.
7. No License. All Confidential Information remains the sole and exclusive property of the disclosing party. Each party acknowledges and agrees that nothing in this Agreement will be construed as granting any rights to the receiving party, by license or otherwise, in or to any Confidential Information of the disclosing party, or any patent, copyright or other intellectual property or proprietary rights of the disclosing party, except as specified in this Agreement.
8. No Warranty. ALL CONFIDENTIAL INFORMATION IS PROVIDED
</docset:Purposes>
</docset:Consideration>
</docset:ThisMutualNon-disclosureAgreement>
<docset:Effect>
Disclosing Party: </dg:chunk>
<docset:DisclosingParty structure="div"><docset:PrincipalPlaceofBusiness>Widget Corp.</docset:PrincipalPlaceofBusiness>,
a <dgc:USState>Delaware </dgc:USState>corporation with its principal place of business
at <docset:PrincipalPlaceofBusiness><docset:PrincipalPlaceofBusiness>
<docset:WidgetCorpAddress>123 </docset:WidgetCorpAddress>
<docset:PrincipalPlaceofBusiness>Innovation Drive</docset:PrincipalPlaceofBusiness>
</docset:PrincipalPlaceofBusiness>
, <docset:PrincipalPlaceofBusiness>Techville</docset:PrincipalPlaceofBusiness>, <dgc:USState>
Delaware</dgc:USState>, <docset:PrincipalPlaceofBusiness>12345 </docset:PrincipalPlaceofBusiness></docset:PrincipalPlaceofBusiness>
("<dgc:Org>
<docset:CompanyName>Widget </docset:CompanyName>
<docset:CorporateName>Corp.</docset:CorporateName>
</dgc:Org>") </docset:DisclosingParty>
</docset:DisclosingParty-section>
<dg:chunk>
<docset:ReceivingParty-section>
<dg:chunk structure="h1">
BY THE
<dg:chunk>DISCLOSING PARTY </dg:chunk>“AS IS”.
</dg:chunk>
<docset:Effect structure="div">
<docset:Effect style="list-style-type: decimal; boundingBox:{left: 266.7; top: 2050.0; width: 2012.5; height: 979.2; page: 2;}; " structure="ol">
<docset:ThisAgreement style="boundingBox:{left: 266.7; top: 2050.0; width: 2012.5; height: 166.7; page: 2;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 2050.0; width: 50.0; height: 50.0; page: 2;}; " structure="lim">9. </dg:chunk>
<docset:Term-section>
<dg:chunk structure="h1">Term. </dg:chunk>
<docset:Term structure="div">This
<dg:chunk>Agreement </dg:chunk>will remain in effect for a period of
<docset:RemaininEffect>
<dg:chunk>five </dg:chunk>(
<dg:chunk>5</dg:chunk>) years
</docset:RemaininEffect>from the date of last disclosure of
<dg:chunk>Confidential Information </dg:chunk>by either party, at which time it will terminate.
</docset:Term>
</docset:Term-section>
</docset:ThisAgreement>
<docset:EquitableRelief style="boundingBox:{left: 266.7; top: 2225.0; width: 2012.5; height: 400.0; page: 2;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 2225.0; width: 79.2; height: 50.0; page: 2;}; " structure="lim">10. </dg:chunk>
<docset:EquitableRelief-section>
<docset:EquitableRelief structure="h1">
<dg:chunk>Equitable Relief</dg:chunk>.
</docset:EquitableRelief>
<docset:EquitableRelief structure="div">Each party acknowledges that the unauthorized use or disclosure of the
<dg:chunk>disclosing partys Confidential Information </dg:chunk>may cause the disclosing party to incur irreparable harm and significant damages, the degree of which may be difficult to ascertain. Accordingly, each party agrees that the disclosing party will have the right to seek immediate equitable relief to enjoin any unauthorized use or disclosure of
<dg:chunk>its Confidential Information</dg:chunk>, in addition to any other rights and remedies that it may have at law or otherwise.
</docset:EquitableRelief>
</docset:EquitableRelief-section>
</docset:EquitableRelief>
<docset:Accordance style="boundingBox:{left: 266.7; top: 2633.3; width: 2012.5; height: 395.8; page: 2;}; " structure="li">
<dg:chunk style="boundingBox:{left: 416.7; top: 2633.3; width: 79.2; height: 50.0; page: 2;}; " structure="lim">11. </dg:chunk>
<docset:Miscellaneous-section>
<dg:chunk structure="h1">Miscellaneous. </dg:chunk>
<docset:Miscellaneous structure="div">This
<dg:chunk>Agreement </dg:chunk>will be governed and construed in accordance with the laws of the
<dg:chunk>State </dg:chunk>of
<dgc:USState>Washington</dgc:USState>, excluding its body of law controlling conflict of laws. This
<dg:chunk>Agreement </dg:chunk>is the complete and exclusive understanding and agreement between the parties regarding the subject matter of this
<dg:chunk>Agreement </dg:chunk>and supersedes all prior agreements, understandings and communications, oral or written, between the parties regarding the subject matter of this
<dg:chunk>Agreement</dg:chunk>. If any provision of this
<dg:chunk>Agreement </dg:chunk>is held invalid or unenforceable by a court of competent jurisdiction, that provision of this
<dg:chunk>Agreement </dg:chunk>will be enforced to the maximum extent permissible and the other provisions of this
<dg:chunk>Agreement </dg:chunk>will remain in full force and effect. Neither party may assign this
<dg:chunk>Agreement</dg:chunk>, in whole or in part, by operation of law or otherwise, without the other partys prior written consent, and any attempted assignment without such consent will be void. This
<dg:chunk>Agreement </dg:chunk>may be executed in counterparts, each of which will be deemed an original, but all of which together will constitute one and the same instrument.
</docset:Miscellaneous>
</docset:Miscellaneous-section>
</docset:Accordance>
</docset:Effect>
<docset:SIGNATUREPAGEFOLLOWS-section>
<dg:chunk structure="h1">
Receiving Party: </dg:chunk>
<docset:ReceivingParty structure="div">
<dg:chunk structure="p"><docset:RecipientName>Jane Doe</docset:RecipientName>, an
individual residing at <docset:RecipientAddress><docset:RecipientAddress>
<docset:RecipientAddress>456 </docset:RecipientAddress>
<docset:RecipientAddress>Privacy Lane</docset:RecipientAddress>
</docset:RecipientAddress>
, <docset:RecipientAddress>Safetown</docset:RecipientAddress>, <dgc:USState>
California</dgc:USState>, <docset:RecipientAddress>67890 </docset:RecipientAddress></docset:RecipientAddress>
("Recipient") </dg:chunk>
<dg:chunk>
<dg:chunk structure="p">
[SIGNATURE PAGE FOLLOWS] </dg:chunk>
<docset:SIGNATUREPAGEFOLLOWS structure="div">
<dg:chunk structure="h1">
(collectively referred to as the "Parties"). </dg:chunk>
<dg:chunk>
IN
<dg:chunk>WITNESS </dg:chunk>WHEREOF,
<docset:ConfidentialityObligations structure="ol"
style="list-style-type: decimal; boundingBox:{left: 300.0; top: 936.0; width: 30.0; height: 1881.0; page: 1;}; boundingBox:{left: 300.0; top: 309.0; width: 30.0; height: 777.0; page: 2;}; ">
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 936.0; width: 30.0; height: 45.0; page: 1;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 936.0; width: 48.0; height: 45.0; page: 1;}; ">
1. </dg:chunk>
<docset:DefinitionofConfidentialInformation-section>
<dg:chunk structure="h1">Definition of <dg:chunk>Confidential
Information </dg:chunk></dg:chunk>
<docset:DefinitionofConfidentialInformation structure="div">For
purposes of this Agreement, "<dg:chunk>Confidential
Information</dg:chunk>" shall include all information or
material that has or could have commercial value or other
utility in the business in which Disclosing Party is
engaged. If <dg:chunk>Confidential Information </dg:chunk>is
in written form, the <dg:chunk>Disclosing Party </dg:chunk>shall
label or stamp the materials with the word "Confidential" or
some similar warning. If <dg:chunk>Confidential Information </dg:chunk>is
transmitted orally, the <dg:chunk>Disclosing Party </dg:chunk>shall
promptly provide writing indicating that such oral
communication constituted <dg:chunk>Confidential Information</dg:chunk>
. </docset:DefinitionofConfidentialInformation>
</docset:DefinitionofConfidentialInformation-section>
</dg:chunk>
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 1428.0; width: 30.0; height: 48.0; page: 1;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 1428.0; width: 48.0; height: 48.0; page: 1;}; ">
2. </dg:chunk>
<docset:ExclusionsFromConfidentialInformation-section>
<dg:chunk structure="h1">Exclusions from <dg:chunk>Confidential
Information </dg:chunk></dg:chunk>
<docset:ExclusionsFromConfidentialInformation structure="div">Recipient's
obligations under this Agreement do not extend to
information that is: (a) publicly known at the time of
disclosure or subsequently becomes publicly known through no
fault of the Recipient; (b) discovered or created by the
Recipient before disclosure by <dg:chunk>Disclosing Party</dg:chunk>;
(c) learned by the Recipient through legitimate means other
than from the <dg:chunk>Disclosing Party </dg:chunk>or
Disclosing Party's representatives; or (d) is disclosed by
Recipient with Disclosing Party's prior written approval. </docset:ExclusionsFromConfidentialInformation>
</docset:ExclusionsFromConfidentialInformation-section>
</dg:chunk>
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 1866.0; width: 30.0; height: 45.0; page: 1;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 1866.0; width: 48.0; height: 45.0; page: 1;}; ">
3. </dg:chunk>
<docset:ObligationsofReceivingParty-section>
<dg:chunk structure="h1">Obligations of Receiving Party </dg:chunk>
<docset:ObligationsofReceivingParty structure="div">Recipient
shall hold and maintain the <dg:chunk>Confidential
Information </dg:chunk>in strictest confidence for the sole
and exclusive benefit of the <dg:chunk>Disclosing Party</dg:chunk>.
Recipient shall carefully restrict access to <dg:chunk>Confidential
Information </dg:chunk>to employees, contractors, and third
parties as is reasonably required and shall require those
persons to sign nondisclosure restrictions at least as
protective as those in this Agreement. </docset:ObligationsofReceivingParty>
</docset:ObligationsofReceivingParty-section>
</dg:chunk>
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 2244.0; width: 30.0; height: 48.0; page: 1;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 2244.0; width: 48.0; height: 48.0; page: 1;}; ">
4. </dg:chunk>
<docset:TimePeriods-section>
<dg:chunk structure="h1">Time Periods </dg:chunk>
<docset:TimePeriods structure="div">The nondisclosure provisions
of this Agreement shall survive the termination of this
Agreement and Recipient's duty to hold <dg:chunk>Confidential
Information </dg:chunk>in confidence shall remain in effect
until the <dg:chunk>Confidential Information </dg:chunk>no
longer qualifies as a trade secret or until <dg:chunk>Disclosing
Party </dg:chunk>sends Recipient written notice releasing
Recipient from this Agreement, whichever occurs first. </docset:TimePeriods>
</docset:TimePeriods-section>
</dg:chunk>
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 2565.0; width: 30.0; height: 48.0; page: 1;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 2565.0; width: 48.0; height: 48.0; page: 1;}; ">
5. </dg:chunk>
<docset:Relationships-section>
<dg:chunk structure="h1">Relationships </dg:chunk>
<docset:Relationships structure="div">Nothing contained in this
Agreement shall be deemed to constitute either party a
partner, joint venture, or employee of the other party for
any purpose.
</docset:Relationships>
</docset:Relationships-section>
</dg:chunk>
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 2772.0; width: 30.0; height: 45.0; page: 1;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 2772.0; width: 48.0; height: 45.0; page: 1;}; ">
6. </dg:chunk>
<docset:Severability-section>
<dg:chunk structure="h1">Severability </dg:chunk>
<docset:Severability structure="div">If a court finds any
provision of this Agreement invalid or unenforceable, the
remainder of this Agreement shall be interpreted so as best
to effect the intent of the parties.
</docset:Severability>
</docset:Severability-section>
</dg:chunk>
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 309.0; width: 30.0; height: 45.0; page: 2;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 309.0; width: 48.0; height: 45.0; page: 2;}; ">
7. </dg:chunk>
<docset:Integration-section>
<dg:chunk structure="h1">Integration </dg:chunk>
<docset:Integration structure="div">This Agreement expresses the
complete understanding of the parties with respect to the
subject matter and supersedes all prior proposals,
agreements, representations, and understandings. This
Agreement may not be amended except in writing signed by
both parties.
</docset:Integration>
</docset:Integration-section>
</dg:chunk>
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 573.0; width: 30.0; height: 45.0; page: 2;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 573.0; width: 48.0; height: 45.0; page: 2;}; ">
8. </dg:chunk>
<docset:Waiver-section>
<dg:chunk structure="h1">Waiver </dg:chunk>
<docset:Waiver structure="div">The failure to exercise any right
provided in this Agreement shall not be a waiver of prior or
subsequent rights.
</docset:Waiver>
</docset:Waiver-section>
</dg:chunk>
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 720.0; width: 30.0; height: 48.0; page: 2;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 720.0; width: 48.0; height: 48.0; page: 2;}; ">
9. </dg:chunk>
<docset:NoticeofImmunity-section>
<dg:chunk structure="h1">Notice of Immunity </dg:chunk>
<docset:NoticeofImmunity structure="div">Employee is provided
notice that an individual shall not be held criminally or
civilly liable under any federal or state trade secret law
for the disclosure of a trade secret that is made (i) in
confidence to a federal, state, or local government
official, either directly or indirectly, or to an attorney;
and (ii) solely for the purpose of reporting or
investigating a suspected violation of law.
</docset:NoticeofImmunity>
</docset:NoticeofImmunity-section>
</dg:chunk>
<dg:chunk structure="li"
style="boundingBox:{left: 300.0; top: 1041.0; width: 30.0; height: 45.0; page: 2;}; ">
<dg:chunk structure="lim"
style="boundingBox:{left: 300.0; top: 1041.0; width: 81.0; height: 45.0; page: 2;}; ">
10. </dg:chunk>
<dg:chunk>Table of <dg:chunk>Authorized Disclosures </dg:chunk>
</dg:chunk>
</dg:chunk>
</docset:ConfidentialityObligations>
<dg:chunk>
<docset:AuthorizedRecipients structure="p">The following table outlines
individuals who are authorized to receive <dg:chunk>Confidential
Information</dg:chunk>, their role, and the purpose of disclosure: </docset:AuthorizedRecipients>
<docset:TableofAuthorizedDisclosures>
<xhtml:table structure="table"
style="boundingBox:{left: 300.0; top: 1272.0; width: 2040.0; height: 372.0; page: 2;}; ">
<xhtml:tbody structure="tbody"
style="boundingBox:{left: 300.0; top: 1272.0; width: 2040.0; height: 372.0; page: 2;}; ">
<xhtml:tr structure="tr"
style="boundingBox:{left: 300.0; top: 1272.0; width: 2040.0; height: 93.0; page: 2;}; ">
<xhtml:td structure="td"
style="boundingBox:{left: 300.0; top: 1272.0; width: 603.0; height: 93.0; page: 2;}; ">
<dg:chunk>Authorized Individual </dg:chunk>
</xhtml:td>
<xhtml:td structure="td"
style="boundingBox:{left: 924.0; top: 1272.0; width: 114.0; height: 93.0; page: 2;}; ">
Role
</xhtml:td>
<xhtml:td structure="td"
style="boundingBox:{left: 1338.0; top: 1272.0; width: 1002.0; height: 93.0; page: 2;}; ">Purpose
of Disclosure
</xhtml:td>
</xhtml:tr>
<xhtml:tr structure="tr"
style="boundingBox:{left: 300.0; top: 1365.0; width: 2040.0; height: 93.0; page: 2;}; ">
<xhtml:td structure="td"
style="boundingBox:{left: 300.0; top: 1365.0; width: 603.0; height: 93.0; page: 2;}; ">
<docset:AuthorizedIndividualJohnSmith>
<docset:Name>John Smith </docset:Name>
</docset:AuthorizedIndividualJohnSmith>
</xhtml:td>
<xhtml:td structure="td"
style="boundingBox:{left: 903.0; top: 1365.0; width: 435.0; height: 93.0; page: 2;}; ">
<docset:JohnSmithRole>
<docset:ProjectManagerName>Project Manager </docset:ProjectManagerName>
</docset:JohnSmithRole>
</xhtml:td>
<xhtml:td structure="td"
style="boundingBox:{left: 1338.0; top: 1365.0; width: 1002.0; height: 93.0; page: 2;}; ">
<docset:JohnSmithPurposeofDisclosure>
<dg:chunk structure="p">Oversee project to which
the NDA relates </dg:chunk>
</docset:JohnSmithPurposeofDisclosure>
</xhtml:td>
</xhtml:tr>
<xhtml:tr structure="tr"
style="boundingBox:{left: 300.0; top: 1458.0; width: 2040.0; height: 93.0; page: 2;}; ">
<xhtml:td structure="td"
style="boundingBox:{left: 300.0; top: 1458.0; width: 603.0; height: 93.0; page: 2;}; ">
<docset:AuthorizedIndividualLisaWhite>
<docset:Author>Lisa White </docset:Author>
</docset:AuthorizedIndividualLisaWhite>
</xhtml:td>
<xhtml:td structure="td"
style="boundingBox:{left: 903.0; top: 1458.0; width: 435.0; height: 93.0; page: 2;}; ">
<docset:LisaWhiteRole>
<dg:chunk>Lead Developer </dg:chunk>
</docset:LisaWhiteRole>
</xhtml:td>
<xhtml:td structure="td"
style="boundingBox:{left: 1338.0; top: 1458.0; width: 1002.0; height: 93.0; page: 2;}; ">
<docset:LisaWhitePurposeofDisclosure>Software
development and analysis
</docset:LisaWhitePurposeofDisclosure>
</xhtml:td>
</xhtml:tr>
<xhtml:tr structure="tr"
style="boundingBox:{left: 300.0; top: 1551.0; width: 2040.0; height: 93.0; page: 2;}; ">
<xhtml:td structure="td"
style="boundingBox:{left: 300.0; top: 1551.0; width: 603.0; height: 93.0; page: 2;}; ">
<docset:AuthorizedIndividualMichaelBrown>
<docset:Name>Michael Brown </docset:Name>
</docset:AuthorizedIndividualMichaelBrown>
</xhtml:td>
<xhtml:td structure="td"
style="boundingBox:{left: 903.0; top: 1551.0; width: 435.0; height: 93.0; page: 2;}; ">
<docset:MichaelBrownRole>
<dg:chunk>Financial <docset:FinancialAnalyst>
Analyst </docset:FinancialAnalyst></dg:chunk>
</docset:MichaelBrownRole>
</xhtml:td>
<xhtml:td structure="td"
style="boundingBox:{left: 1338.0; top: 1551.0; width: 1002.0; height: 93.0; page: 2;}; ">
<docset:MichaelBrownPurposeofDisclosure>Financial
analysis and reporting </docset:MichaelBrownPurposeofDisclosure>
</xhtml:td>
</xhtml:tr>
</xhtml:tbody>
</xhtml:table>
</docset:TableofAuthorizedDisclosures>
</dg:chunk>
<docset:INWITNESSWHEREOF structure="div">
<docset:TheParties structure="p">the parties hereto have executed this
<dg:chunk>Mutual Non-Disclosure Agreement </dg:chunk>by their duly authorized officers or representatives as of the date first set forth above.
</docset:TheParties>
<docset:DocugamiInc>
<docset:DocugamiInc style="boundingBox:{left: 316.7; top: 529.2; width: 1958.8; height: 247.7; page: 4;}; ">
<xhtml:table style="boundingBox:{left: 316.7; top: 529.2; width: 1958.8; height: 247.7; page: 4;}; ">
<xhtml:tbody style="boundingBox:{left: 316.7; top: 529.2; width: 1958.8; height: 247.7; page: 4;}; ">
<xhtml:tr style="boundingBox:{left: 316.7; top: 529.2; width: 1958.8; height: 91.0; page: 4;}; ">
<xhtml:td style="boundingBox:{left: 316.7; top: 529.2; width: 768.8; height: 91.0; page: 4;}; ">
<docset:DocugamiInc structure="h1">
<dgc:Org>
<dg:chunk>DOCUGAMI INC</dg:chunk>.
</dgc:Org>:
</docset:DocugamiInc>
</xhtml:td>
<xhtml:td style="boundingBox:{left: 1085.4; top: 529.2; width: 1190.0; height: 91.0; page: 4;}; ">
<docset:DOCUGAMIINC structure="h1">
<dgc:Person>Leonarda Hosler</dgc:Person>:
</docset:DOCUGAMIINC>
</xhtml:td>
</xhtml:tr>
<xhtml:tr style="boundingBox:{left: 316.7; top: 620.2; width: 1958.8; height: 156.7; page: 4;}; ">
<xhtml:td style="boundingBox:{left: 316.7; top: 620.2; width: 768.8; height: 156.7; page: 4;}; ">
<docset:DOCUGAMIINCSignatuRe style="boundingBox:{left: 316.7; top: 620.2; width: 768.8; height: 156.7; page: 4;}; ">
<dg:chunk>Signatu </dg:chunk>re:
</docset:DOCUGAMIINCSignatuRe>
</xhtml:td>
<xhtml:td style="boundingBox:{left: 1085.4; top: 620.2; width: 1190.0; height: 156.7; page: 4;}; ">
<docset:LeonardaHosler style="boundingBox:{left: 1085.4; top: 620.2; width: 1190.0; height: 156.7; page: 4;}; ">
<dg:chunk>Signatu </dg:chunk>re:
</docset:LeonardaHosler>
</xhtml:td>
</xhtml:tr>
</xhtml:tbody>
</xhtml:table>
</docset:DocugamiInc>
<docset:JeanPaoliName>
<docset:JeanPaoliName style="boundingBox:{left: 316.7; top: 858.3; width: 1958.8; height: 189.1; page: 4;}; ">
<xhtml:table style="boundingBox:{left: 316.7; top: 858.3; width: 1958.8; height: 189.1; page: 4;}; ">
<xhtml:tbody style="boundingBox:{left: 316.7; top: 858.3; width: 1958.8; height: 189.1; page: 4;}; ">
<xhtml:tr style="boundingBox:{left: 316.7; top: 858.3; width: 1958.8; height: 91.7; page: 4;}; ">
<xhtml:td style="boundingBox:{left: 316.7; top: 858.3; width: 229.2; height: 91.7; page: 4;}; ">
<dg:chunk structure="h1">Name: </dg:chunk>
</xhtml:td>
<xhtml:td style="boundingBox:{left: 545.8; top: 858.3; width: 564.6; height: 91.7; page: 4;}; ">
<dgc:Person style="boundingBox:{left: 545.8; top: 858.3; width: 564.6; height: 91.7; page: 4;}; ">Jean Paoli </dgc:Person>
</xhtml:td>
<xhtml:td style="boundingBox:{left: 1110.4; top: 858.3; width: 1165.0; height: 91.7; page: 4;}; ">
<dg:chunk structure="h1">Name: </dg:chunk>
</xhtml:td>
</xhtml:tr>
<xhtml:tr style="boundingBox:{left: 316.7; top: 950.0; width: 1958.8; height: 97.4; page: 4;}; ">
<xhtml:td style="boundingBox:{left: 316.7; top: 950.0; width: 229.2; height: 97.4; page: 4;}; ">
<docset:NameTitle structure="h1">Title: </docset:NameTitle>
</xhtml:td>
<xhtml:td style="boundingBox:{left: 545.8; top: 950.0; width: 564.6; height: 97.4; page: 4;}; ">
<docset:TitleJeanPaoli style="boundingBox:{left: 545.8; top: 950.0; width: 564.6; height: 97.4; page: 4;}; ">CEO </docset:TitleJeanPaoli>
</xhtml:td>
<xhtml:td style="boundingBox:{left: 1110.4; top: 950.0; width: 1165.0; height: 97.4; page: 4;}; ">
<docset:Name style="boundingBox:{left: 1110.4; top: 950.0; width: 1165.0; height: 97.4; page: 4;}; ">
<docset:Title structure="h1">Title: </docset:Title>
</docset:Name>
</xhtml:td>
</xhtml:tr>
</xhtml:tbody>
</xhtml:table>
</docset:JeanPaoliName>
</docset:JeanPaoliName>
</docset:DocugamiInc>
</docset:INWITNESSWHEREOF>
</docset:SIGNATUREPAGEFOLLOWS>
</docset:SIGNATUREPAGEFOLLOWS-section>
</docset:Effect>
</docset:Effect>
</docset:MUTUALNON-DISCLOSUREAGREEMENT>
</docset:MUTUALNON-DISCLOSUREAGREEMENT-section>
</dg:chunk>
</dg:chunk>
</docset:ReceivingParty>
</docset:ReceivingParty-section>
<docset:INWITNESSWHEREOF-section>
<dg:chunk structure="h1"> IN <dg:chunk>WITNESS WHEREOF</dg:chunk>, </dg:chunk>
<docset:INWITNESSWHEREOF structure="div">the Parties have executed this Non-Disclosure
Agreement as of the <dg:chunk>Effective Date </dg:chunk>first above written. </docset:INWITNESSWHEREOF>
</docset:INWITNESSWHEREOF-section>
</dg:chunk>
<docset:WidgetCorp-section>
<dg:chunk structure="h1">
<docset:CompanyName>Widget Corp. </docset:CompanyName>
</dg:chunk>
<docset:By-section structure="div">
<dg:chunk structure="h1">
By: </dg:chunk>
<docset:By structure="div">_____________________________ </docset:By>
</docset:By-section>
</docset:WidgetCorp-section>
<dg:chunk structure="h1"> Name: <docset:Name>Alan Black </docset:Name></dg:chunk>
<dg:chunk>
<dg:chunk structure="h1"> Title: <docset:ChiefExecutiveOfficer>Chief Executive Officer </docset:ChiefExecutiveOfficer></dg:chunk>
<docset:Date-section structure="div">
<dg:chunk structure="h1">
Date: </dg:chunk>
<docset:Date structure="div">___________________________ </docset:Date>
</docset:Date-section>
</dg:chunk>
<docset:Recipient-section>
<dg:chunk structure="h1">
Recipient </dg:chunk>
<docset:By-section structure="div">
<dg:chunk structure="h1">
By: </dg:chunk>
<docset:By structure="div">_____________________________ </docset:By>
</docset:By-section>
</docset:Recipient-section>
<docset:NameJaneDoe-section>
<dg:chunk structure="h1"> Name: <docset:Name>Jane Doe </docset:Name></dg:chunk>
<docset:Date-section structure="div">
<dg:chunk structure="h1">
Date: </dg:chunk>
<docset:Date structure="div">___________________________</docset:Date>
</docset:Date-section>
</docset:NameJaneDoe-section>
</dg:chunk>

View File

@@ -8,19 +8,18 @@ from langchain.document_loaders import DocugamiLoader
# Sample Docugami XML fixture, located in the "test_data" directory next to
# this test module.
DOCUGAMI_XML_PATH = Path(__file__).parent / "test_data" / "docugami-example.xml"
@pytest.mark.requires("dgml_utils")
def test_docugami_loader_local() -> None:
    """Test DocugamiLoader against the local sample XML file.

    Loads the fixture at ``DOCUGAMI_XML_PATH`` and checks the number of
    documents returned, plus the metadata and text of the second document.
    """
    loader = DocugamiLoader(file_paths=[DOCUGAMI_XML_PATH])
    docs = loader.load()

    # Number of chunks expected from the sample document.
    assert len(docs) == 25

    # The second chunk is expected to be the "Disclosing Party" heading.
    assert "/docset:DisclosingParty" in docs[1].metadata["xpath"]
    assert "h1" in docs[1].metadata["structure"]
    assert "DisclosingParty" in docs[1].metadata["tag"]
    assert docs[1].page_content.startswith("Disclosing")
def test_docugami_initialization() -> None: