CR feedback

This commit is contained in:
Taqi Jaffri 2023-08-19 13:48:15 -07:00
parent 5919c0f4a2
commit 5cd244e9b7
2 changed files with 3 additions and 10 deletions

View File

@@ -19,18 +19,10 @@
"metadata": { "metadata": {
"tags": [] "tags": []
}, },
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: lxml in /root/Source/github/docugami.langchain/libs/langchain/.venv/lib/python3.9/site-packages (4.9.3)\n"
]
}
],
"source": [ "source": [
"# You need the lxml package to use the DocugamiLoader\n", "# You need the lxml package to use the DocugamiLoader\n",
"!poetry run pip install lxml" "!poetry run pip install lxml --quiet"
] ]
}, },
{ {

View File

@@ -147,6 +147,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
metadata = { metadata = {
XPATH_KEY: _xpath_for_chunk(node), XPATH_KEY: _xpath_for_chunk(node),
DOCUMENT_ID_KEY: document[DOCUMENT_ID_KEY], DOCUMENT_ID_KEY: document[DOCUMENT_ID_KEY],
DOCUMENT_NAME_KEY: document[DOCUMENT_NAME_KEY],
DOCUMENT_SOURCE_KEY: document[DOCUMENT_NAME_KEY], DOCUMENT_SOURCE_KEY: document[DOCUMENT_NAME_KEY],
STRUCTURE_KEY: node.attrib.get("structure", ""), STRUCTURE_KEY: node.attrib.get("structure", ""),
TAG_KEY: re.sub(r"\{.*\}", "", node.tag), TAG_KEY: re.sub(r"\{.*\}", "", node.tag),