Fixed source key name for Docugami loader

This commit is contained in:
Taqi Jaffri
2023-08-01 12:54:26 -07:00
parent 7cbe28ba9b
commit 96843f3bd4
2 changed files with 86 additions and 79 deletions

View File

@@ -18,7 +18,7 @@ TABLE_NAME = "{http://www.w3.org/1999/xhtml}table"
XPATH_KEY = "xpath"
DOCUMENT_ID_KEY = "id"
DOCUMENT_NAME_KEY = "name"
DOCUMENT_SOURCE_KEY = "source"
STRUCTURE_KEY = "structure"
TAG_KEY = "tag"
PROJECTS_KEY = "projects"
@@ -146,7 +146,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
metadata = {
XPATH_KEY: _xpath_for_chunk(node),
DOCUMENT_ID_KEY: document["id"],
DOCUMENT_NAME_KEY: document["name"],
DOCUMENT_SOURCE_KEY: document["name"],
STRUCTURE_KEY: node.attrib.get("structure", ""),
TAG_KEY: re.sub(r"\{.*\}", "", node.tag),
}
@@ -349,7 +349,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
chunks += self._parse_dgml(
{
DOCUMENT_ID_KEY: path.name,
DOCUMENT_NAME_KEY: path.name,
DOCUMENT_SOURCE_KEY: path.name,
},
file.read(),
)