mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-01 11:02:37 +00:00
Fixed the source key name for the Docugami loader
This commit is contained in:
@@ -18,7 +18,7 @@ TABLE_NAME = "{http://www.w3.org/1999/xhtml}table"
|
||||
|
||||
XPATH_KEY = "xpath"
|
||||
DOCUMENT_ID_KEY = "id"
|
||||
DOCUMENT_NAME_KEY = "name"
|
||||
DOCUMENT_SOURCE_KEY = "source"
|
||||
STRUCTURE_KEY = "structure"
|
||||
TAG_KEY = "tag"
|
||||
PROJECTS_KEY = "projects"
|
||||
@@ -146,7 +146,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
|
||||
metadata = {
|
||||
XPATH_KEY: _xpath_for_chunk(node),
|
||||
DOCUMENT_ID_KEY: document["id"],
|
||||
DOCUMENT_NAME_KEY: document["name"],
|
||||
DOCUMENT_SOURCE_KEY: document["name"],
|
||||
STRUCTURE_KEY: node.attrib.get("structure", ""),
|
||||
TAG_KEY: re.sub(r"\{.*\}", "", node.tag),
|
||||
}
|
||||
@@ -349,7 +349,7 @@ class DocugamiLoader(BaseLoader, BaseModel):
|
||||
chunks += self._parse_dgml(
|
||||
{
|
||||
DOCUMENT_ID_KEY: path.name,
|
||||
DOCUMENT_NAME_KEY: path.name,
|
||||
DOCUMENT_SOURCE_KEY: path.name,
|
||||
},
|
||||
file.read(),
|
||||
)
|
||||
|
Reference in New Issue
Block a user