ArcGISLoader update (#9240)

Small bug fixes and additional metadata based on user feedback. This PR is
from the author of https://github.com/langchain-ai/langchain/pull/8873.
This commit is contained in:
Joshua Sundance Bailey
2023-08-15 02:44:29 -04:00
committed by GitHub
parent eac4ddb4bb
commit ef0664728e
3 changed files with 210 additions and 218 deletions

View File

@@ -1,9 +1,11 @@
"""Document Loader for ArcGIS FeatureLayers."""
from __future__ import annotations
import json
import re
import warnings
from datetime import datetime, timezone
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union
from langchain.docstore.document import Document
@@ -25,6 +27,7 @@ class ArcGISLoader(BaseLoader):
where: str = "1=1",
out_fields: Optional[Union[List[str], str]] = None,
return_geometry: bool = False,
return_all_records: bool = True,
**kwargs: Any,
):
try:
@@ -64,10 +67,12 @@ class ArcGISLoader(BaseLoader):
self.out_fields = ",".join(out_fields)
self.return_geometry = return_geometry
self.return_all_records = return_all_records
self.kwargs = kwargs
def _get_layer_properties(self) -> dict:
"""Get the layer properties from the FeatureLayer."""
import arcgis
layer_number_pattern = re.compile(r"/\d+$")
props = self.layer.properties
@@ -109,7 +114,7 @@ class ArcGISLoader(BaseLoader):
where=self.where,
out_fields=self.out_fields,
return_geometry=self.return_geometry,
return_all_records=True,
return_all_records=self.return_all_records,
**self.kwargs,
)
features = (feature.as_dict["attributes"] for feature in query_response)
@@ -117,6 +122,8 @@ class ArcGISLoader(BaseLoader):
yield Document(
page_content=json.dumps(feature),
metadata={
"accessed": f"{datetime.now(timezone.utc).isoformat()}Z",
"name": self.layer_properties["layer_properties"]["name"],
"url": self.url,
"layer_description": self.layer_properties["layer_description"],
"item_description": self.layer_properties["item_description"],

View File

@@ -24,7 +24,8 @@ def mock_feature_layer(): # type: ignore
]
feature_layer.url = "https://example.com/layer_url"
feature_layer.properties = {
"description": "<html><body>Some HTML content</body></html>"
"description": "<html><body>Some HTML content</body></html>",
"name": "test",
}
return feature_layer