ArcGISLoader: add keyword arguments, error handling, and better tests (#10558)

* Clarify how geometry is handled: it is not returned by default, and
when it is returned it is stored in the document metadata. Geometry is
usually a waste of tokens, but it should be accessible if needed.
* User can supply layer description to avoid errors when layer
properties are inaccessible due to passthrough access.
* Enhanced testing
* Updated notebook

---------

Co-authored-by: Connor Sutton <connor.sutton@swca.com>
Co-authored-by: connorsutton <135151649+connorsutton@users.noreply.github.com>
This commit is contained in:
Joshua Sundance Bailey
2023-09-13 17:12:42 -04:00
committed by GitHub
parent ac9609f58f
commit 85e05fa5d6
3 changed files with 208 additions and 52 deletions

View File

@@ -28,6 +28,7 @@ class ArcGISLoader(BaseLoader):
out_fields: Optional[Union[List[str], str]] = None,
return_geometry: bool = False,
return_all_records: bool = True,
lyr_desc: Optional[str] = None,
**kwargs: Any,
):
try:
@@ -55,7 +56,7 @@ class ArcGISLoader(BaseLoader):
self.url = layer.url
self.layer = layer
self.layer_properties = self._get_layer_properties()
self.layer_properties = self._get_layer_properties(lyr_desc)
self.where = where
@@ -70,21 +71,23 @@ class ArcGISLoader(BaseLoader):
self.return_all_records = return_all_records
self.kwargs = kwargs
def _get_layer_properties(self) -> dict:
def _get_layer_properties(self, lyr_desc: Optional[str] = None) -> dict:
"""Get the layer properties from the FeatureLayer."""
import arcgis
layer_number_pattern = re.compile(r"/\d+$")
props = self.layer.properties
try:
if self.BEAUTIFULSOUP:
lyr_desc = self.BEAUTIFULSOUP(props["description"]).text
else:
lyr_desc = props["description"]
lyr_desc = lyr_desc or _NOT_PROVIDED
except KeyError:
lyr_desc = _NOT_PROVIDED
if lyr_desc is None:
# retrieve description from the FeatureLayer if not provided
try:
if self.BEAUTIFULSOUP:
lyr_desc = self.BEAUTIFULSOUP(props["description"]).text
else:
lyr_desc = props["description"]
lyr_desc = lyr_desc or _NOT_PROVIDED
except KeyError:
lyr_desc = _NOT_PROVIDED
try:
item_id = props["serviceItemId"]
item = self.gis.content.get(item_id) or arcgis.features.FeatureLayer(
@@ -109,7 +112,6 @@ class ArcGISLoader(BaseLoader):
def lazy_load(self) -> Iterator[Document]:
"""Lazy load records from FeatureLayer."""
query_response = self.layer.query(
where=self.where,
out_fields=self.out_fields,
@@ -117,19 +119,30 @@ class ArcGISLoader(BaseLoader):
return_all_records=self.return_all_records,
**self.kwargs,
)
features = (feature.as_dict["attributes"] for feature in query_response)
features = (feature.as_dict for feature in query_response)
for feature in features:
yield Document(
page_content=json.dumps(feature),
metadata={
"accessed": f"{datetime.now(timezone.utc).isoformat()}Z",
"name": self.layer_properties["layer_properties"]["name"],
"url": self.url,
"layer_description": self.layer_properties["layer_description"],
"item_description": self.layer_properties["item_description"],
"layer_properties": self.layer_properties["layer_properties"],
},
)
attributes = feature["attributes"]
page_content = json.dumps(attributes)
metadata = {
"accessed": f"{datetime.now(timezone.utc).isoformat()}Z",
"name": self.layer_properties["layer_properties"]["name"],
"url": self.url,
"layer_description": self.layer_properties["layer_description"],
"item_description": self.layer_properties["item_description"],
"layer_properties": self.layer_properties["layer_properties"],
}
if self.return_geometry:
try:
geometry = feature["geometry"]
metadata.update({"geometry": geometry})
except KeyError:
warnings.warn(
"Geometry could not be retrieved from the feature layer."
)
yield Document(page_content=page_content, metadata=metadata)
def load(self) -> List[Document]:
"""Load all records from FeatureLayer."""

View File

@@ -26,6 +26,7 @@ def mock_feature_layer(): # type: ignore
feature_layer.properties = {
"description": "<html><body>Some HTML content</body></html>",
"name": "test",
"serviceItemId": "testItemId",
}
return feature_layer
@@ -46,3 +47,80 @@ def test_lazy_load(arcgis_mocks, mock_feature_layer, mock_gis): # type: ignore
assert len(documents) == 1
assert documents[0].metadata["url"] == "https://example.com/layer_url"
# Add more assertions based on your expected behavior
def test_initialization_with_string_layer(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    """Build the loader from a layer URL string with FeatureLayer patched."""
    expected_url = "https://example.com/layer_url"
    feature_layer_patch = patch(
        "arcgis.features.FeatureLayer", return_value=mock_feature_layer
    )
    with feature_layer_patch:
        loader = ArcGISLoader(layer=expected_url, gis=mock_gis)
    assert loader.url == expected_url
def test_layer_description_provided_by_user(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    """A user-supplied ``lyr_desc`` is used as the layer description."""
    custom_description = "Custom Layer Description"
    loader = ArcGISLoader(
        layer=mock_feature_layer, gis=mock_gis, lyr_desc=custom_description
    )
    # Check the properties stored by the constructor: this catches a
    # regression where __init__ fails to forward lyr_desc to the helper.
    assert loader.layer_properties["layer_description"] == custom_description
    # Calling the helper directly must honour the argument as well.
    layer_properties = loader._get_layer_properties(lyr_desc=custom_description)
    assert layer_properties["layer_description"] == custom_description
def test_initialization_without_arcgis(mock_feature_layer, mock_gis):  # type: ignore
    """Constructing the loader raises ImportError when arcgis cannot be imported."""
    expected_message = "arcgis is required to use the ArcGIS Loader"
    # A None entry in sys.modules makes ``import arcgis`` raise ImportError.
    with patch.dict("sys.modules", {"arcgis": None}), pytest.raises(
        ImportError, match=expected_message
    ):
        ArcGISLoader(layer=mock_feature_layer, gis=mock_gis)
def test_get_layer_properties_with_description(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    """``_get_layer_properties`` echoes back a description passed positionally."""
    description = "Custom Description"
    loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis, lyr_desc=description)
    reported = loader._get_layer_properties(description)["layer_description"]
    assert reported == description
def test_load_method(arcgis_mocks, mock_feature_layer, mock_gis):  # type: ignore
    """``load`` returns exactly one document for the mocked layer."""
    documents = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis).load()
    assert len(documents) == 1
def test_geometry_returned(arcgis_mocks, mock_feature_layer, mock_gis):  # type: ignore
    """With ``return_geometry=True`` the geometry is stored in metadata only."""
    geometry = {"type": "point", "coordinates": [0, 0]}
    mock_feature_layer.query.return_value = [
        MagicMock(as_dict={"attributes": {"field": "value"}, "geometry": geometry})
    ]
    loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis, return_geometry=True)
    documents = list(loader.lazy_load())
    assert len(documents) == 1
    # Pin the value, not just the key, so a mangled geometry is caught.
    assert documents[0].metadata["geometry"] == geometry
    # Geometry must stay out of the page content; only attributes belong there.
    assert "geometry" not in documents[0].page_content
    assert "field" in documents[0].page_content
def test_geometry_not_returned(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    """With ``return_geometry=False`` no geometry key appears in metadata."""
    loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis, return_geometry=False)
    all_metadata = [document.metadata for document in loader.lazy_load()]
    assert "geometry" not in all_metadata[0]