mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-13 21:47:12 +00:00
ArcGISLoader: add keyword arguments, error handling, and better tests (#10558)
* More clarity around how geometry is handled: it is not returned by default, and when it is returned it is stored in metadata. Geometry is usually a waste of tokens, but it should be accessible if needed.
* Users can supply a layer description to avoid errors when layer properties are inaccessible due to passthrough access.
* Enhanced testing.
* Updated notebook.

---------

Co-authored-by: Connor Sutton <connor.sutton@swca.com>
Co-authored-by: connorsutton <135151649+connorsutton@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
ac9609f58f
commit
85e05fa5d6
@@ -28,6 +28,7 @@ class ArcGISLoader(BaseLoader):
|
||||
out_fields: Optional[Union[List[str], str]] = None,
|
||||
return_geometry: bool = False,
|
||||
return_all_records: bool = True,
|
||||
lyr_desc: Optional[str] = None,
|
||||
**kwargs: Any,
|
||||
):
|
||||
try:
|
||||
@@ -55,7 +56,7 @@ class ArcGISLoader(BaseLoader):
|
||||
self.url = layer.url
|
||||
self.layer = layer
|
||||
|
||||
self.layer_properties = self._get_layer_properties()
|
||||
self.layer_properties = self._get_layer_properties(lyr_desc)
|
||||
|
||||
self.where = where
|
||||
|
||||
@@ -70,21 +71,23 @@ class ArcGISLoader(BaseLoader):
|
||||
self.return_all_records = return_all_records
|
||||
self.kwargs = kwargs
|
||||
|
||||
def _get_layer_properties(self) -> dict:
|
||||
def _get_layer_properties(self, lyr_desc: Optional[str] = None) -> dict:
|
||||
"""Get the layer properties from the FeatureLayer."""
|
||||
import arcgis
|
||||
|
||||
layer_number_pattern = re.compile(r"/\d+$")
|
||||
props = self.layer.properties
|
||||
|
||||
try:
|
||||
if self.BEAUTIFULSOUP:
|
||||
lyr_desc = self.BEAUTIFULSOUP(props["description"]).text
|
||||
else:
|
||||
lyr_desc = props["description"]
|
||||
lyr_desc = lyr_desc or _NOT_PROVIDED
|
||||
except KeyError:
|
||||
lyr_desc = _NOT_PROVIDED
|
||||
if lyr_desc is None:
|
||||
# retrieve description from the FeatureLayer if not provided
|
||||
try:
|
||||
if self.BEAUTIFULSOUP:
|
||||
lyr_desc = self.BEAUTIFULSOUP(props["description"]).text
|
||||
else:
|
||||
lyr_desc = props["description"]
|
||||
lyr_desc = lyr_desc or _NOT_PROVIDED
|
||||
except KeyError:
|
||||
lyr_desc = _NOT_PROVIDED
|
||||
try:
|
||||
item_id = props["serviceItemId"]
|
||||
item = self.gis.content.get(item_id) or arcgis.features.FeatureLayer(
|
||||
@@ -109,7 +112,6 @@ class ArcGISLoader(BaseLoader):
|
||||
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
"""Lazy load records from FeatureLayer."""
|
||||
|
||||
query_response = self.layer.query(
|
||||
where=self.where,
|
||||
out_fields=self.out_fields,
|
||||
@@ -117,19 +119,30 @@ class ArcGISLoader(BaseLoader):
|
||||
return_all_records=self.return_all_records,
|
||||
**self.kwargs,
|
||||
)
|
||||
features = (feature.as_dict["attributes"] for feature in query_response)
|
||||
features = (feature.as_dict for feature in query_response)
|
||||
for feature in features:
|
||||
yield Document(
|
||||
page_content=json.dumps(feature),
|
||||
metadata={
|
||||
"accessed": f"{datetime.now(timezone.utc).isoformat()}Z",
|
||||
"name": self.layer_properties["layer_properties"]["name"],
|
||||
"url": self.url,
|
||||
"layer_description": self.layer_properties["layer_description"],
|
||||
"item_description": self.layer_properties["item_description"],
|
||||
"layer_properties": self.layer_properties["layer_properties"],
|
||||
},
|
||||
)
|
||||
attributes = feature["attributes"]
|
||||
page_content = json.dumps(attributes)
|
||||
|
||||
metadata = {
|
||||
"accessed": f"{datetime.now(timezone.utc).isoformat()}Z",
|
||||
"name": self.layer_properties["layer_properties"]["name"],
|
||||
"url": self.url,
|
||||
"layer_description": self.layer_properties["layer_description"],
|
||||
"item_description": self.layer_properties["item_description"],
|
||||
"layer_properties": self.layer_properties["layer_properties"],
|
||||
}
|
||||
|
||||
if self.return_geometry:
|
||||
try:
|
||||
geometry = feature["geometry"]
|
||||
metadata.update({"geometry": geometry})
|
||||
except KeyError:
|
||||
warnings.warn(
|
||||
"Geometry could not be retrieved from the feature layer."
|
||||
)
|
||||
|
||||
yield Document(page_content=page_content, metadata=metadata)
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load all records from FeatureLayer."""
|
||||
|
@@ -26,6 +26,7 @@ def mock_feature_layer(): # type: ignore
|
||||
feature_layer.properties = {
|
||||
"description": "<html><body>Some HTML content</body></html>",
|
||||
"name": "test",
|
||||
"serviceItemId": "testItemId",
|
||||
}
|
||||
return feature_layer
|
||||
|
||||
@@ -46,3 +47,80 @@ def test_lazy_load(arcgis_mocks, mock_feature_layer, mock_gis): # type: ignore
|
||||
assert len(documents) == 1
|
||||
assert documents[0].metadata["url"] == "https://example.com/layer_url"
|
||||
# Add more assertions based on your expected behavior
|
||||
|
||||
|
||||
def test_initialization_with_string_layer(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    """Building the loader from a URL string leaves ``loader.url`` equal to it."""
    expected_url = "https://example.com/layer_url"
    # Swap the real FeatureLayer constructor for one returning the mock layer.
    feature_layer_patch = patch(
        "arcgis.features.FeatureLayer", return_value=mock_feature_layer
    )

    with feature_layer_patch:
        loader = ArcGISLoader(layer=expected_url, gis=mock_gis)

    assert loader.url == expected_url
|
||||
|
||||
|
||||
def test_layer_description_provided_by_user(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    """A user-supplied ``lyr_desc`` is used as the layer description."""
    custom_description = "Custom Layer Description"
    loader = ArcGISLoader(
        layer=mock_feature_layer, gis=mock_gis, lyr_desc=custom_description
    )

    # Check what the constructor stored, so the test fails if ``lyr_desc`` is
    # dropped before it reaches ``_get_layer_properties``.
    assert loader.layer_properties["layer_description"] == custom_description

    # Calling the helper directly with the description must agree.
    layer_properties = loader._get_layer_properties(lyr_desc=custom_description)

    assert layer_properties["layer_description"] == custom_description
|
||||
|
||||
|
||||
def test_initialization_without_arcgis(mock_feature_layer, mock_gis):  # type: ignore
    """Constructing the loader raises ImportError when ``arcgis`` is unavailable."""
    # A ``None`` entry in ``sys.modules`` makes ``import arcgis`` fail.
    arcgis_missing = patch.dict("sys.modules", {"arcgis": None})
    expects_import_error = pytest.raises(
        ImportError, match="arcgis is required to use the ArcGIS Loader"
    )

    with arcgis_missing, expects_import_error:
        ArcGISLoader(layer=mock_feature_layer, gis=mock_gis)
|
||||
|
||||
|
||||
def test_get_layer_properties_with_description(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    """``_get_layer_properties`` returns a description passed positionally."""
    description = "Custom Description"
    loader = ArcGISLoader(
        layer=mock_feature_layer,
        gis=mock_gis,
        lyr_desc=description,
    )

    returned_properties = loader._get_layer_properties(description)

    assert returned_properties["layer_description"] == description
|
||||
|
||||
|
||||
def test_load_method(arcgis_mocks, mock_feature_layer, mock_gis):  # type: ignore
    """``load`` yields exactly one document for the mocked feature layer."""
    loaded = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis).load()

    assert len(loaded) == 1
|
||||
|
||||
|
||||
def test_geometry_returned(arcgis_mocks, mock_feature_layer, mock_gis):  # type: ignore
    """With ``return_geometry=True`` the geometry lands in metadata only."""
    geometry = {"type": "point", "coordinates": [0, 0]}
    mock_feature_layer.query.return_value = [
        MagicMock(
            as_dict={
                "attributes": {"field": "value"},
                "geometry": geometry,
            }
        )
    ]

    loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis, return_geometry=True)

    documents = list(loader.lazy_load())

    assert len(documents) == 1
    assert "geometry" in documents[0].metadata
    # The stored value must be the feature's geometry, not just any value.
    assert documents[0].metadata["geometry"] == geometry
    # Page content is built from the attributes alone, so geometry must not
    # leak into it.
    assert "geometry" not in documents[0].page_content
|
||||
|
||||
|
||||
def test_geometry_not_returned(  # type: ignore
    arcgis_mocks, mock_feature_layer, mock_gis
):
    """With ``return_geometry=False`` no ``geometry`` key appears in metadata."""
    loader = ArcGISLoader(
        layer=mock_feature_layer,
        gis=mock_gis,
        return_geometry=False,
    )

    documents = [*loader.lazy_load()]
    first_metadata = documents[0].metadata

    assert "geometry" not in first_metadata
|
||||
|
Reference in New Issue
Block a user