From 85e05fa5d62833d0301dfb46e0c58021e8e4867d Mon Sep 17 00:00:00 2001 From: Joshua Sundance Bailey <84336755+joshuasundance-swca@users.noreply.github.com> Date: Wed, 13 Sep 2023 17:12:42 -0400 Subject: [PATCH] ArcGISLoader: add keyword arguments, error handling, and better tests (#10558) * More clarity around how geometry is handled. Not returned by default; when returned, stored in metadata. This is because it's usually a waste of tokens, but it should be accessible if needed. * User can supply layer description to avoid errors when layer properties are inaccessible due to passthrough access. * Enhanced testing * Updated notebook --------- Co-authored-by: Connor Sutton Co-authored-by: connorsutton <135151649+connorsutton@users.noreply.github.com> --- .../document_loaders/arcgis.ipynb | 123 +++++++++++++----- .../document_loaders/arcgis_loader.py | 59 +++++---- .../document_loaders/test_arcgis_loader.py | 78 +++++++++++ 3 files changed, 208 insertions(+), 52 deletions(-) diff --git a/docs/extras/integrations/document_loaders/arcgis.ipynb b/docs/extras/integrations/document_loaders/arcgis.ipynb index 9420d4c4e0c..f6b3a16325f 100644 --- a/docs/extras/integrations/document_loaders/arcgis.ipynb +++ b/docs/extras/integrations/document_loaders/arcgis.ipynb @@ -23,9 +23,7 @@ "source": [ "from langchain.document_loaders import ArcGISLoader\n", "\n", - "\n", "url = \"https://maps1.vcgov.org/arcgis/rest/services/Beaches/MapServer/7\"\n", - "\n", "loader = ArcGISLoader(url)" ] }, @@ -39,8 +37,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 7.86 ms, sys: 0 ns, total: 7.86 ms\n", - "Wall time: 802 ms\n" + "CPU times: user 2.37 ms, sys: 5.83 ms, total: 8.19 ms\n", + "Wall time: 1.05 s\n" ] } ], @@ -59,7 +57,7 @@ { "data": { "text/plain": [ - "{'accessed': '2023-08-15T04:30:41.689270+00:00Z',\n", + "{'accessed': '2023-09-13T19:58:32.546576+00:00Z',\n", " 'name': 'Beach Ramps',\n", " 'url': 'https://maps1.vcgov.org/arcgis/rest/services/Beaches/MapServer/7',\n", " 'layer_description': '(Not Provided)',\n", @@ -243,9 +241,76 @@ "docs[0].metadata" ] }, + { + "cell_type": "markdown", + "id": "a9687fb6-5016-41a1-b4e4-7a042aa5291e", + "metadata": {}, + "source": [ + "### Retrieving Geometries \n", + "\n", + "\n", + "If you want to retrieve feature geometries, you may do so with the `return_geometry` keyword.\n", + "\n", + "Each document's geometry will be stored in its metadata dictionary." + ] + }, { "cell_type": "code", "execution_count": 4, + "id": "680247b1-cb2f-4d76-ad56-75d0230c2f2a", + "metadata": {}, + "outputs": [], + "source": [ + "loader_geom = ArcGISLoader(url, return_geometry=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "93656a43-8c97-4e79-b4e1-be2e4eff98d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 9.6 ms, sys: 5.84 ms, total: 15.4 ms\n", + "Wall time: 1.06 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "docs = loader_geom.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c02eca3b-634a-4d02-8ec0-ae29f5feac6b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'x': -81.01508803280349,\n", + " 'y': 29.24246579525828,\n", + " 'spatialReference': {'wkid': 4326, 'latestWkid': 4326}}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs[0].metadata['geometry']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "id": "1d132b7d-5a13-4d66-98e8-785ffdf87af0", "metadata": {}, "outputs": [ @@ -253,29 +318,29 @@ "name": "stdout", "output_type": "stream", "text": [ - "{\"OBJECTID\": 4, \"AccessName\": \"BEACHWAY AV\", \"AccessID\": \"NS-106\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1400 N ATLANTIC AV\", \"MilePost\": 1.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 5, \"AccessName\": \"SEABREEZE BLVD\", \"AccessID\": \"DB-051\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK N ATLANTIC AV\", \"MilePost\": 14.24, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n", - "{\"OBJECTID\": 6, \"AccessName\": \"27TH AV\", \"AccessID\": \"NS-141\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3600 BLK S ATLANTIC AV\", \"MilePost\": 4.83, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n", - "{\"OBJECTID\": 11, \"AccessName\": \"INTERNATIONAL SPEEDWAY BLVD\", \"AccessID\": \"DB-059\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"300 BLK S ATLANTIC AV\", \"MilePost\": 15.27, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n", - "{\"OBJECTID\": 14, \"AccessName\": \"GRANADA BLVD\", \"AccessID\": \"OB-030\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"20 BLK OCEAN SHORE BLVD\", \"MilePost\": 10.02, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n", - "{\"OBJECTID\": 27, \"AccessName\": \"UNIVERSITY BLVD\", \"AccessID\": \"DB-048\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK N ATLANTIC AV\", \"MilePost\": 13.74, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n", - "{\"OBJECTID\": 38, \"AccessName\": \"BEACH ST\", \"AccessID\": \"PI-097\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"4890 BLK S ATLANTIC AV\", \"MilePost\": 25.85, \"City\": \"PONCE INLET\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n", - "{\"OBJECTID\": 42, \"AccessName\": \"BOTEFUHR AV\", \"AccessID\": \"DBS-067\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1900 BLK S ATLANTIC AV\", \"MilePost\": 16.68, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 43, \"AccessName\": \"SILVER BEACH AV\", \"AccessID\": \"DB-064\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1000 BLK S ATLANTIC AV\", \"MilePost\": 15.98, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 45, \"AccessName\": \"MILSAP RD\", \"AccessID\": \"OB-037\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"700 BLK S ATLANTIC AV\", \"MilePost\": 11.52, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 56, \"AccessName\": \"3RD AV\", \"AccessID\": \"NS-118\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1200 BLK HILL ST\", \"MilePost\": 3.25, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 64, \"AccessName\": \"DUNLAWTON BLVD\", \"AccessID\": \"DBS-078\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3400 BLK S ATLANTIC AV\", \"MilePost\": 20.61, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 69, \"AccessName\": \"EMILIA AV\", \"AccessID\": \"DBS-082\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3790 BLK S ATLANTIC AV\", \"MilePost\": 21.38, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"BOTH\"}\n", - "{\"OBJECTID\": 94, \"AccessName\": \"FLAGLER AV\", \"AccessID\": \"NS-110\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK FLAGLER AV\", \"MilePost\": 2.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 96, \"AccessName\": \"CRAWFORD RD\", \"AccessID\": \"NS-108\", \"AccessType\": \"OPEN VEHICLE RAMP - PASS\", \"GeneralLoc\": \"800 BLK N ATLANTIC AV\", \"MilePost\": 2.19, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 124, \"AccessName\": \"HARTFORD AV\", \"AccessID\": \"DB-043\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1890 BLK N ATLANTIC AV\", \"MilePost\": 12.76, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 127, \"AccessName\": \"WILLIAMS AV\", \"AccessID\": \"DB-042\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2200 BLK N ATLANTIC AV\", \"MilePost\": 12.5, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 136, \"AccessName\": \"CARDINAL DR\", \"AccessID\": \"OB-036\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"600 BLK S ATLANTIC AV\", \"MilePost\": 11.27, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 229, \"AccessName\": \"EL PORTAL ST\", \"AccessID\": \"DBS-076\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3200 BLK S ATLANTIC AV\", \"MilePost\": 20.04, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 230, \"AccessName\": \"HARVARD DR\", \"AccessID\": \"OB-038\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK S ATLANTIC AV\", \"MilePost\": 11.72, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 232, \"AccessName\": \"VAN AV\", \"AccessID\": \"DBS-075\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3100 BLK S ATLANTIC AV\", \"MilePost\": 19.6, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 234, \"AccessName\": \"ROCKEFELLER DR\", \"AccessID\": \"OB-034\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"400 BLK S ATLANTIC AV\", \"MilePost\": 10.9, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n", - "{\"OBJECTID\": 235, \"AccessName\": \"MINERVA RD\", \"AccessID\": \"DBS-069\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2300 BLK S ATLANTIC AV\", \"MilePost\": 17.52, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"CLOSED\", \"Entry_Date_Time\": 1692039947000, \"DrivingZone\": \"YES\"}\n" + "{\"OBJECTID\": 4, \"AccessName\": \"UNIVERSITY BLVD\", \"AccessID\": \"DB-048\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK N ATLANTIC AV\", \"MilePost\": 13.74, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694597536000, \"DrivingZone\": \"BOTH\"}\n", + "{\"OBJECTID\": 18, \"AccessName\": \"BEACHWAY AV\", \"AccessID\": \"NS-106\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1400 N ATLANTIC AV\", \"MilePost\": 1.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694600478000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 24, \"AccessName\": \"27TH AV\", \"AccessID\": \"NS-141\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3600 BLK S ATLANTIC AV\", \"MilePost\": 4.83, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"CLOSED FOR HIGH TIDE\", \"Entry_Date_Time\": 1694619363000, \"DrivingZone\": \"BOTH\"}\n", + "{\"OBJECTID\": 26, \"AccessName\": \"SEABREEZE BLVD\", \"AccessID\": \"DB-051\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK N ATLANTIC AV\", \"MilePost\": 14.24, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694597536000, \"DrivingZone\": \"BOTH\"}\n", + "{\"OBJECTID\": 30, \"AccessName\": \"INTERNATIONAL SPEEDWAY BLVD\", \"AccessID\": \"DB-059\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"300 BLK S ATLANTIC AV\", \"MilePost\": 15.27, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694598638000, \"DrivingZone\": \"BOTH\"}\n", + "{\"OBJECTID\": 33, \"AccessName\": \"GRANADA BLVD\", \"AccessID\": \"OB-030\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"20 BLK OCEAN SHORE BLVD\", \"MilePost\": 10.02, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1694595424000, \"DrivingZone\": \"BOTH\"}\n", + "{\"OBJECTID\": 39, \"AccessName\": \"BEACH ST\", \"AccessID\": \"PI-097\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"4890 BLK S ATLANTIC AV\", \"MilePost\": 25.85, \"City\": \"PONCE INLET\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1694596294000, \"DrivingZone\": \"BOTH\"}\n", + "{\"OBJECTID\": 44, \"AccessName\": \"SILVER BEACH AV\", \"AccessID\": \"DB-064\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1000 BLK S ATLANTIC AV\", \"MilePost\": 15.98, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694598638000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 45, \"AccessName\": \"BOTEFUHR AV\", \"AccessID\": \"DBS-067\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1900 BLK S ATLANTIC AV\", \"MilePost\": 16.68, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694598638000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 46, \"AccessName\": \"MINERVA RD\", \"AccessID\": \"DBS-069\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2300 BLK S ATLANTIC AV\", \"MilePost\": 17.52, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694598638000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 56, \"AccessName\": \"3RD AV\", \"AccessID\": \"NS-118\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1200 BLK HILL ST\", \"MilePost\": 3.25, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694600478000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 65, \"AccessName\": \"MILSAP RD\", \"AccessID\": \"OB-037\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"700 BLK S ATLANTIC AV\", \"MilePost\": 11.52, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1694595749000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 72, \"AccessName\": \"ROCKEFELLER DR\", \"AccessID\": \"OB-034\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"400 BLK S ATLANTIC AV\", \"MilePost\": 10.9, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"CLOSED - SEASONAL\", \"Entry_Date_Time\": 1694591351000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 74, \"AccessName\": \"DUNLAWTON BLVD\", \"AccessID\": \"DBS-078\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3400 BLK S ATLANTIC AV\", \"MilePost\": 20.61, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694601124000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 77, \"AccessName\": \"EMILIA AV\", \"AccessID\": \"DBS-082\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3790 BLK S ATLANTIC AV\", \"MilePost\": 21.38, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694601124000, \"DrivingZone\": \"BOTH\"}\n", + "{\"OBJECTID\": 84, \"AccessName\": \"VAN AV\", \"AccessID\": \"DBS-075\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3100 BLK S ATLANTIC AV\", \"MilePost\": 19.6, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694601124000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 104, \"AccessName\": \"HARVARD DR\", \"AccessID\": \"OB-038\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"900 BLK S ATLANTIC AV\", \"MilePost\": 11.72, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694597536000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 106, \"AccessName\": \"WILLIAMS AV\", \"AccessID\": \"DB-042\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"2200 BLK N ATLANTIC AV\", \"MilePost\": 12.5, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694597536000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 109, \"AccessName\": \"HARTFORD AV\", \"AccessID\": \"DB-043\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"1890 BLK N ATLANTIC AV\", \"MilePost\": 12.76, \"City\": \"DAYTONA BEACH\", \"AccessStatus\": \"CLOSED - SEASONAL\", \"Entry_Date_Time\": 1694591351000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 138, \"AccessName\": \"CRAWFORD RD\", \"AccessID\": \"NS-108\", \"AccessType\": \"OPEN VEHICLE RAMP - PASS\", \"GeneralLoc\": \"800 BLK N ATLANTIC AV\", \"MilePost\": 2.19, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694600478000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 140, \"AccessName\": \"FLAGLER AV\", \"AccessID\": \"NS-110\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"500 BLK FLAGLER AV\", \"MilePost\": 2.57, \"City\": \"NEW SMYRNA BEACH\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694600478000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 144, \"AccessName\": \"CARDINAL DR\", \"AccessID\": \"OB-036\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"600 BLK S ATLANTIC AV\", \"MilePost\": 11.27, \"City\": \"ORMOND BEACH\", \"AccessStatus\": \"4X4 ONLY\", \"Entry_Date_Time\": 1694595749000, \"DrivingZone\": \"YES\"}\n", + "{\"OBJECTID\": 174, \"AccessName\": \"EL PORTAL ST\", \"AccessID\": \"DBS-076\", \"AccessType\": \"OPEN VEHICLE RAMP\", \"GeneralLoc\": \"3200 BLK S ATLANTIC AV\", \"MilePost\": 20.04, \"City\": \"DAYTONA BEACH SHORES\", \"AccessStatus\": \"OPEN\", \"Entry_Date_Time\": 1694601124000, \"DrivingZone\": \"YES\"}\n" ] } ], @@ -301,7 +366,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/document_loaders/arcgis_loader.py b/libs/langchain/langchain/document_loaders/arcgis_loader.py index 87222813281..89ad2a8d7af 100644 --- a/libs/langchain/langchain/document_loaders/arcgis_loader.py +++ b/libs/langchain/langchain/document_loaders/arcgis_loader.py @@ -28,6 +28,7 @@ class ArcGISLoader(BaseLoader): out_fields: Optional[Union[List[str], str]] = None, return_geometry: bool = False, return_all_records: bool = True, + lyr_desc: Optional[str] = None, **kwargs: Any, ): try: @@ -55,7 +56,7 @@ class ArcGISLoader(BaseLoader): self.url = layer.url self.layer = layer - self.layer_properties = self._get_layer_properties() + self.layer_properties = self._get_layer_properties(lyr_desc) self.where = where @@ -70,21 +71,23 @@ class ArcGISLoader(BaseLoader): self.return_all_records = return_all_records self.kwargs = kwargs - def _get_layer_properties(self) -> dict: + def _get_layer_properties(self, lyr_desc: Optional[str] = None) -> dict: """Get the layer properties from the FeatureLayer.""" import arcgis layer_number_pattern = re.compile(r"/\d+$") props = self.layer.properties - try: - if self.BEAUTIFULSOUP: - lyr_desc = self.BEAUTIFULSOUP(props["description"]).text - else: - lyr_desc = props["description"] - lyr_desc = lyr_desc or _NOT_PROVIDED - except KeyError: - lyr_desc = _NOT_PROVIDED + if lyr_desc is None: + # retrieve description from the FeatureLayer if not provided + try: + if self.BEAUTIFULSOUP: + lyr_desc = self.BEAUTIFULSOUP(props["description"]).text + else: + lyr_desc = props["description"] + lyr_desc = lyr_desc or _NOT_PROVIDED + except KeyError: + lyr_desc = _NOT_PROVIDED try: item_id = props["serviceItemId"] item = self.gis.content.get(item_id) or arcgis.features.FeatureLayer( @@ -109,7 +112,6 @@ class ArcGISLoader(BaseLoader): def lazy_load(self) -> Iterator[Document]: """Lazy load records from FeatureLayer.""" - query_response = self.layer.query( where=self.where, out_fields=self.out_fields, @@ -117,19 +119,30 @@ class ArcGISLoader(BaseLoader): return_all_records=self.return_all_records, **self.kwargs, ) - features = (feature.as_dict["attributes"] for feature in query_response) + features = (feature.as_dict for feature in query_response) for feature in features: - yield Document( - page_content=json.dumps(feature), - metadata={ - "accessed": f"{datetime.now(timezone.utc).isoformat()}Z", - "name": self.layer_properties["layer_properties"]["name"], - "url": self.url, - "layer_description": self.layer_properties["layer_description"], - "item_description": self.layer_properties["item_description"], - "layer_properties": self.layer_properties["layer_properties"], - }, - ) + attributes = feature["attributes"] + page_content = json.dumps(attributes) + + metadata = { + "accessed": f"{datetime.now(timezone.utc).isoformat()}Z", + "name": self.layer_properties["layer_properties"]["name"], + "url": self.url, + "layer_description": self.layer_properties["layer_description"], + "item_description": self.layer_properties["item_description"], + "layer_properties": self.layer_properties["layer_properties"], + } + + if self.return_geometry: + try: + geometry = feature["geometry"] + metadata.update({"geometry": geometry}) + except KeyError: + warnings.warn( + "Geometry could not be retrieved from the feature layer." + ) + + yield Document(page_content=page_content, metadata=metadata) def load(self) -> List[Document]: """Load all records from FeatureLayer.""" diff --git a/libs/langchain/tests/unit_tests/document_loaders/test_arcgis_loader.py b/libs/langchain/tests/unit_tests/document_loaders/test_arcgis_loader.py index a2f7d05e3ee..c1b667f1f63 100644 --- a/libs/langchain/tests/unit_tests/document_loaders/test_arcgis_loader.py +++ b/libs/langchain/tests/unit_tests/document_loaders/test_arcgis_loader.py @@ -26,6 +26,7 @@ def mock_feature_layer(): # type: ignore feature_layer.properties = { "description": "Some HTML content", "name": "test", + "serviceItemId": "testItemId", } return feature_layer @@ -46,3 +47,80 @@ def test_lazy_load(arcgis_mocks, mock_feature_layer, mock_gis): # type: ignore assert len(documents) == 1 assert documents[0].metadata["url"] == "https://example.com/layer_url" # Add more assertions based on your expected behavior + + +def test_initialization_with_string_layer( # type: ignore + arcgis_mocks, mock_feature_layer, mock_gis +): + layer_url = "https://example.com/layer_url" + + with patch("arcgis.features.FeatureLayer", return_value=mock_feature_layer): + loader = ArcGISLoader(layer=layer_url, gis=mock_gis) + + assert loader.url == layer_url + + +def test_layer_description_provided_by_user( # type: ignore + arcgis_mocks, mock_feature_layer, mock_gis +): + custom_description = "Custom Layer Description" + loader = ArcGISLoader( + layer=mock_feature_layer, gis=mock_gis, lyr_desc=custom_description + ) + + layer_properties = loader._get_layer_properties(lyr_desc=custom_description) + + assert layer_properties["layer_description"] == custom_description + + +def test_initialization_without_arcgis(mock_feature_layer, mock_gis): # type: ignore + with patch.dict("sys.modules", {"arcgis": None}): + with pytest.raises( + ImportError, match="arcgis is required to use the ArcGIS Loader" + ): + ArcGISLoader(layer=mock_feature_layer, gis=mock_gis) + + +def test_get_layer_properties_with_description( # type: ignore + arcgis_mocks, mock_feature_layer, mock_gis +): + loader = ArcGISLoader( + layer=mock_feature_layer, gis=mock_gis, lyr_desc="Custom Description" + ) + + props = loader._get_layer_properties("Custom Description") + + assert props["layer_description"] == "Custom Description" + + +def test_load_method(arcgis_mocks, mock_feature_layer, mock_gis): # type: ignore + loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis) + + documents = loader.load() + + assert len(documents) == 1 + + +def test_geometry_returned(arcgis_mocks, mock_feature_layer, mock_gis): # type: ignore + mock_feature_layer.query.return_value = [ + MagicMock( + as_dict={ + "attributes": {"field": "value"}, + "geometry": {"type": "point", "coordinates": [0, 0]}, + } + ) + ] + + loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis, return_geometry=True) + + documents = list(loader.lazy_load()) + assert "geometry" in documents[0].metadata + + +def test_geometry_not_returned( # type: ignore + arcgis_mocks, mock_feature_layer, mock_gis +): + loader = ArcGISLoader(layer=mock_feature_layer, gis=mock_gis, return_geometry=False) + + documents = list(loader.lazy_load()) + assert "geometry" not in documents[0].metadata