mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-02 11:39:18 +00:00
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)
Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories 
community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
This commit is contained in:
@@ -0,0 +1,154 @@
|
||||
"""Document Loader for ArcGIS FeatureLayers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import warnings
|
||||
from datetime import datetime, timezone
|
||||
from typing import TYPE_CHECKING, Any, Iterator, List, Optional, Union
|
||||
|
||||
from langchain_core.documents import Document
|
||||
|
||||
from langchain_community.document_loaders.base import BaseLoader
|
||||
|
||||
if TYPE_CHECKING:
|
||||
import arcgis
|
||||
|
||||
# Placeholder stored in document metadata when a layer or item description
# is missing or empty.
_NOT_PROVIDED = "(Not Provided)"
|
||||
|
||||
|
||||
class ArcGISLoader(BaseLoader):
    """Load records from an ArcGIS FeatureLayer.

    Every feature returned by the layer query becomes one ``Document``:
    ``page_content`` is the JSON-encoded attribute mapping of the feature and
    ``metadata`` carries the access time, layer name, URL, layer/item
    descriptions and the layer properties.
    """

    def __init__(
        self,
        layer: Union[str, arcgis.features.FeatureLayer],
        gis: Optional[arcgis.gis.GIS] = None,
        where: str = "1=1",
        out_fields: Optional[Union[List[str], str]] = None,
        return_geometry: bool = False,
        result_record_count: Optional[int] = None,
        lyr_desc: Optional[str] = None,
        **kwargs: Any,
    ):
        """Configure the loader and resolve the layer's descriptive metadata.

        Args:
            layer: Either the URL of a feature layer or an already constructed
                ``arcgis.features.FeatureLayer``.
            gis: GIS connection to use. When omitted, ``arcgis.gis.GIS()`` is
                created with no arguments.
            where: Filter expression forwarded to the layer query.
            out_fields: Fields to request. A string is forwarded as-is, a list
                is joined with commas, and ``None`` requests ``"*"``.
            return_geometry: Forwarded to the layer query; when true, each
                feature's geometry is also copied into the document metadata.
            result_record_count: Forwarded to the layer query. When it is not
                an ``int``, ``return_all_records`` is set to ``True``.
            lyr_desc: Layer description to use instead of the one read from
                the layer properties.
            **kwargs: Extra query parameters; they override the parameters
                built from the arguments above.

        Raises:
            ImportError: If the ``arcgis`` package is not installed.
        """
        try:
            import arcgis
        except ImportError as e:
            raise ImportError(
                "arcgis is required to use the ArcGIS Loader. "
                "Install it with pip or conda."
            ) from e

        # BeautifulSoup is optional: without it descriptions are kept verbatim.
        try:
            from bs4 import BeautifulSoup  # type: ignore

            self.BEAUTIFULSOUP = BeautifulSoup
        except ImportError:
            warnings.warn("BeautifulSoup not found. HTML will not be parsed.")
            self.BEAUTIFULSOUP = None

        self.gis = gis or arcgis.gis.GIS()

        if isinstance(layer, str):
            self.url = layer
            self.layer = arcgis.features.FeatureLayer(layer, gis=gis)
        else:
            self.url = layer.url
            self.layer = layer

        # Needs self.gis, self.url, self.layer and self.BEAUTIFULSOUP to be set.
        self.layer_properties = self._get_layer_properties(lyr_desc)

        self.where = where

        if isinstance(out_fields, str):
            self.out_fields = out_fields
        elif out_fields is None:
            self.out_fields = "*"
        else:
            self.out_fields = ",".join(out_fields)

        self.return_geometry = return_geometry

        self.result_record_count = result_record_count
        self.return_all_records = not isinstance(result_record_count, int)

        query_params = dict(
            where=self.where,
            out_fields=self.out_fields,
            return_geometry=self.return_geometry,
            return_all_records=self.return_all_records,
            result_record_count=self.result_record_count,
        )
        # Caller-supplied keyword arguments take precedence over the defaults.
        query_params.update(kwargs)
        self.query_params = query_params

    def _clean_description(self, raw: Any) -> str:
        """Return ``raw`` as text, or the placeholder when it is missing/empty.

        ``None`` and empty values are mapped to ``_NOT_PROVIDED`` *before*
        parsing so that an unset description never reaches BeautifulSoup.
        When BeautifulSoup is unavailable the value is returned unparsed.
        """
        if not raw:
            return _NOT_PROVIDED
        text = self.BEAUTIFULSOUP(raw).text if self.BEAUTIFULSOUP else raw
        return text or _NOT_PROVIDED

    def _get_layer_properties(self, lyr_desc: Optional[str] = None) -> dict:
        """Get the layer properties from the FeatureLayer.

        Args:
            lyr_desc: Layer description supplied by the caller. When ``None``
                the ``description`` entry of the layer properties is used.

        Returns:
            A dict with the keys ``layer_description``, ``item_description``
            and ``layer_properties``. Descriptions that are missing or empty
            are replaced by ``_NOT_PROVIDED``.
        """
        import arcgis

        layer_number_pattern = re.compile(r"/\d+$")
        props = self.layer.properties

        if lyr_desc is None:
            # retrieve description from the FeatureLayer if not provided
            try:
                lyr_desc = self._clean_description(props["description"])
            except KeyError:
                lyr_desc = _NOT_PROVIDED

        try:
            item_id = props["serviceItemId"]
            # Fall back to a FeatureLayer built from the URL with its trailing
            # "/<number>" segment removed when the content lookup is falsy.
            item = self.gis.content.get(item_id) or arcgis.features.FeatureLayer(
                re.sub(layer_number_pattern, "", self.url),
            )
            try:
                raw_desc = item.description
            except AttributeError:
                raw_desc = item.properties.description
            item_desc = self._clean_description(raw_desc)
        except KeyError:
            item_desc = _NOT_PROVIDED

        return {
            "layer_description": lyr_desc,
            "item_description": item_desc,
            "layer_properties": props,
        }

    def lazy_load(self) -> Iterator[Document]:
        """Lazy load records from FeatureLayer.

        Yields:
            One ``Document`` per feature returned by the layer query. The
            page content is the JSON dump of the feature attributes.
        """
        layer_info = self.layer_properties
        for feature in self.layer.query(**self.query_params):
            record = feature.as_dict
            page_content = json.dumps(record["attributes"])

            # isoformat() on a UTC-aware datetime already ends in "+00:00";
            # swap that offset for "Z" instead of appending a second UTC
            # designator, which would make the timestamp invalid ISO 8601.
            accessed = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

            metadata = {
                "accessed": accessed,
                "name": layer_info["layer_properties"]["name"],
                "url": self.url,
                "layer_description": layer_info["layer_description"],
                "item_description": layer_info["item_description"],
                "layer_properties": layer_info["layer_properties"],
            }

            if self.return_geometry:
                try:
                    metadata["geometry"] = record["geometry"]
                except KeyError:
                    warnings.warn(
                        "Geometry could not be retrieved from the feature layer."
                    )

            yield Document(page_content=page_content, metadata=metadata)

    def load(self) -> List[Document]:
        """Load all records from FeatureLayer."""
        return list(self.lazy_load())
|
Reference in New Issue
Block a user