mirror of
https://github.com/hwchase17/langchain.git
synced 2026-04-23 20:23:59 +00:00
x
This commit is contained in:
@@ -15,6 +15,7 @@ import abc
|
||||
import asyncio
|
||||
import mimetypes
|
||||
from typing import Any, List, Optional, Sequence
|
||||
from pydantic import ValidationError
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
@@ -109,10 +110,18 @@ class RequestsDownloadHandler(DownloadHandler):
|
||||
|
||||
def _repackage_as_blobs(urls: Sequence[str], contents: Sequence[str]) -> List[Blob]:
    """Repackage downloaded contents as blobs.

    URLs and contents are paired positionally with ``zip``, so any surplus
    items in the longer sequence are ignored. The mimetype of each blob is
    guessed from its URL with ``mimetypes.guess_type`` and is ``None`` when
    the URL gives no hint.

    Args:
        urls: Source URLs; each one is stored as the ``path`` of its blob.
        contents: The content for each URL, in the same order as ``urls``.

    Returns:
        One blob per (url, content) pair, in input order.

    Raises:
        ValueError: If a blob cannot be created for some content. The
            original validation error is attached as the cause.
    """
    blobs = []
    for url, content in zip(urls, contents):
        mimetype = mimetypes.guess_type(url)[0]
        # Keep the try body minimal: only blob construction can fail validation.
        try:
            blob = Blob(data=content, mimetype=mimetype, path=url)
        except ValidationError as err:
            # Chain the cause so the validation details stay in the traceback.
            raise ValueError(
                f"Could not create a blob for content at {url}. "
                f"Content type is {type(content)}"
            ) from err
        blobs.append(blob)

    return blobs
|
||||
|
||||
|
||||
class AutoDownloadHandler(DownloadHandler):
|
||||
@@ -1,55 +1,12 @@
|
||||
import abc
|
||||
from typing import Any, List, Mapping, Sequence
|
||||
from typing import List
|
||||
|
||||
from langchain.callbacks.manager import Callbacks
|
||||
from langchain.document_loaders.blob_loaders import Blob
|
||||
|
||||
#
|
||||
# class AbstractQueryGenerator(abc.ABC):
|
||||
# """Abstract class for generating queries."""
|
||||
#
|
||||
# @abc.abstractmethod
|
||||
# def generate_queries(self, question: str, callbacks: Callbacks = None) -> List[str]:
|
||||
# """Generate queries for the given question."""
|
||||
# raise NotImplementedError()
|
||||
#
|
||||
# @abc.abstractmethod
|
||||
# async def agenerate_queries(
|
||||
# self, question: str, callbacks: Callbacks = None
|
||||
# ) -> List[str]:
|
||||
# """Generate queries for the given question."""
|
||||
# raise NotImplementedError()
|
||||
#
|
||||
#
|
||||
# class AbstractSearcher(abc.ABC):
|
||||
# """Abstract class for running searches."""
|
||||
#
|
||||
# def search(self, queries: Sequence[str]) -> List[Mapping[str, Any]]:
|
||||
# """Run a search for the given query.
|
||||
#
|
||||
# Args:
|
||||
# queries: the query to run the search for.
|
||||
#
|
||||
# Returns:
|
||||
# a list of search results.
|
||||
# """
|
||||
# raise NotImplementedError()
|
||||
#
|
||||
# async def asearch(self, queries: Sequence[str]) -> List[Mapping[str, Any]]:
|
||||
# """Run a search for the given query.
|
||||
#
|
||||
# Args:
|
||||
# queries: the query to run the search for.
|
||||
#
|
||||
# Returns:
|
||||
# a list of search results.
|
||||
# """
|
||||
# raise NotImplementedError()
|
||||
#
|
||||
#
|
||||
# class BlobCrawler(abc.ABC):
|
||||
# """Crawl a blob and identify links to related content."""
|
||||
#
|
||||
# @abc.abstractmethod
|
||||
# def crawl(self, blob: Blob, query: str, callbacks: Callbacks = None) -> List[str]:
|
||||
# """Explore the blob and identify links to related content that is relevant to the query."""
|
||||
|
||||
class BlobCrawler(abc.ABC):
    """Interface for crawlers that find links to related content in a blob."""

    @abc.abstractmethod
    def crawl(self, blob: Blob, query: str) -> List[str]:
        """Explore the blob and identify links to relevant content.

        Args:
            blob: The blob to explore.
            query: The query that the identified links should be relevant to.

        Returns:
            The identified links, as strings.
        """
|
||||
|
||||
Reference in New Issue
Block a user