This commit is contained in:
Eugene Yurtsev
2023-06-02 13:07:05 -04:00
parent ad2b777536
commit f0e78d7efd
2 changed files with 21 additions and 55 deletions

View File

@@ -15,6 +15,7 @@ import abc
import asyncio
import mimetypes
from typing import Any, List, Optional, Sequence
from pydantic import ValidationError
from bs4 import BeautifulSoup
@@ -109,10 +110,18 @@ class RequestsDownloadHandler(DownloadHandler):
def _repackage_as_blobs(urls: Sequence[str], contents: Sequence[str]) -> List[Blob]:
    """Repackage the contents as blobs.

    Each URL is paired positionally with its content. The blob's mimetype is
    guessed from the URL, and the URL is stored as the blob's path.

    Args:
        urls: One URL per content item; used as the blob path and to guess
            the mimetype.
        contents: The content for each URL, in the same order as ``urls``.

    Returns:
        One blob per (url, content) pair. ``zip`` stops at the shorter of the
        two sequences, so any unpaired trailing items are ignored.

    Raises:
        ValueError: If a blob cannot be created because validation fails.
            The original ``ValidationError`` is attached as the cause.
    """
    blobs = []
    for url, content in zip(urls, contents):
        # guess_type returns (type, encoding); only the type is needed.
        mimetype = mimetypes.guess_type(url)[0]
        try:
            blob = Blob(data=content, mimetype=mimetype, path=url)
        except ValidationError as err:
            # Chain explicitly so the validation details stay visible to the
            # caller alongside the URL that triggered the failure.
            raise ValueError(
                f"Could not create a blob for content at {url}. "
                f"Content type is {type(content)}"
            ) from err
        blobs.append(blob)
    return blobs
class AutoDownloadHandler(DownloadHandler):

View File

@@ -1,55 +1,12 @@
import abc
from typing import Any, List, Mapping, Sequence
from typing import List
from langchain.callbacks.manager import Callbacks
from langchain.document_loaders.blob_loaders import Blob
#
# class AbstractQueryGenerator(abc.ABC):
# """Abstract class for generating queries."""
#
# @abc.abstractmethod
# def generate_queries(self, question: str, callbacks: Callbacks = None) -> List[str]:
# """Generate queries for the given question."""
# raise NotImplementedError()
#
# @abc.abstractmethod
# async def agenerate_queries(
# self, question: str, callbacks: Callbacks = None
# ) -> List[str]:
# """Generate queries for the given question."""
# raise NotImplementedError()
#
#
# class AbstractSearcher(abc.ABC):
# """Abstract class for running searches."""
#
# def search(self, queries: Sequence[str]) -> List[Mapping[str, Any]]:
# """Run a search for the given query.
#
# Args:
# queries: the query to run the search for.
#
# Returns:
# a list of search results.
# """
# raise NotImplementedError()
#
# async def asearch(self, queries: Sequence[str]) -> List[Mapping[str, Any]]:
# """Run a search for the given query.
#
# Args:
# queries: the query to run the search for.
#
# Returns:
# a list of search results.
# """
# raise NotImplementedError()
#
#
# class BlobCrawler(abc.ABC):
# """Crawl a blob and identify links to related content."""
#
# @abc.abstractmethod
# def crawl(self, blob: Blob, query: str, callbacks: Callbacks = None) -> List[str]:
# """Explore the blob and identify links to related content that is relevant to the query."""
class BlobCrawler(abc.ABC):
    """Interface for crawlers that find links to related content in a blob."""

    @abc.abstractmethod
    def crawl(self, blob: Blob, query: str) -> List[str]:
        """Explore the blob and identify links to relevant content.

        Args:
            blob: The blob to explore.
            query: The query string supplied by the caller; presumably the
                identified links should be relevant to it.

        Returns:
            The identified links, as a list of strings.
        """