mirror of
https://github.com/hwchase17/langchain.git
synced 2026-01-24 05:50:18 +00:00
q
This commit is contained in:
@@ -50,7 +50,7 @@
|
||||
"* Quality of results depends on LLM used, and can be improved by providing more specialized parsers (e.g., parse only the body of articles).\n",
|
||||
"* If asking about people, provide enough information to disambiguate the person.\n",
|
||||
"* Content downloader may get blocked (e.g., if attempting to download from linkedin) -- may need to read the terms of service and set user agents appropriately.\n",
|
||||
"* Chain can be potentially long running (Use initialization parameters to control how many options are eplored).\n",
|
||||
"* Chain can be potentially long running (use initialization parameters to control how many options are explored) -- use async implementation as it uses more concurrency.\n",
|
||||
"* This research chain only implements a single hop at the moment; i.e.,\n",
|
||||
" it goes from the questions to a list of URLs to documents to compiling answers.\n",
|
||||
" Without continuing the crawl, web-sites that require pagination will not be explored fully.\n",
|
||||
@@ -62,9 +62,23 @@
|
||||
"* Continue crawling documents to discover more relevant pages that were not surfaced by the search engine.\n",
|
||||
"* Adapt reading strategy based on nature of question.\n",
|
||||
"* Analyze the query and determine whether the query is a multi-hop query and change search/crawling strategy based on that.\n",
|
||||
"* Provide smaller pieces to an agent. :)\n",
|
||||
"* Break components into tools that can be exposed to an agent. :)\n",
|
||||
"* Add cheaper strategies for selecting which links should be explored further (e.g., based on tf-idf similarity instead of gpt-4)\n",
|
||||
"* Add a summarization chain on top of the individually collected answers"
|
||||
"* Add a summarization chain on top of the individually collected answers.\n",
|
||||
"* Improve strategy to ignore irrelevant information."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4d937a38-66c6-4aa2-87bb-337101cfb112",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Requirements\n",
|
||||
"\n",
|
||||
"Please install: \n",
|
||||
"\n",
|
||||
"* `playwright` for fetching content from the web (or use the RequestsDownloadHandler)\n",
|
||||
"* `lxml` and `markdownify` for parsing HTMLs"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -80,12 +94,15 @@
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.llms import OpenAI\n",
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
"from langchain.chains.research.download import PlaywrightDownloadHandler"
|
||||
"from langchain.chains.research.download import PlaywrightDownloadHandler\n",
|
||||
"# If you don't have playwright installed, can experiment with requests\n",
|
||||
"# Be aware that some web-pages won't download properly as javascript won't be executed\n",
|
||||
"from langchain.chains.research.download import RequestsDownloadHandler "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "70474885-0acd-41b2-8050-15dd54f44f1e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -119,7 +136,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "e74a44b4-2075-4cc6-933e-c769bf3f6002",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -146,7 +163,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 5,
|
||||
"id": "2f538062-14e3-49ab-9b25-bc470eb5869c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -158,7 +175,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 6,
|
||||
"id": "a96f3ed3-10de-4a85-9e93-a8b78d8bfbb6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -177,7 +194,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 7,
|
||||
"id": "3207c696-a72c-4378-b427-7d285f5fdd1c",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -189,7 +206,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 8,
|
||||
"id": "843616b3-32d7-49c7-a42b-b0272d71f3ed",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -205,22 +222,23 @@
|
||||
"\n",
|
||||
"Albert Einstein:\n",
|
||||
"* Education history: Attended elementary school in Munich, Germany, and later attended the Swiss Federal Polytechnic School in Zurich, Switzerland.\n",
|
||||
"* Major contributions: Developed the theory of relativity, made major contributions to quantum theory, and won the Nobel Prize in Physics in 1921. He also made contributions to statistical mechanics, special relativity, general relativity, old quantum theory, quantum mechanics, unified field theory, and collaborated with other scientists on the Einstein–de Haas experiment and the Einstein refrigerator. He also translated his work into over 216 languages, including Afrikaans, Alemannisch, አማርኛ, Anarâškielâ, अंगिका, Ænglisc, Аԥсшәа, العربية, Aragonés, Արեւմտահայերէն, Armãneashti, Arpetan, অসমীয়া, Asturianu, Avañe'ẽ,\n",
|
||||
"* Major contributions: Developed the theory of relativity, made major contributions to quantum theory, and won the Nobel Prize in Physics in 1921. He also published more than 300 scientific papers and 150 non-scientific works. He was also the first to propose the existence of black holes and gravitational waves. He was also a polyglot, speaking over 15 languages, including Afrikaans, Alemannisch, Amharic, Anarâškielâ, Angika, Old English, Abkhazian, Arabic, Aragonese, Western Armenian, Aromanian, Arpitan, Assamese, Asturian, Guarani, Aymara, Azerbaijani, South Azerbaijani, Balinese, Bambara, Bangla, Min Nan Chinese, Basa Banyumasan, Bashkir, Belarusian, Belarusian (Taraškievica orthography), Bhojpuri, Central Bikol, and Bulgarian.\n",
|
||||
"* Names of spouse: Married Mileva Marić in 1903 and Elsa Löwenthal in 1919\n",
|
||||
"----------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
|
||||
"https://www.advergize.com/edu/7-albert-einstein-inventions-contributions/\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"Education History:\n",
|
||||
"* Attended Aargau Cantonal School in Aarau, Switzerland from 1895-1896\n",
|
||||
"* Attended ETH Zurich in Zurich, Switzerland from 1896-1900\n",
|
||||
"* Attended University of Zurich in Zurich, Switzerland from 1900-1901\n",
|
||||
"* Attended Aargau Cantonal School in Switzerland from 1895-1896\n",
|
||||
"* Attended ETH Zurich from 1896-1900\n",
|
||||
"* Received a PhD from the University of Zurich in 1905\n",
|
||||
"\n",
|
||||
"Major Contributions:\n",
|
||||
"* Special Theory of Relativity\n",
|
||||
"* General Theory of Relativity\n",
|
||||
"* Theory of Relativity\n",
|
||||
"* Photoelectric Effect\n",
|
||||
"* Brownian Motion\n",
|
||||
"* Mass-Energy Equivalence\n",
|
||||
"* Bose-Einstein Condensate\n",
|
||||
"* Unified Field Theory\n",
|
||||
"* Quantum Theory of Light\n",
|
||||
"* E=mc2\n",
|
||||
"* Manhattan Project\n",
|
||||
@@ -242,7 +260,8 @@
|
||||
"\n",
|
||||
"Place of Birth: Ulm, Germany\n",
|
||||
"\n",
|
||||
"Short Biography: Albert Einstein was a German-born theoretical physicist who developed the theory of relativity. He is widely considered one of the most influential scientists of the 20\n",
|
||||
"Short Biography:\n",
|
||||
"Albert Einstein was a German-born physicist who developed the theory of relativity. He is widely considered one of the most influential scientists of the 20th century and is known for his mass-energy equivalence formula\n",
|
||||
"----------------------------------------------------------------------------------------------------------------------------------------------------------------\n",
|
||||
"https://www.nobelprize.org/prizes/physics/1921/einstein/biographical/\n",
|
||||
"\n",
|
||||
@@ -288,7 +307,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 9,
|
||||
"id": "76136bff-b7df-4539-9bcb-760fc4449390",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -300,7 +319,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 10,
|
||||
"id": "b352f3f3-6777-4795-acbb-ed26ecac137d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -314,7 +333,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 11,
|
||||
"id": "1c90c305-a89d-42e2-b975-dda039e816b6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -371,7 +390,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 12,
|
||||
"id": "4e2c369c-8763-458e-9ab8-684466395890",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -383,10 +402,11 @@
|
||||
"{'question': \"Compile information about Albert Einstein.\\nIgnore if it's a different Albert Einstein. \\nOnly include information you're certain about.\\n\\nInclude:\\n* education history\\n* major contributions\\n* names of spouse \\n* date of birth\\n* place of birth\\n* a 3 sentence short biography\\n\\nFormat your answer in a bullet point format for each sub-question.\",\n",
|
||||
" 'urls': ['https://en.wikipedia.org/wiki/Albert_Einstein',\n",
|
||||
" 'https://www.britannica.com/biography/Albert-Einstein',\n",
|
||||
" 'https://www.advergize.com/edu/7-albert-einstein-inventions-contributions/',\n",
|
||||
" 'https://www.nobelprize.org/prizes/physics/1921/einstein/biographical/']}"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@@ -406,7 +426,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 13,
|
||||
"id": "c6f9d1b5-e513-4d8d-b325-32eacbee92b4",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@@ -441,7 +461,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.10"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -143,10 +143,11 @@ def _run_searches(queries: Sequence[str], top_k: int = -1) -> List[Mapping[str,
|
||||
results = []
|
||||
for query in queries:
|
||||
result = wrapper.results(query)
|
||||
all_organic_results = result.get("organic_results", [])
|
||||
if top_k <= 0:
|
||||
organic_results = result["organic_results"]
|
||||
organic_results = all_organic_results
|
||||
else:
|
||||
organic_results = result["organic_results"][:top_k]
|
||||
organic_results = all_organic_results[:top_k]
|
||||
results.extend(organic_results)
|
||||
return results
|
||||
|
||||
@@ -173,10 +174,11 @@ async def _arun_searches(
|
||||
finalized_results = []
|
||||
|
||||
for result in results:
|
||||
all_organic_results = result.get("organic_results", [])
|
||||
if top_k <= 0:
|
||||
organic_results = result["organic_results"]
|
||||
organic_results = all_organic_results
|
||||
else:
|
||||
organic_results = result["organic_results"][:top_k]
|
||||
organic_results = all_organic_results[:top_k]
|
||||
|
||||
finalized_results.extend(organic_results)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user