mirror of
https://github.com/hwchase17/langchain.git
synced 2026-03-18 02:53:16 +00:00
cr
This commit is contained in:
@@ -38,7 +38,7 @@
|
||||
"from langchain.llms import BaseLLM\n",
|
||||
"from langchain.vectorstores.base import VectorStore\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.chains.base import Chain\n"
|
||||
"from langchain.chains.base import Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -73,6 +73,7 @@
|
||||
"embeddings_model = OpenAIEmbeddings()\n",
|
||||
"# Initialize the vectorstore as empty\n",
|
||||
"import faiss\n",
|
||||
"\n",
|
||||
"embedding_size = 1536\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})"
|
||||
@@ -116,7 +117,12 @@
|
||||
" )\n",
|
||||
" prompt = PromptTemplate(\n",
|
||||
" template=task_creation_template,\n",
|
||||
" input_variables=[\"result\", \"task_description\", \"incomplete_tasks\", \"objective\"],\n",
|
||||
" input_variables=[\n",
|
||||
" \"result\",\n",
|
||||
" \"task_description\",\n",
|
||||
" \"incomplete_tasks\",\n",
|
||||
" \"objective\",\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
@@ -147,7 +153,7 @@
|
||||
" template=task_prioritization_template,\n",
|
||||
" input_variables=[\"task_names\", \"next_task_id\", \"objective\"],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)\n"
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -173,7 +179,7 @@
|
||||
" template=execution_template,\n",
|
||||
" input_variables=[\"objective\", \"context\", \"task\"],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)\n"
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -193,11 +199,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_next_task(task_creation_chain: LLMChain, result: Dict, task_description: str, task_list: List[str], objective: str) -> List[Dict]:\n",
|
||||
"def get_next_task(\n",
|
||||
" task_creation_chain: LLMChain,\n",
|
||||
" result: Dict,\n",
|
||||
" task_description: str,\n",
|
||||
" task_list: List[str],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Get the next task.\"\"\"\n",
|
||||
" incomplete_tasks = \", \".join(task_list)\n",
|
||||
" response = task_creation_chain.run(result=result, task_description=task_description, incomplete_tasks=incomplete_tasks, objective=objective)\n",
|
||||
" new_tasks = response.split('\\n')\n",
|
||||
" response = task_creation_chain.run(\n",
|
||||
" result=result,\n",
|
||||
" task_description=task_description,\n",
|
||||
" incomplete_tasks=incomplete_tasks,\n",
|
||||
" objective=objective,\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" return [{\"task_name\": task_name} for task_name in new_tasks if task_name.strip()]"
|
||||
]
|
||||
},
|
||||
@@ -208,12 +225,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def prioritize_tasks(task_prioritization_chain: LLMChain, this_task_id: int, task_list: List[Dict], objective: str) -> List[Dict]:\n",
|
||||
"def prioritize_tasks(\n",
|
||||
" task_prioritization_chain: LLMChain,\n",
|
||||
" this_task_id: int,\n",
|
||||
" task_list: List[Dict],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Prioritize tasks.\"\"\"\n",
|
||||
" task_names = [t[\"task_name\"] for t in task_list]\n",
|
||||
" next_task_id = int(this_task_id) + 1\n",
|
||||
" response = task_prioritization_chain.run(task_names=task_names, next_task_id=next_task_id, objective=objective)\n",
|
||||
" new_tasks = response.split('\\n')\n",
|
||||
" response = task_prioritization_chain.run(\n",
|
||||
" task_names=task_names, next_task_id=next_task_id, objective=objective\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" prioritized_task_list = []\n",
|
||||
" for task_string in new_tasks:\n",
|
||||
" if not task_string.strip():\n",
|
||||
@@ -239,9 +263,12 @@
|
||||
" if not results:\n",
|
||||
" return []\n",
|
||||
" sorted_results, _ = zip(*sorted(results, key=lambda x: x[1], reverse=True))\n",
|
||||
" return [str(item.metadata['task']) for item in sorted_results]\n",
|
||||
" return [str(item.metadata[\"task\"]) for item in sorted_results]\n",
|
||||
"\n",
|
||||
"def execute_task(vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5) -> str:\n",
|
||||
"\n",
|
||||
"def execute_task(\n",
|
||||
" vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5\n",
|
||||
") -> str:\n",
|
||||
" \"\"\"Execute a task.\"\"\"\n",
|
||||
" context = _get_top_tasks(vectorstore, query=objective, k=k)\n",
|
||||
" return execution_chain.run(objective=objective, context=context, task=task)"
|
||||
@@ -254,7 +281,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"class BabyAGI(Chain, BaseModel):\n",
|
||||
" \"\"\"Controller model for the BabyAGI agent.\"\"\"\n",
|
||||
"\n",
|
||||
@@ -265,9 +291,10 @@
|
||||
" task_id_counter: int = Field(1)\n",
|
||||
" vectorstore: VectorStore = Field(init=False)\n",
|
||||
" max_iterations: Optional[int] = None\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" class Config:\n",
|
||||
" \"\"\"Configuration for this pydantic object.\"\"\"\n",
|
||||
"\n",
|
||||
" arbitrary_types_allowed = True\n",
|
||||
"\n",
|
||||
" def add_task(self, task: Dict):\n",
|
||||
@@ -285,18 +312,18 @@
|
||||
" def print_task_result(self, result: str):\n",
|
||||
" print(\"\\033[93m\\033[1m\" + \"\\n*****TASK RESULT*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(result)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def input_keys(self) -> List[str]:\n",
|
||||
" return [\"objective\"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def output_keys(self) -> List[str]:\n",
|
||||
" return []\n",
|
||||
"\n",
|
||||
" def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:\n",
|
||||
" \"\"\"Run the agent.\"\"\"\n",
|
||||
" objective = inputs['objective']\n",
|
||||
" objective = inputs[\"objective\"]\n",
|
||||
" first_task = inputs.get(\"first_task\", \"Make a todo list\")\n",
|
||||
" self.add_task({\"task_id\": 1, \"task_name\": first_task})\n",
|
||||
" num_iters = 0\n",
|
||||
@@ -325,7 +352,11 @@
|
||||
"\n",
|
||||
" # Step 4: Create new tasks and reprioritize task list\n",
|
||||
" new_tasks = get_next_task(\n",
|
||||
" self.task_creation_chain, result, task[\"task_name\"], [t[\"task_name\"] for t in self.task_list], objective\n",
|
||||
" self.task_creation_chain,\n",
|
||||
" result,\n",
|
||||
" task[\"task_name\"],\n",
|
||||
" [t[\"task_name\"] for t in self.task_list],\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" for new_task in new_tasks:\n",
|
||||
" self.task_id_counter += 1\n",
|
||||
@@ -333,27 +364,26 @@
|
||||
" self.add_task(new_task)\n",
|
||||
" self.task_list = deque(\n",
|
||||
" prioritize_tasks(\n",
|
||||
" self.task_prioritization_chain, this_task_id, list(self.task_list), objective\n",
|
||||
" self.task_prioritization_chain,\n",
|
||||
" this_task_id,\n",
|
||||
" list(self.task_list),\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" num_iters += 1\n",
|
||||
" if self.max_iterations is not None and num_iters == self.max_iterations:\n",
|
||||
" print(\"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(\n",
|
||||
" \"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\"\n",
|
||||
" )\n",
|
||||
" break\n",
|
||||
" return {}\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(\n",
|
||||
" cls,\n",
|
||||
" llm: BaseLLM,\n",
|
||||
" vectorstore: VectorStore,\n",
|
||||
" verbose: bool = False,\n",
|
||||
" **kwargs\n",
|
||||
" cls, llm: BaseLLM, vectorstore: VectorStore, verbose: bool = False, **kwargs\n",
|
||||
" ) -> \"BabyAGI\":\n",
|
||||
" \"\"\"Initialize the BabyAGI Controller.\"\"\"\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(llm, verbose=verbose)\n",
|
||||
" task_prioritization_chain = TaskPrioritizationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
@@ -363,7 +393,7 @@
|
||||
" task_prioritization_chain=task_prioritization_chain,\n",
|
||||
" execution_chain=execution_chain,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" **kwargs\n",
|
||||
" **kwargs,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@@ -405,14 +435,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Logging of LLMChains\n",
|
||||
"verbose=False\n",
|
||||
"verbose = False\n",
|
||||
"# If None, will keep on going forever\n",
|
||||
"max_iterations: Optional[int] = 3\n",
|
||||
"baby_agi = BabyAGI.from_llm(\n",
|
||||
" llm=llm,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" verbose=verbose,\n",
|
||||
" max_iterations=max_iterations\n",
|
||||
" llm=llm, vectorstore=vectorstore, verbose=verbose, max_iterations=max_iterations\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
"from langchain.llms import BaseLLM\n",
|
||||
"from langchain.vectorstores.base import VectorStore\n",
|
||||
"from pydantic import BaseModel, Field\n",
|
||||
"from langchain.chains.base import Chain\n"
|
||||
"from langchain.chains.base import Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -54,7 +54,9 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%pip install faiss-cpu > /dev/null%pip install google-search-results > /dev/nullfrom langchain.vectorstores import FAISS\n",
|
||||
"%pip install faiss-cpu > /dev/null\n",
|
||||
"%pip install google-search-results > /dev/null\n",
|
||||
"from langchain.vectorstores import FAISS\n",
|
||||
"from langchain.docstore import InMemoryDocstore"
|
||||
]
|
||||
},
|
||||
@@ -69,6 +71,7 @@
|
||||
"embeddings_model = OpenAIEmbeddings()\n",
|
||||
"# Initialize the vectorstore as empty\n",
|
||||
"import faiss\n",
|
||||
"\n",
|
||||
"embedding_size = 1536\n",
|
||||
"index = faiss.IndexFlatL2(embedding_size)\n",
|
||||
"vectorstore = FAISS(embeddings_model.embed_query, index, InMemoryDocstore({}), {})"
|
||||
@@ -115,7 +118,12 @@
|
||||
" )\n",
|
||||
" prompt = PromptTemplate(\n",
|
||||
" template=task_creation_template,\n",
|
||||
" input_variables=[\"result\", \"task_description\", \"incomplete_tasks\", \"objective\"],\n",
|
||||
" input_variables=[\n",
|
||||
" \"result\",\n",
|
||||
" \"task_description\",\n",
|
||||
" \"incomplete_tasks\",\n",
|
||||
" \"objective\",\n",
|
||||
" ],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
@@ -146,7 +154,7 @@
|
||||
" template=task_prioritization_template,\n",
|
||||
" input_variables=[\"task_names\", \"next_task_id\", \"objective\"],\n",
|
||||
" )\n",
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)\n"
|
||||
" return cls(prompt=prompt, llm=llm, verbose=verbose)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -158,20 +166,23 @@
|
||||
"source": [
|
||||
"from langchain.agents import ZeroShotAgent, Tool, AgentExecutor\n",
|
||||
"from langchain import OpenAI, SerpAPIWrapper, LLMChain\n",
|
||||
"todo_prompt = PromptTemplate.from_template(\"You are a planner who is an expert at coming up with a todo list for a given objective. Come up with a todo list for this objective: {objective}\")\n",
|
||||
"\n",
|
||||
"todo_prompt = PromptTemplate.from_template(\n",
|
||||
" \"You are a planner who is an expert at coming up with a todo list for a given objective. Come up with a todo list for this objective: {objective}\"\n",
|
||||
")\n",
|
||||
"todo_chain = LLMChain(llm=OpenAI(temperature=0), prompt=todo_prompt)\n",
|
||||
"search = SerpAPIWrapper()\n",
|
||||
"tools = [\n",
|
||||
" Tool(\n",
|
||||
" name = \"Search\",\n",
|
||||
" name=\"Search\",\n",
|
||||
" func=search.run,\n",
|
||||
" description=\"useful for when you need to answer questions about current events\"\n",
|
||||
" description=\"useful for when you need to answer questions about current events\",\n",
|
||||
" ),\n",
|
||||
" Tool(\n",
|
||||
" name = \"TODO\",\n",
|
||||
" name=\"TODO\",\n",
|
||||
" func=todo_chain.run,\n",
|
||||
" description=\"useful for when you need to come up with todo lists. Input: an objective to create a todo list for. Output: a todo list for that objective. Please be very clear what the objective is!\"\n",
|
||||
" )\n",
|
||||
" description=\"useful for when you need to come up with todo lists. Input: an objective to create a todo list for. Output: a todo list for that objective. Please be very clear what the objective is!\",\n",
|
||||
" ),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
@@ -179,10 +190,10 @@
|
||||
"suffix = \"\"\"Question: {task}\n",
|
||||
"{agent_scratchpad}\"\"\"\n",
|
||||
"prompt = ZeroShotAgent.create_prompt(\n",
|
||||
" tools, \n",
|
||||
" prefix=prefix, \n",
|
||||
" suffix=suffix, \n",
|
||||
" input_variables=[\"objective\", \"task\", \"context\",\"agent_scratchpad\"]\n",
|
||||
" tools,\n",
|
||||
" prefix=prefix,\n",
|
||||
" suffix=suffix,\n",
|
||||
" input_variables=[\"objective\", \"task\", \"context\", \"agent_scratchpad\"],\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -203,11 +214,22 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_next_task(task_creation_chain: LLMChain, result: Dict, task_description: str, task_list: List[str], objective: str) -> List[Dict]:\n",
|
||||
"def get_next_task(\n",
|
||||
" task_creation_chain: LLMChain,\n",
|
||||
" result: Dict,\n",
|
||||
" task_description: str,\n",
|
||||
" task_list: List[str],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Get the next task.\"\"\"\n",
|
||||
" incomplete_tasks = \", \".join(task_list)\n",
|
||||
" response = task_creation_chain.run(result=result, task_description=task_description, incomplete_tasks=incomplete_tasks, objective=objective)\n",
|
||||
" new_tasks = response.split('\\n')\n",
|
||||
" response = task_creation_chain.run(\n",
|
||||
" result=result,\n",
|
||||
" task_description=task_description,\n",
|
||||
" incomplete_tasks=incomplete_tasks,\n",
|
||||
" objective=objective,\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" return [{\"task_name\": task_name} for task_name in new_tasks if task_name.strip()]"
|
||||
]
|
||||
},
|
||||
@@ -218,12 +240,19 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def prioritize_tasks(task_prioritization_chain: LLMChain, this_task_id: int, task_list: List[Dict], objective: str) -> List[Dict]:\n",
|
||||
"def prioritize_tasks(\n",
|
||||
" task_prioritization_chain: LLMChain,\n",
|
||||
" this_task_id: int,\n",
|
||||
" task_list: List[Dict],\n",
|
||||
" objective: str,\n",
|
||||
") -> List[Dict]:\n",
|
||||
" \"\"\"Prioritize tasks.\"\"\"\n",
|
||||
" task_names = [t[\"task_name\"] for t in task_list]\n",
|
||||
" next_task_id = int(this_task_id) + 1\n",
|
||||
" response = task_prioritization_chain.run(task_names=task_names, next_task_id=next_task_id, objective=objective)\n",
|
||||
" new_tasks = response.split('\\n')\n",
|
||||
" response = task_prioritization_chain.run(\n",
|
||||
" task_names=task_names, next_task_id=next_task_id, objective=objective\n",
|
||||
" )\n",
|
||||
" new_tasks = response.split(\"\\n\")\n",
|
||||
" prioritized_task_list = []\n",
|
||||
" for task_string in new_tasks:\n",
|
||||
" if not task_string.strip():\n",
|
||||
@@ -249,9 +278,12 @@
|
||||
" if not results:\n",
|
||||
" return []\n",
|
||||
" sorted_results, _ = zip(*sorted(results, key=lambda x: x[1], reverse=True))\n",
|
||||
" return [str(item.metadata['task']) for item in sorted_results]\n",
|
||||
" return [str(item.metadata[\"task\"]) for item in sorted_results]\n",
|
||||
"\n",
|
||||
"def execute_task(vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5) -> str:\n",
|
||||
"\n",
|
||||
"def execute_task(\n",
|
||||
" vectorstore, execution_chain: LLMChain, objective: str, task: str, k: int = 5\n",
|
||||
") -> str:\n",
|
||||
" \"\"\"Execute a task.\"\"\"\n",
|
||||
" context = _get_top_tasks(vectorstore, query=objective, k=k)\n",
|
||||
" return execution_chain.run(objective=objective, context=context, task=task)"
|
||||
@@ -264,7 +296,6 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"class BabyAGI(Chain, BaseModel):\n",
|
||||
" \"\"\"Controller model for the BabyAGI agent.\"\"\"\n",
|
||||
"\n",
|
||||
@@ -275,9 +306,10 @@
|
||||
" task_id_counter: int = Field(1)\n",
|
||||
" vectorstore: VectorStore = Field(init=False)\n",
|
||||
" max_iterations: Optional[int] = None\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" class Config:\n",
|
||||
" \"\"\"Configuration for this pydantic object.\"\"\"\n",
|
||||
"\n",
|
||||
" arbitrary_types_allowed = True\n",
|
||||
"\n",
|
||||
" def add_task(self, task: Dict):\n",
|
||||
@@ -295,18 +327,18 @@
|
||||
" def print_task_result(self, result: str):\n",
|
||||
" print(\"\\033[93m\\033[1m\" + \"\\n*****TASK RESULT*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(result)\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def input_keys(self) -> List[str]:\n",
|
||||
" return [\"objective\"]\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def output_keys(self) -> List[str]:\n",
|
||||
" return []\n",
|
||||
"\n",
|
||||
" def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:\n",
|
||||
" \"\"\"Run the agent.\"\"\"\n",
|
||||
" objective = inputs['objective']\n",
|
||||
" objective = inputs[\"objective\"]\n",
|
||||
" first_task = inputs.get(\"first_task\", \"Make a todo list\")\n",
|
||||
" self.add_task({\"task_id\": 1, \"task_name\": first_task})\n",
|
||||
" num_iters = 0\n",
|
||||
@@ -335,7 +367,11 @@
|
||||
"\n",
|
||||
" # Step 4: Create new tasks and reprioritize task list\n",
|
||||
" new_tasks = get_next_task(\n",
|
||||
" self.task_creation_chain, result, task[\"task_name\"], [t[\"task_name\"] for t in self.task_list], objective\n",
|
||||
" self.task_creation_chain,\n",
|
||||
" result,\n",
|
||||
" task[\"task_name\"],\n",
|
||||
" [t[\"task_name\"] for t in self.task_list],\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" for new_task in new_tasks:\n",
|
||||
" self.task_id_counter += 1\n",
|
||||
@@ -343,40 +379,41 @@
|
||||
" self.add_task(new_task)\n",
|
||||
" self.task_list = deque(\n",
|
||||
" prioritize_tasks(\n",
|
||||
" self.task_prioritization_chain, this_task_id, list(self.task_list), objective\n",
|
||||
" self.task_prioritization_chain,\n",
|
||||
" this_task_id,\n",
|
||||
" list(self.task_list),\n",
|
||||
" objective,\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" num_iters += 1\n",
|
||||
" if self.max_iterations is not None and num_iters == self.max_iterations:\n",
|
||||
" print(\"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\")\n",
|
||||
" print(\n",
|
||||
" \"\\033[91m\\033[1m\" + \"\\n*****TASK ENDING*****\\n\" + \"\\033[0m\\033[0m\"\n",
|
||||
" )\n",
|
||||
" break\n",
|
||||
" return {}\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_llm(\n",
|
||||
" cls,\n",
|
||||
" llm: BaseLLM,\n",
|
||||
" vectorstore: VectorStore,\n",
|
||||
" verbose: bool = False,\n",
|
||||
" **kwargs\n",
|
||||
" cls, llm: BaseLLM, vectorstore: VectorStore, verbose: bool = False, **kwargs\n",
|
||||
" ) -> \"BabyAGI\":\n",
|
||||
" \"\"\"Initialize the BabyAGI Controller.\"\"\"\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
" task_creation_chain = TaskCreationChain.from_llm(llm, verbose=verbose)\n",
|
||||
" task_prioritization_chain = TaskPrioritizationChain.from_llm(\n",
|
||||
" llm, verbose=verbose\n",
|
||||
" )\n",
|
||||
" llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
|
||||
" tool_names = [tool.name for tool in tools]\n",
|
||||
" agent = ZeroShotAgent(llm_chain=llm_chain, allowed_tools=tool_names)\n",
|
||||
" agent_executor = AgentExecutor.from_agent_and_tools(agent=agent, tools=tools, verbose=True)\n",
|
||||
" agent_executor = AgentExecutor.from_agent_and_tools(\n",
|
||||
" agent=agent, tools=tools, verbose=True\n",
|
||||
" )\n",
|
||||
" return cls(\n",
|
||||
" task_creation_chain=task_creation_chain,\n",
|
||||
" task_prioritization_chain=task_prioritization_chain,\n",
|
||||
" execution_chain=agent_executor,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" **kwargs\n",
|
||||
" **kwargs,\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
@@ -418,14 +455,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Logging of LLMChains\n",
|
||||
"verbose=False\n",
|
||||
"verbose = False\n",
|
||||
"# If None, will keep on going forever\n",
|
||||
"max_iterations: Optional[int] = 3\n",
|
||||
"baby_agi = BabyAGI.from_llm(\n",
|
||||
" llm=llm,\n",
|
||||
" vectorstore=vectorstore,\n",
|
||||
" verbose=verbose,\n",
|
||||
" max_iterations=max_iterations\n",
|
||||
" llm=llm, vectorstore=vectorstore, verbose=verbose, max_iterations=max_iterations\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -23,3 +23,4 @@ Query Understanding: GPT-4 processes user queries, grasping the context and extr
|
||||
|
||||
The full tutorial is available below.
|
||||
- [Twitter the-algorithm codebase analysis with Deep Lake](code/twitter-the-algorithm-analysis-deeplake.ipynb): A notebook walking through how to parse GitHub source code and run conversational queries over it.
|
||||
- [LangChain codebase analysis with Deep Lake](code/code-analysis-deeplake.ipynb): A notebook walking through how to analyze and do question answering over THIS code base.
|
||||
|
||||
644
docs/use_cases/code/code-analysis-deeplake.ipynb
Normal file
644
docs/use_cases/code/code-analysis-deeplake.ipynb
Normal file
@@ -0,0 +1,644 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Use LangChain, GPT and Deep Lake to work with a code base\n",
|
||||
"In this tutorial, we are going to use LangChain + Deep Lake with GPT to analyze the code base of LangChain itself. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Design"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"1. Prepare data:\n",
|
||||
"    1. Load all Python project files using `langchain.document_loaders.TextLoader`. We will call these files the **documents**.\n",
|
||||
"    2. Split all documents into chunks using `langchain.text_splitter.CharacterTextSplitter`.\n",
|
||||
"    3. Embed the chunks and upload them into Deep Lake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain.vectorstores.DeepLake`\n",
|
||||
"2. Question-Answering:\n",
|
||||
" 1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
|
||||
" 2. Prepare questions.\n",
|
||||
"    3. Get answers by running the chain.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Implementation"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### Integration preparations"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We need to set up keys for external services and install necessary python libraries."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!python3 -m pip install --upgrade langchain deeplake openai"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Set up OpenAI embeddings and the Deep Lake multi-modal vector store API, and authenticate. \n",
|
||||
"\n",
|
||||
"For the full Deep Lake documentation, see https://docs.activeloop.ai/ and for the API reference, see https://docs.deeplake.ai/en/latest/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from getpass import getpass\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass()\n",
|
||||
"# Please manually enter OpenAI Key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Authenticate into Deep Lake if you want to create your own dataset and publish it. You can get an API key from the platform at [app.activeloop.ai](https://app.activeloop.ai)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" ········\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"os.environ['ACTIVELOOP_TOKEN'] = getpass('Activeloop Token:')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Prepare data "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Load all repository files. Here we assume this notebook was downloaded as part of a fork of the langchain repo, and we work with the Python files of the `langchain` repo.\n",
|
||||
"\n",
|
||||
"If you want to use files from a different repo, change `root_dir` to the root dir of your repo."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"1147\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"root_dir = '../../../..'\n",
|
||||
"\n",
|
||||
"docs = []\n",
|
||||
"for dirpath, dirnames, filenames in os.walk(root_dir):\n",
|
||||
" for file in filenames:\n",
|
||||
" if file.endswith('.py') and '/.venv/' not in dirpath:\n",
|
||||
" try: \n",
|
||||
" loader = TextLoader(os.path.join(dirpath, file), encoding='utf-8')\n",
|
||||
" docs.extend(loader.load_and_split())\n",
|
||||
" except Exception as e: \n",
|
||||
" pass\n",
|
||||
"print(f'{len(docs)}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then, chunk the files"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Created a chunk of size 1620, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1213, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1263, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1448, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1120, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1148, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1826, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1260, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1195, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2147, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1410, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1269, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1030, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1046, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1024, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1026, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1285, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1370, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1031, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1999, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1029, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1120, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1033, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1143, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1416, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2482, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1890, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1418, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1848, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1069, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2369, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1045, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1501, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1208, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1950, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1283, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1414, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1304, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1224, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1060, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2461, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1099, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1178, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1449, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1345, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 3359, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2248, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1589, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2104, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1505, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1387, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1215, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1240, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1635, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1075, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2180, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1791, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1555, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1082, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1225, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1287, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1085, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1117, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1966, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1150, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1285, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1150, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1585, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1208, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1267, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1542, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1183, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2424, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1017, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1304, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1379, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1324, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1205, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1056, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1195, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 3608, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1058, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1075, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1217, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1109, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1440, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1046, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1220, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1403, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1241, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1427, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1049, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1580, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1565, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1131, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1425, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1054, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1027, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2559, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1028, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1382, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1888, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1475, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1652, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1891, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1899, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1021, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1085, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1854, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1672, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2537, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1251, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1734, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1642, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1376, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1253, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1642, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1419, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1438, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1427, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1684, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1760, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1157, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2504, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1082, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2268, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1784, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1311, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2972, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1144, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1825, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1508, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2901, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1715, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1062, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1206, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1102, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1184, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1002, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1065, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1871, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1754, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2413, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1771, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2054, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2000, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2061, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1066, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1419, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1368, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1008, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1227, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1745, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 2296, which is longer than the specified 1000\n",
|
||||
"Created a chunk of size 1083, which is longer than the specified 1000\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"3477\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"texts = text_splitter.split_documents(docs)\n",
|
||||
"print(f\"{len(texts)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Then embed chunks and upload them to the DeepLake.\n",
|
||||
"\n",
|
||||
"This can take several minutes. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', document_model_name='text-embedding-ada-002', query_model_name='text-embedding-ada-002', embedding_ctx_length=8191, openai_api_key=None, openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6)"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"\n",
|
||||
"embeddings = OpenAIEmbeddings()\n",
|
||||
"embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.vectorstores import DeepLake\n",
|
||||
"\n",
|
||||
"db = DeepLake.from_documents(texts, embeddings, dataset_path=f\"hub://{DEEPLAKE_ACCOUNT_NAME}/langchain-code\")\n",
|
||||
"db"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Question Answering\n",
|
||||
"First load the dataset, construct the retriever, then construct the Conversational Chain"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/user_name/langchain-code\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://user_name/langchain-code loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Deep Lake Dataset in hub://user_name/langchain-code already exists, loading from the storage\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Dataset(path='hub://user_name/langchain-code', read_only=True, tensors=['embedding', 'ids', 'metadata', 'text'])\n",
|
||||
"\n",
|
||||
" tensor htype shape dtype compression\n",
|
||||
" ------- ------- ------- ------- ------- \n",
|
||||
" embedding generic (3477, 1536) float32 None \n",
|
||||
" ids text (3477, 1) str None \n",
|
||||
" metadata json (3477, 1) str None \n",
|
||||
" text text (3477, 1) str None \n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db = DeepLake(dataset_path=f\"hub://{DEEPLAKE_ACCOUNT_NAME}/langchain-code\", read_only=True, embedding_function=embeddings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"retriever = db.as_retriever()\n",
|
||||
"retriever.search_kwargs['distance_metric'] = 'cos'\n",
|
||||
"retriever.search_kwargs['fetch_k'] = 20\n",
|
||||
"retriever.search_kwargs['maximal_marginal_relevance'] = True\n",
|
||||
"retriever.search_kwargs['k'] = 20"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"You can also specify user defined functions using [Deep Lake filters](https://docs.deeplake.ai/en/latest/deeplake.core.dataset.html#deeplake.core.dataset.Dataset.filter)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def filter(x):\n",
|
||||
" # filter based on source code\n",
|
||||
" if 'something' in x['text'].data()['value']:\n",
|
||||
" return False\n",
|
||||
" \n",
|
||||
" # filter based on path e.g. extension\n",
|
||||
" metadata = x['metadata'].data()['value']\n",
|
||||
" return 'only_this' in metadata['source'] or 'also_that' in metadata['source']\n",
|
||||
"\n",
|
||||
"### turn on below for custom filtering\n",
|
||||
"# retriever.search_kwargs['filter'] = filter"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.chains import ConversationalRetrievalChain\n",
|
||||
"\n",
|
||||
"model = ChatOpenAI(model='gpt-3.5-turbo') # 'ada' 'gpt-3.5-turbo' 'gpt-4',\n",
|
||||
"qa = ConversationalRetrievalChain.from_llm(model,retriever=retriever)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"questions = [\n",
|
||||
" \"What is the class hierarchy?\",\n",
|
||||
" # \"What classes are derived from the Chain class?\",\n",
|
||||
" # \"What classes and functions in the ./langchain/utilities/ forlder are not covered by unit tests?\",\n",
|
||||
" # \"What one improvement do you propose in code in relation to the class herarchy for the Chain class?\",\n",
|
||||
"] \n",
|
||||
"chat_history = []\n",
|
||||
"\n",
|
||||
"for question in questions: \n",
|
||||
" result = qa({\"question\": question, \"chat_history\": chat_history})\n",
|
||||
" chat_history.append((question, result['answer']))\n",
|
||||
" print(f\"-> **Question**: {question} \\n\")\n",
|
||||
" print(f\"**Answer**: {result['answer']} \\n\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"-> **Question**: What is the class hierarchy? \n",
|
||||
"\n",
|
||||
"**Answer**: There are several class hierarchies in the provided code, so I'll list a few:\n",
|
||||
"\n",
|
||||
"1. `BaseModel` -> `ConstitutionalPrinciple`: `ConstitutionalPrinciple` is a subclass of `BaseModel`.\n",
|
||||
"2. `BasePromptTemplate` -> `StringPromptTemplate`, `AIMessagePromptTemplate`, `BaseChatPromptTemplate`, `ChatMessagePromptTemplate`, `ChatPromptTemplate`, `HumanMessagePromptTemplate`, `MessagesPlaceholder`, `SystemMessagePromptTemplate`, `FewShotPromptTemplate`, `FewShotPromptWithTemplates`, `Prompt`, `PromptTemplate`: All of these classes are subclasses of `BasePromptTemplate`.\n",
|
||||
"3. `APIChain`, `Chain`, `MapReduceDocumentsChain`, `MapRerankDocumentsChain`, `RefineDocumentsChain`, `StuffDocumentsChain`, `HypotheticalDocumentEmbedder`, `LLMChain`, `LLMBashChain`, `LLMCheckerChain`, `LLMMathChain`, `LLMRequestsChain`, `PALChain`, `QAWithSourcesChain`, `VectorDBQAWithSourcesChain`, `VectorDBQA`, `SQLDatabaseChain`: All of these classes are subclasses of `Chain`.\n",
|
||||
"4. `BaseLoader`: `BaseLoader` is a subclass of `ABC`.\n",
|
||||
"5. `BaseTracer` -> `ChainRun`, `LLMRun`, `SharedTracer`, `ToolRun`, `Tracer`, `TracerException`, `TracerSession`: All of these classes are subclasses of `BaseTracer`.\n",
|
||||
"6. `OpenAIEmbeddings`, `HuggingFaceEmbeddings`, `CohereEmbeddings`, `JinaEmbeddings`, `LlamaCppEmbeddings`, `HuggingFaceHubEmbeddings`, `TensorflowHubEmbeddings`, `SagemakerEndpointEmbeddings`, `HuggingFaceInstructEmbeddings`, `SelfHostedEmbeddings`, `SelfHostedHuggingFaceEmbeddings`, `SelfHostedHuggingFaceInstructEmbeddings`, `FakeEmbeddings`, `AlephAlphaAsymmetricSemanticEmbedding`, `AlephAlphaSymmetricSemanticEmbedding`: All of these classes are subclasses of `BaseLLM`. \n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-> **Question**: What classes are derived from the Chain class? \n",
|
||||
"\n",
|
||||
"**Answer**: There are multiple classes that are derived from the Chain class. Some of them are:\n",
|
||||
"- APIChain\n",
|
||||
"- AnalyzeDocumentChain\n",
|
||||
"- ChatVectorDBChain\n",
|
||||
"- CombineDocumentsChain\n",
|
||||
"- ConstitutionalChain\n",
|
||||
"- ConversationChain\n",
|
||||
"- GraphQAChain\n",
|
||||
"- HypotheticalDocumentEmbedder\n",
|
||||
"- LLMChain\n",
|
||||
"- LLMCheckerChain\n",
|
||||
"- LLMRequestsChain\n",
|
||||
"- LLMSummarizationCheckerChain\n",
|
||||
"- MapReduceChain\n",
|
||||
"- OpenAPIEndpointChain\n",
|
||||
"- PALChain\n",
|
||||
"- QAWithSourcesChain\n",
|
||||
"- RetrievalQA\n",
|
||||
"- RetrievalQAWithSourcesChain\n",
|
||||
"- SequentialChain\n",
|
||||
"- SQLDatabaseChain\n",
|
||||
"- TransformChain\n",
|
||||
"- VectorDBQA\n",
|
||||
"- VectorDBQAWithSourcesChain\n",
|
||||
"\n",
|
||||
"There might be more classes that are derived from the Chain class as it is possible to create custom classes that extend the Chain class.\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"-> **Question**: What classes and functions in the ./langchain/utilities/ forlder are not covered by unit tests? \n",
|
||||
"\n",
|
||||
"**Answer**: All classes and functions in the `./langchain/utilities/` folder seem to have unit tests written for them. \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -18,31 +18,13 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Define OpenAI embeddings, Deep Lake multi-modal vector store api and authenticate. For full documentation of Deep Lake please follow https://docs.activeloop.ai/ and API reference https://docs.deeplake.ai/en/latest/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import DeepLake\n",
|
||||
"Define OpenAI embeddings, Deep Lake multi-modal vector store api and authenticate. For full documentation of Deep Lake please follow [docs](https://docs.activeloop.ai/) and [API reference](https://docs.deeplake.ai/en/latest/).\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY']='sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Authenticate into Deep Lake if you want to create your own dataset and publish it. You can get an API key from the platform at https://app.activeloop.ai"
|
||||
"Authenticate into Deep Lake if you want to create your own dataset and publish it. You can get an API key from the [platform](https://app.activeloop.ai)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -51,7 +33,15 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!activeloop login -t <TOKEN>"
|
||||
"import os\n",
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import DeepLake\n",
|
||||
"\n",
|
||||
"os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')\n",
|
||||
"os.environ['ACTIVELOOP_TOKEN'] = getpass.getpass('Activeloop Token:')\n",
|
||||
"embeddings = OpenAIEmbeddings()"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -143,15 +133,35 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"This dataset can be visualized in Jupyter Notebook by ds.visualize() or at https://app.activeloop.ai/davitbun/twitter-algorithm\n",
|
||||
"\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"-"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"hub://davitbun/twitter-algorithm loaded successfully.\n",
|
||||
"\n"
|
||||
]
|
||||
@@ -184,7 +194,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -205,7 +215,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -224,7 +234,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@@ -267,9 +277,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"-> **Question**: What does favCountParams do? \n",
|
||||
"\n",
|
||||
"**Answer**: `favCountParams` is an optional ThriftLinearFeatureRankingParams instance that represents the parameters related to the \"favorite count\" feature in the ranking process. It is used to control the weight of the favorite count feature while ranking tweets. The favorite count is the number of times a tweet has been marked as a favorite by users, and it is considered an important signal in the ranking of tweets. By using `favCountParams`, the system can adjust the importance of the favorite count while calculating the final ranking score of a tweet. \n",
|
||||
"\n",
|
||||
"-> **Question**: is it Likes + Bookmarks, or not clear from the code?\n",
|
||||
"\n",
|
||||
"**Answer**: From the provided code, it is not clear if the favorite count metric is determined by the sum of likes and bookmarks. The favorite count is mentioned in the code, but there is no explicit reference to how it is calculated in terms of likes and bookmarks. \n",
|
||||
@@ -423,7 +438,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
Evaluation
|
||||
==============
|
||||
==========
|
||||
|
||||
.. note::
|
||||
`Conceptual Guide <https://docs.langchain.com/docs/use-cases/evaluation>`_
|
||||
@@ -83,7 +83,7 @@ The existing examples we have are:
|
||||
|
||||
|
||||
Other Examples
|
||||
------------
|
||||
--------------
|
||||
|
||||
In addition, we also have some more generic resources for evaluation.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user