mirror of
https://github.com/hwchase17/langchain.git
synced 2026-02-19 13:25:35 +00:00
Compare commits
13 Commits
langchain-
...
dev2049/re
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e8e3bbc69b | ||
|
|
39b2fb025a | ||
|
|
56a871422c | ||
|
|
b3203ad76d | ||
|
|
0304ee495a | ||
|
|
1405808401 | ||
|
|
7e05545994 | ||
|
|
2488932bce | ||
|
|
d1a50c59e2 | ||
|
|
ea9bff486f | ||
|
|
792ece48a5 | ||
|
|
2c31dd783b | ||
|
|
626b9ece68 |
@@ -1,7 +1,6 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ab66dd43",
|
||||
"metadata": {},
|
||||
@@ -32,7 +31,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "95d5d7f9",
|
||||
"metadata": {},
|
||||
@@ -109,7 +107,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "dbc025d6",
|
||||
"metadata": {},
|
||||
@@ -131,7 +128,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "96bf8879",
|
||||
"metadata": {},
|
||||
@@ -156,7 +152,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "23601ddb",
|
||||
"metadata": {},
|
||||
@@ -269,7 +264,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -283,7 +278,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.13"
|
||||
"version": "3.9.1"
|
||||
},
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
# flake8: noqa
|
||||
from langchain.output_parsers.boolean import BooleanOutputParser
|
||||
from langchain.prompts import PromptTemplate
|
||||
|
||||
template = """You are a teacher grading a quiz.
|
||||
@@ -17,7 +18,9 @@ STUDENT ANSWER: {result}
|
||||
TRUE ANSWER: {answer}
|
||||
GRADE:"""
|
||||
PROMPT = PromptTemplate(
|
||||
input_variables=["query", "result", "answer"], template=template
|
||||
input_variables=["query", "result", "answer"],
|
||||
template=template,
|
||||
output_parser=BooleanOutputParser(true_val="CORRECT", false_val="INCORRECT"),
|
||||
)
|
||||
|
||||
context_template = """You are a teacher grading a quiz.
|
||||
@@ -36,7 +39,9 @@ CONTEXT: {context}
|
||||
STUDENT ANSWER: {result}
|
||||
GRADE:"""
|
||||
CONTEXT_PROMPT = PromptTemplate(
|
||||
input_variables=["query", "context", "result"], template=context_template
|
||||
input_variables=["query", "context", "result"],
|
||||
template=context_template,
|
||||
output_parser=BooleanOutputParser(true_val="CORRECT", false_val="INCORRECT"),
|
||||
)
|
||||
|
||||
|
||||
@@ -58,5 +63,7 @@ CONTEXT: {context}
|
||||
STUDENT ANSWER: {result}
|
||||
EXPLANATION:"""
|
||||
COT_PROMPT = PromptTemplate(
|
||||
input_variables=["query", "context", "result"], template=cot_template
|
||||
input_variables=["query", "context", "result"],
|
||||
template=cot_template,
|
||||
output_parser=BooleanOutputParser(true_val="CORRECT", false_val="INCORRECT"),
|
||||
)
|
||||
|
||||
95
langchain/experimental/retriever_eval/base.py
Normal file
95
langchain/experimental/retriever_eval/base.py
Normal file
@@ -0,0 +1,95 @@
|
||||
""""""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, List, Tuple
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from langchain import OpenAI
|
||||
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.evaluation.qa import QAEvalChain
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
|
||||
|
||||
class TestRetriever(BaseRetriever, BaseModel, ABC):
    """Retriever that can also ingest new documents.

    Subclasses implement ``_insert_documents`` to store documents in their
    backing index, and may override ``_transform_documents`` to preprocess
    documents (e.g. chunking) before insertion.
    """

    # Parameters that uniquely describe this retriever configuration;
    # used to build a human-readable name for test-suite result keys.
    identifying_params: dict

    def add_documents(self, docs: List[Document], can_edit: bool = True) -> None:
        """Ingest documents, optionally transforming them first.

        Args:
            docs: Documents to add to the retriever's store.
            can_edit: If True, documents are passed through
                ``_transform_documents`` before insertion; if False they
                are inserted verbatim.
        """
        if can_edit:
            docs = self._transform_documents(docs)
        self._insert_documents(docs)

    def _transform_documents(self, docs: List[Document]) -> List[Document]:
        """Preprocess documents before insertion. Default: identity."""
        return docs

    @abstractmethod
    def _insert_documents(self, docs: List[Document]) -> None:
        """Store the given documents in the underlying index."""

    def cleanup(self) -> None:
        """Release resources after a test case has run. Default: no-op."""
        pass

    @property
    def name(self) -> str:
        """Human-readable identifier derived from ``identifying_params``."""
        return str(self.identifying_params)
|
||||
|
||||
|
||||
class RetrieverTestCase(BaseModel, ABC):
    """A single retrieval test: seed documents, a query, and a pass check.

    Attributes:
        name: Display name used as the key in test-suite results.
        query: Query sent to the retriever under test.
        docs: Documents to ingest before querying.
        can_edit_docs: Whether the retriever may transform (e.g. chunk)
            the documents before inserting them.
    """

    name: str
    query: str
    docs: List[Document]
    can_edit_docs: bool = True

    @classmethod
    def from_config(cls, **kwargs: Any) -> "RetrieverTestCase":
        """Alternate constructor; subclasses override to generate fixtures."""
        return cls(**kwargs)

    @abstractmethod
    def check_retrieved_docs(self, retrieved_docs: List[Document]) -> bool:
        """Return True if the retrieved documents satisfy this test case."""

    def run(self, retriever: TestRetriever) -> Tuple[bool, dict]:
        """Execute the test case against ``retriever``.

        Ingests the fixture documents, runs the query, evaluates the
        retrieved documents, and cleans up the retriever.

        Returns:
            Tuple of (passed, extras) where extras contains the raw
            retrieved documents for inspection.
        """
        retriever.add_documents(self.docs, can_edit=self.can_edit_docs)
        retrieved_docs = retriever.get_relevant_documents(self.query)
        passed = self.check_retrieved_docs(retrieved_docs)
        extra_dict = {"retrieved_docs": retrieved_docs}
        retriever.cleanup()
        return passed, extra_dict
|
||||
|
||||
|
||||
class QAEvalChainTestCase(RetrieverTestCase):
    """Test case judged by an LLM: answer the query from the retrieved
    documents, then grade that answer against a gold-standard answer.

    NOTE(review): both default chains call OpenAI, so running this case
    requires API access — confirm that is acceptable in CI.
    """

    # Reference answer the generated answer is graded against.
    gold_standard_answer: str
    # Chain that produces an answer from the retrieved documents.
    qa_chain: BaseCombineDocumentsChain = Field(
        default_factory=lambda: load_qa_chain(OpenAI(temperature=0))
    )
    # Chain that grades the produced answer against the gold standard.
    qa_eval_chain: QAEvalChain = Field(
        default_factory=lambda: QAEvalChain.from_llm(OpenAI(temperature=0))
    )

    def check_retrieved_docs(self, retrieved_docs: List[Document]) -> bool:
        """Answer the query from ``retrieved_docs`` and grade the result."""
        qa_response = self.qa_chain(
            {"input_documents": retrieved_docs, "question": self.query}
        )
        # Attach the reference answer so the eval chain can grade against it.
        qa_response["answer"] = self.gold_standard_answer
        return self.qa_eval_chain.predict_and_parse(qa_response)
|
||||
|
||||
|
||||
class ExpectedSubstringsTestCase(RetrieverTestCase):
    """Test case that passes when every expected substring appears
    somewhere in the concatenated retrieved-document text."""

    # Substrings that must all be present for the case to pass.
    expected_substrings: List[str]

    def check_retrieved_docs(self, retrieved_docs: List[Document]) -> bool:
        """Return True iff all expected substrings occur in the results."""
        all_text = "\n".join(d.page_content for d in retrieved_docs)
        return all(substring in all_text for substring in self.expected_substrings)
|
||||
131
langchain/experimental/retriever_eval/example.py
Normal file
131
langchain/experimental/retriever_eval/example.py
Normal file
@@ -0,0 +1,131 @@
|
||||
""""""
|
||||
from typing import Dict, List, Type
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from langchain.experimental.retriever_eval.base import TestRetriever
|
||||
from langchain.experimental.retriever_eval.test_cases import (
|
||||
EntityLinkingTestCase,
|
||||
FirstMentionTestCase,
|
||||
LongTextOneFactTestCase,
|
||||
ManyDocsTestCase,
|
||||
RedundantDocsTestCase,
|
||||
RevisedStatementTestCase,
|
||||
SpeakerTestCase,
|
||||
TemporalQueryTestCase,
|
||||
)
|
||||
from langchain.experimental.retriever_eval.test_retrievers import (
|
||||
VectorStoreTestRetriever,
|
||||
)
|
||||
from langchain.schema import BaseRetriever, Document
|
||||
from langchain.text_splitter import CharacterTextSplitter, TextSplitter
|
||||
|
||||
|
||||
def get_test_retrievers() -> List[Type[TestRetriever]]:
    """Build the roster of retriever configurations to test.

    Vector-store imports are kept local so the module can be imported
    without Chroma/FAISS installed.

    Returns:
        List of TestRetriever subclasses (not instances); the test suite
        instantiates a fresh retriever per test case.
    """
    from langchain.embeddings.openai import OpenAIEmbeddings
    from langchain.vectorstores import FAISS, Chroma

    class ChromaTestRetriever(VectorStoreTestRetriever):
        """Chroma, similarity search, 1000-char chunks."""

        base_retriever: BaseRetriever = Field(
            default_factory=lambda: Chroma(
                embedding_function=OpenAIEmbeddings()
            ).as_retriever()
        )
        text_splitter: TextSplitter = Field(
            default_factory=lambda: CharacterTextSplitter(
                chunk_size=1000, chunk_overlap=0
            )
        )
        identifying_params = {
            "chunk_size": 1000,
            "search": "similarity",
            "vectorstore": "Chroma",
            "k": 4,
        }

    class ChromaTestRetrieverMMR(VectorStoreTestRetriever):
        """Chroma, MMR search over smaller (200-char) chunks."""

        base_retriever: BaseRetriever = Field(
            default_factory=lambda: Chroma(
                embedding_function=OpenAIEmbeddings()
            ).as_retriever(search_type="mmr", search_kwargs={"k": 6, "fetch_k": 12})
        )
        text_splitter: TextSplitter = Field(
            default_factory=lambda: CharacterTextSplitter(
                chunk_size=200, chunk_overlap=0
            )
        )
        identifying_params = {
            "chunk_size": 200,
            "search": "mmr",
            "vectorstore": "Chroma",
            "k": 6,
        }

    class ChromaTestRetrieverStuffMetadata(ChromaTestRetrieverMMR):
        """Same as the MMR variant, but prepends each document's metadata
        into its page content so metadata is searchable."""

        identifying_params = {
            "chunk_size": 200,
            "search": "mmr",
            "vectorstore": "Chroma",
            "k": 6,
            "metadata": "included_in_content",
        }

        def _transform_documents(self, docs: List[Document]) -> List[Document]:
            docs = super()._transform_documents(docs)
            for doc in docs:
                doc.page_content = (
                    f"Document metadata: {doc.metadata}\n\n" + doc.page_content
                )
            return docs

    class FAISSTestRetriever(VectorStoreTestRetriever):
        """FAISS, similarity search, 1000-char chunks.

        FAISS requires at least one text at construction time, so the
        index is seeded with a placeholder ("foo").
        """

        base_retriever: BaseRetriever = Field(
            default_factory=lambda: FAISS.from_texts(
                ["foo"], OpenAIEmbeddings()
            ).as_retriever()
        )
        text_splitter: TextSplitter = Field(
            default_factory=lambda: CharacterTextSplitter(
                chunk_size=1000, chunk_overlap=0
            )
        )
        identifying_params = {
            "chunk_size": 1000,
            "search": "similarity",
            "vectorstore": "FAISS",
            "k": 4,
        }

    return [
        ChromaTestRetriever,
        ChromaTestRetrieverMMR,
        ChromaTestRetrieverStuffMetadata,
        FAISSTestRetriever,
    ]
|
||||
|
||||
|
||||
def get_test_cases() -> List:
    """Return the test-case roster as (test-case class, config kwargs) pairs.

    Each config dict is passed to the class's ``from_config`` constructor;
    empty dicts mean the case's defaults are used.
    """
    return [
        (ManyDocsTestCase, {}),
        (RedundantDocsTestCase, {}),
        (EntityLinkingTestCase, {}),
        (TemporalQueryTestCase, {}),
        (RevisedStatementTestCase, {}),
        (LongTextOneFactTestCase, {}),
        (FirstMentionTestCase, {}),
        (SpeakerTestCase, {}),
    ]
|
||||
|
||||
|
||||
def run_test_suite() -> Dict:
    """Run every test case against every test retriever.

    A fresh retriever instance is constructed for each test case so cases
    do not share ingested documents.

    Returns:
        Nested dict: ``results[retriever_name][test_case_name]`` is the
        ``(passed, extras)`` tuple returned by ``RetrieverTestCase.run``.
    """
    results = {}
    test_cases = get_test_cases()
    test_retrievers = get_test_retrievers()
    for retriever_cls in test_retrievers:
        # NOTE: this throwaway instance exists only to read `.name`;
        # each case below gets its own fresh instance.
        retriever_name = retriever_cls().name
        results[retriever_name] = {}
        for test_case_cls, config in test_cases:
            retriever = retriever_cls()
            test_case = test_case_cls.from_config(**config)
            results[retriever_name][test_case.name] = test_case.run(retriever)
    return results
|
||||
278
langchain/experimental/retriever_eval/test_cases.py
Normal file
278
langchain/experimental/retriever_eval/test_cases.py
Normal file
@@ -0,0 +1,278 @@
|
||||
""""""
|
||||
import random
|
||||
import re
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pydantic import Field
|
||||
|
||||
from langchain.document_loaders import TextLoader
|
||||
from langchain.experimental.retriever_eval.base import (
|
||||
ExpectedSubstringsTestCase,
|
||||
RetrieverTestCase,
|
||||
)
|
||||
from langchain.schema import Document
|
||||
from langchain.text_splitter import CharacterTextSplitter
|
||||
|
||||
|
||||
class ManyDocsTestCase(ExpectedSubstringsTestCase):
    """Can the retriever surface several specific facts from a large pool
    of near-identical documents (daily temperature records)?"""

    @classmethod
    def from_config(
        cls, retrieve: int = 5, total: int = 100, seed: int = 0, **kwargs: Any
    ) -> "ManyDocsTestCase":
        """Generate ``total`` one-fact documents and a query asking about
        ``retrieve`` randomly chosen dates.

        Args:
            retrieve: How many dates the query asks about.
            total: Total number of generated documents.
            seed: RNG seed for reproducible fixtures.
        """
        random.seed(seed)
        name = f"Many docs ({retrieve=}, {total=})"
        text_template = "On {date} the peak temperature was {temp} degrees"
        dates = pd.date_range(start="01-01-2023", freq="D", periods=total).astype(str)
        temps = [str(random.randint(50, 80)) for _ in range(len(dates))]
        texts = [text_template.format(date=d, temp=t) for d, t in zip(dates, temps)]
        docs = [Document(page_content=t) for t in texts]

        # NOTE(review): random.choices samples WITH replacement, so the
        # expected dates may contain duplicates (fewer than `retrieve`
        # distinct dates). random.sample would guarantee distinct dates —
        # confirm which is intended.
        sample_idxs = random.choices(range(len(dates)), k=retrieve)
        expected_dates = [dates[i] for i in sample_idxs]
        query = f"What were the peak temperatures on {', '.join(expected_dates)}?"
        return cls(
            name=name, query=query, docs=docs, expected_substrings=expected_dates
        )
|
||||
|
||||
|
||||
class RedundantDocsTestCase(ExpectedSubstringsTestCase):
    """Does the retriever return diverse results when most documents are
    redundant? Eleven headlines cover the same OpenAI story and one covers
    Meta; a passing retriever surfaces both companies."""

    @classmethod
    def from_config(cls, **kwargs: Any) -> "RedundantDocsTestCase":
        """Build the fixed headline fixture (no configuration options)."""
        name = "Redundant docs"
        texts = [
            "OpenAI announces the release of GPT-5",
            "GPT-5 released by OpenAI",
            "The next-generation OpenAI GPT model is here",
            "GPT-5: OpenAI's next model is the biggest yet",
            "Sam Altman's OpenAI comes out with new GPT-5 model",
            "GPT-5 is here. What you need to know about the OpenAI model",
            "OpenAI announces ChatGPT successor GPT-5",
            "5 jaw-dropping things OpenAI's GPT-5 can do that ChatGPT couldn't",
            "OpenAI's GPT-5 Is Exciting and Scary",
            "OpenAI announces GPT-5, the new generation of AI",
            "OpenAI says new model GPT-5 is more creative and less",
            "Meta open sources new AI model, largest yet",
        ]
        docs = [Document(page_content=t) for t in texts]
        query = "What companies have recently released new models?"
        expected_substrings = ["OpenAI", "Meta"]
        return cls(
            name=name, docs=docs, query=query, expected_substrings=expected_substrings
        )
|
||||
|
||||
|
||||
class EntityLinkingTestCase(ExpectedSubstringsTestCase):
    """Can the retriever link an entity across two documents? One document
    says the founder is from Louisville; another names the founder. The
    query requires retrieving both amid filler text.

    Documents are inserted verbatim (``can_edit_docs=False``) so chunking
    cannot split the two facts differently.
    """

    @classmethod
    def from_config(
        cls, filler_texts: Optional[List[str]] = None, **kwargs: Any
    ) -> "EntityLinkingTestCase":
        """Build the fixture, loading State of the Union filler by default.

        Args:
            filler_texts: Distractor texts; when None, loaded from the
                docs directory (path is relative to the working directory
                — assumes running from the repo's notebooks/ layout).
        """
        if filler_texts is None:
            filler_docs = TextLoader(
                "../docs/modules/state_of_the_union.txt"
            ).load_and_split()
            filler_texts = [d.page_content for d in filler_docs]
        name = f"Entity linking (num_filler={len(filler_texts)})"
        texts = [
            "The founder of ReallyCoolAICompany LLC is from Louisville, Kentucky.",
            "Melissa Harkins, founder of ReallyCoolAICompany LLC, said in a recent interview that she will be stepping down as CEO.",
        ]
        texts = texts + filler_texts
        docs = [Document(page_content=t) for t in texts]
        query = "Where is Melissa Harkins from?"
        expected_substrings = ["Harkins", "Louisville"]
        return cls(
            name=name,
            docs=docs,
            query=query,
            expected_substrings=expected_substrings,
            can_edit_docs=False,
        )
|
||||
|
||||
|
||||
class TemporalQueryTestCase(RetrieverTestCase):
    """Can the retriever answer a time-ordered query ("first time I
    mentioned X") over dated diary-style documents? Passes if any
    retrieved document carries the correct date in its metadata."""

    # Date (ISO string) of the earliest document mentioning the target emotion.
    correct_date: str

    def check_retrieved_docs(self, retrieved_docs: List[Document]) -> bool:
        """Pass if any retrieved doc is dated ``correct_date``."""
        return any(d.metadata["date"] == self.correct_date for d in retrieved_docs)

    @classmethod
    def from_config(
        cls,
        options: Optional[List[str]] = None,
        phrasings: Optional[List[str]] = None,
        num_docs: int = 200,
        seed: int = 0,
        **kwargs: Any,
    ) -> "TemporalQueryTestCase":
        """Generate ``num_docs`` dated documents, each stating one emotion.

        Args:
            options: Emotion vocabulary; the query targets ``options[0]``.
            phrasings: Sentence templates with an ``{option}`` slot.
            num_docs: Number of generated documents (one per day).
            seed: RNG seed for reproducible fixtures.
        """
        random.seed(seed)
        if options is None:
            options = [
                "happy",
                "sad",
                "confused",
                "angry",
                "disgusted",
                "scared",
                "thankful",
                "astonished",
                "calm",
            ]
        if phrasings is None:
            phrasings = [
                "Today I felt {option}",
                "I felt {option} today",
                "I was really {option} today",
                "My primary emotion is {option}",
                "Everybody says I seemed so {option}",
            ]
        name = f"Temporal query ({num_docs=})"
        # Append options[0] so the target emotion is guaranteed to occur
        # at least once (it may also appear earlier by chance).
        options_sample = random.choices(options, k=num_docs - 1) + [options[0]]
        texts = [
            phrase.format(option=option)
            for phrase, option in zip(
                random.choices(phrasings, k=num_docs), options_sample
            )
        ]
        dates = pd.date_range(start="01-01-2023", freq="D", periods=num_docs).astype(
            str
        )
        metadatas = [{"date": d} for d in dates]
        docs = [Document(page_content=t, metadata=m) for t, m in zip(texts, metadatas)]
        query = f"When was the first time I mentioned being {options[0]}"
        # index() finds the FIRST occurrence, matching the "first time" query.
        correct_date = dates[options_sample.index(options[0])]
        return cls(name=name, docs=docs, query=query, correct_date=correct_date)
|
||||
|
||||
|
||||
class RevisedStatementTestCase(ExpectedSubstringsTestCase):
    """Can the retriever find the LATEST revision of a repeatedly updated
    fact? A sequence of earthquake-magnitude updates is appended across
    chunks of filler text; only the final value ("5") is correct.

    NOTE(review): matching the bare substring "5" is a weak pass
    criterion — any retrieved text containing the digit 5 passes.
    Consider matching "magnitude 5" instead; confirm intent.
    """

    @classmethod
    def from_config(
        cls, filler_text: Optional[str] = None, **kwargs
    ) -> "RevisedStatementTestCase":
        """Build the fixture, loading State of the Union filler by default.

        Args:
            filler_text: Base text to chunk; when None, loaded from the
                docs directory (path relative to the working directory).
        """
        if filler_text is None:
            filler_text = (
                TextLoader("../docs/modules/state_of_the_union.txt")
                .load()[0]
                .page_content
            )
        texts = CharacterTextSplitter(chunk_size=500, chunk_overlap=0).split_text(
            filler_text
        )
        docs = [Document(page_content=t) for t in texts]
        updates = [
            "We are receiving reports of a magnitude 10 earthquake in Japan",
            "The latest reports are that the earthquake that has hit Japan is actually of magnitude 8.2",
            "Now the earthquake in Japan has been downgraded to magnitude 7",
            "Looks like the earthquake is back up to an 8",
            "The latest news is that the earthquake was of magnitude 3",
            "No no it's a magnitude 4",
            "I heard the earthquake is 6.3",
            "Or did they say the earthquake in Japan was a magnitude 6.2",
            "The Japanese arthquake is actually being recorded as a magnitude 12",
            "Sorry correction, my Japanese was poor, the magnitude of the earthquake is 2",
            "The Japanese earthquake is now being recorded as magnitude 5",
        ]
        # Append one update per chunk, in order; zip stops at the shorter
        # sequence, so extra filler chunks get no update.
        for update, doc in zip(updates, docs):
            doc.page_content += " " + update + "."
        query = "What is the latest reported magnitude of the earthquake in Japan?"
        num_revisions = len(updates)
        name = f"Revised statement ({num_revisions=})"
        expected_substrings = ["5"]
        return cls(
            name=name, docs=docs, query=query, expected_substrings=expected_substrings
        )
|
||||
|
||||
|
||||
class LongTextOneFactTestCase(ExpectedSubstringsTestCase):
    """Needle-in-a-haystack: one out-of-place fact (a purple monkey) is
    buried at the midpoint of a single long document. The query only
    refers to the fact obliquely ("the animal")."""

    @classmethod
    def from_config(
        cls, filler_text: Optional[str] = None, **kwargs
    ) -> "LongTextOneFactTestCase":
        """Build the fixture, loading State of the Union filler by default.

        Args:
            filler_text: Long text to hide the fact in; when None, loaded
                from the docs directory (path relative to the working
                directory).
        """
        if filler_text is None:
            filler_text = (
                TextLoader("../docs/modules/state_of_the_union.txt")
                .load()[0]
                .page_content
            )
        fact = (
            "We've just received reports of a purple monkey invading the White House."
        )
        # Splice the fact between the two halves of the filler, splitting
        # on sentence boundaries so the insertion reads naturally.
        filler_split = filler_text.split(". ")
        all_text = ". ".join(
            filler_split[: len(filler_split) // 2]
            + [fact]
            + filler_split[len(filler_split) // 2 :]
        )
        doc = Document(page_content=all_text)
        text_len = len(all_text)
        name = f"Fact in long text ({text_len=})"
        query = "What color was the animal that was mentioned?"
        expected_substrings = ["purple"]
        return cls(
            name=name, docs=[doc], query=query, expected_substrings=expected_substrings
        )
|
||||
|
||||
|
||||
def load_transcript() -> List[Document]:
    """Load a podcast transcript and split it into per-statement documents.

    The transcript marks speakers as ``**[Name]**``; splitting on that
    pattern with a capture group yields alternating speaker-name /
    statement segments. Each statement becomes a Document with speaker,
    statement index, and a synthetic timestamp in its metadata.

    Assumes the transcript file exists at the hard-coded relative path —
    i.e. the suite is run from the expected working directory.
    """
    interview = (
        TextLoader(
            "../../Ian_Goodfellow--Generative_Adversarial_Networks_(GANs)-Artificial_Intelligence_(AI)_Podcast-April_18_2019.md"
        )
        .load()[0]
        .page_content
    )
    # Raw string: \*, \[, \] are regex escapes, not string escapes
    # (non-raw form emitted invalid-escape warnings on newer Pythons).
    speaker_tmpl = r"\*\*\[{name}\]\*\*"
    splits = re.split(speaker_tmpl.format(name="(.*)"), interview.strip())
    # splits = [preamble, speaker1, statement1, speaker2, statement2, ...]
    # Madeup times: cumulative word count scaled by an assumed speaking
    # rate of 2.5 words/second.
    times = np.cumsum([len(splits[i].split()) for i in range(2, len(splits), 2)]) / 2.5
    docs = [
        Document(
            page_content=splits[i + 1].strip(),
            metadata={
                "speaker": splits[i],
                "statement_index": i // 2,
                "time": times[i // 2],
            },
        )
        for i in range(1, len(splits), 2)
    ]
    return docs
|
||||
|
||||
|
||||
class PodcastTestCase(RetrieverTestCase):
    """Base for test cases over the podcast transcript: ``docs`` defaults
    to the per-statement documents produced by ``load_transcript``."""

    docs: List[Document] = Field(default_factory=load_transcript)
|
||||
|
||||
|
||||
class FirstMentionTestCase(PodcastTestCase, ExpectedSubstringsTestCase):
    """Temporal query over the podcast: find the FIRST mention of deep
    learning, which occurs in the statement referencing the
    '"Deep Learning" book'."""

    name: str = "Podcast First Mention"
    query: str = "What was the first mention of deep learning?"
    expected_substrings: List[str] = Field(
        default_factory=lambda: ['"Deep Learning" book']
    )
|
||||
|
||||
|
||||
class SpeakerTestCase(PodcastTestCase, ExpectedSubstringsTestCase):
    """Speaker-attributed query over the podcast: requires retrieving the
    statement in which Ian describes coming up with the GANs idea."""

    name: str = "Podcast Reference to Speaker"
    query: str = "What did Ian say about how he came up with the idea for GANs?"
    expected_substrings: List[str] = Field(
        default_factory=lambda: ["drinking helped a little bit"]
    )
|
||||
29
langchain/experimental/retriever_eval/test_retrievers.py
Normal file
29
langchain/experimental/retriever_eval/test_retrievers.py
Normal file
@@ -0,0 +1,29 @@
|
||||
""""""
|
||||
from typing import List, Optional
|
||||
|
||||
from langchain.experimental.retriever_eval.base import TestRetriever
|
||||
from langchain.schema import Document
|
||||
from langchain.text_splitter import TextSplitter
|
||||
from langchain.vectorstores.base import VectorStoreRetriever
|
||||
|
||||
|
||||
class VectorStoreTestRetriever(TestRetriever):
    """TestRetriever backed by a vector-store retriever.

    Documents are optionally chunked by ``text_splitter`` before being
    added to the underlying vector store; retrieval is delegated to the
    wrapped ``base_retriever``.
    """

    # Retriever whose vector store receives documents and serves queries.
    base_retriever: VectorStoreRetriever
    # Optional splitter applied in _transform_documents; None = no chunking.
    text_splitter: Optional[TextSplitter] = None

    class Config:
        # VectorStoreRetriever/TextSplitter are not pydantic models.
        arbitrary_types_allowed = True

    def get_relevant_documents(self, query: str) -> List[Document]:
        """Delegate retrieval to the wrapped retriever."""
        return self.base_retriever.get_relevant_documents(query)

    def aget_relevant_documents(self, query: str) -> List[Document]:
        # NOTE(review): defined sync here; if the base interface declares
        # this as a coroutine, this should be `async def` — confirm.
        raise NotImplementedError

    def _insert_documents(self, docs: List[Document]) -> None:
        """Add documents directly to the retriever's vector store."""
        self.base_retriever.vectorstore.add_documents(docs)

    def _transform_documents(self, docs: List[Document]) -> List[Document]:
        """Chunk documents with ``text_splitter`` when one is configured."""
        if self.text_splitter is None:
            return docs
        return self.text_splitter.split_documents(docs)
|
||||
29
langchain/output_parsers/boolean.py
Normal file
29
langchain/output_parsers/boolean.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from langchain.schema import BaseOutputParser
|
||||
|
||||
|
||||
class BooleanOutputParser(BaseOutputParser[bool]):
    """Parse an LLM text response into a boolean.

    The response (after stripping whitespace) must match ``true_val`` or
    ``false_val``; comparison is case-insensitive so common variants like
    "yes" / "Yes" are accepted.
    """

    # String that maps to True (compared case-insensitively).
    true_val: str = "YES"
    # String that maps to False (compared case-insensitively).
    false_val: str = "NO"

    def parse(self, text: str) -> bool:
        """Parse the output of an LLM call to a boolean.

        Args:
            text: output of language model

        Returns:
            boolean

        Raises:
            ValueError: if the stripped text matches neither ``true_val``
                nor ``false_val``.
        """
        cleaned_text = text.strip()
        # Case-insensitive match: strict equality rejected otherwise-valid
        # responses such as "yes" or "Correct".
        if cleaned_text.upper() not in (self.true_val.upper(), self.false_val.upper()):
            raise ValueError(
                f"BooleanOutputParser expected output value to either be "
                f"{self.true_val} or {self.false_val}. Received {cleaned_text}."
            )
        return cleaned_text.upper() == self.true_val.upper()

    @property
    def _type(self) -> str:
        """Snake-case string identifier for output parser type."""
        return "boolean_output_parser"
|
||||
948
notebooks/retrieval_test_suite.ipynb
Normal file
948
notebooks/retrieval_test_suite.ipynb
Normal file
@@ -0,0 +1,948 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "51114977",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Testing Retrieval QA Chains"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 312,
|
||||
"id": "bb8e0812",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from abc import ABC, abstractmethod\n",
|
||||
"from typing import Any, List, Dict, Tuple, Type\n",
|
||||
"\n",
|
||||
"from pydantic import Field, BaseModel\n",
|
||||
"\n",
|
||||
"from langchain.schema import Document, BaseRetriever\n",
|
||||
"from langchain.chains.combine_documents.base import BaseCombineDocumentsChain\n",
|
||||
"from langchain.chains.question_answering import load_qa_chain\n",
|
||||
"from langchain.evaluation.qa import QAEvalChain\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class TestRetriever(BaseRetriever, BaseModel, ABC):\n",
|
||||
" \"\"\"Retriever that can also ingest new documents.\"\"\"\n",
|
||||
" identifying_params: dict\n",
|
||||
" \n",
|
||||
" def add_documents(self, docs: List[Document], can_edit: bool = True) -> None:\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" if can_edit:\n",
|
||||
" docs = self._transform_documents(docs)\n",
|
||||
" self._insert_documents(docs)\n",
|
||||
" \n",
|
||||
" def _transform_documents(self, docs: List[Document]) -> List[Document]:\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" return docs\n",
|
||||
" \n",
|
||||
" @abstractmethod\n",
|
||||
" def _insert_documents(self, docs: List[Document]) -> None:\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" \n",
|
||||
" def cleanup(self) -> None:\n",
|
||||
" pass\n",
|
||||
" \n",
|
||||
" @property\n",
|
||||
" def name(self):\n",
|
||||
" return str(self.identifying_params)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"class RetrieverTestCase(BaseModel, ABC):\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" name: str\n",
|
||||
" query: str\n",
|
||||
" docs: List[Document]\n",
|
||||
" can_edit_docs: bool = True\n",
|
||||
" \n",
|
||||
" @classmethod\n",
|
||||
" def from_config(cls, **kwargs: Any) -> \"RetrieverTestCase\":\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" return cls(**kwargs)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
" @abstractmethod\n",
|
||||
" def check_retrieved_docs(self, retrieved_docs: List[Document]) -> bool:\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" \n",
|
||||
" def run(self, retriever: TestRetriever) -> Tuple[bool, dict]:\n",
|
||||
" retriever.add_documents(self.docs, can_edit=self.can_edit_docs)\n",
|
||||
" retrieved_docs = retriever.get_relevant_documents(self.query)\n",
|
||||
" passed = self.check_retrieved_docs(retrieved_docs)\n",
|
||||
" extra_dict = {\"retrieved_docs\": retrieved_docs}\n",
|
||||
" retriever.cleanup()\n",
|
||||
" return passed, extra_dict\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"class QAEvalChainTestCase(RetrieverTestCase):\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" gold_standard_answer: str\n",
|
||||
" qa_chain: BaseCombineDocumentsChain = Field(default_factory=lambda: load_qa_chain(OpenAI()))\n",
|
||||
" qa_eval_chain: QAEvalChain = Field(default_factory=lambda: QAEvalChain.from_llm(OpenAI(temperature=0)))\n",
|
||||
" \n",
|
||||
" def check_retrieved_docs(self, retrieved_docs: List[Document]) -> bool:\n",
|
||||
" qa_response = self.qa_chain({\"input_documents\": retrieved_docs, \"question\": query})\n",
|
||||
" qa_response[\"answer\"] = self.gold_standard_answer\n",
|
||||
" return self.qa_eval_chain.predict_and_parse(qa_response)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"class ExpectedSubstringsTestCase(RetrieverTestCase):\n",
|
||||
" expected_substrings: List[str]\n",
|
||||
"\n",
|
||||
" def check_retrieved_docs(self, retrieved_docs: List[Document]) -> bool:\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" all_text = \"\\n\".join([d.page_content for d in retrieved_docs])\n",
|
||||
" for substring in self.expected_substrings:\n",
|
||||
" if substring not in all_text:\n",
|
||||
" return False\n",
|
||||
" return True\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"class ExpectedDocsTestCase(RetrieverTestCase):\n",
|
||||
" expected_: List[str]\n",
|
||||
"\n",
|
||||
" def check_retrieved_docs(self, retrieved_docs: List[Document]) -> bool:\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" all_text = \"\\n\".join([d.page_content for d in retrieved_docs])\n",
|
||||
" for substring in self.expected_substrings:\n",
|
||||
" if substring not in all_text:\n",
|
||||
" return False\n",
|
||||
" return True\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 313,
|
||||
"id": "897b9751",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import random\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class ManyDocsTestCase(ExpectedSubstringsTestCase):\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" \n",
|
||||
" @classmethod\n",
|
||||
" def from_config(cls, retrieve: int = 5, total: int = 100, seed: int = 0, **kwargs: Any) -> \"ManyDocsTestCase\":\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" random.seed(seed)\n",
|
||||
" name = f\"Many docs ({retrieve=}, {total=})\"\n",
|
||||
" text_template = \"On {date} the peak temperature was {temp} degrees\"\n",
|
||||
" dates = pd.date_range(start=\"01-01-2023\", freq='D', periods=total).astype(str)\n",
|
||||
" temps = [str(random.randint(50, 80)) for _ in range(len(dates))]\n",
|
||||
" texts = [text_template.format(date=d, temp=t) for d, t in zip(dates, temps)]\n",
|
||||
" docs = [Document(page_content=t) for t in texts]\n",
|
||||
" \n",
|
||||
" sample_idxs = random.choices(range(len(dates)), k=retrieve)\n",
|
||||
" expected_dates = [dates[i] for i in sample_idxs]\n",
|
||||
" query = f\"What were the peak temperatures on {', '.join(expected_dates)}?\"\n",
|
||||
" return cls(name=name, query=query, docs=docs, expected_substrings=expected_dates)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 314,
|
||||
"id": "9364d920",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class RedundantDocsTestCase(ExpectedSubstringsTestCase):\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
"\n",
|
||||
" @classmethod\n",
|
||||
" def from_config(cls, **kwargs: Any) -> \"RedundantDocsTestCase\":\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" name = \"Redundant docs\"\n",
|
||||
" texts = [\n",
|
||||
" \"OpenAI announces the release of GPT-5\",\n",
|
||||
" \"GPT-5 released by OpenAI\",\n",
|
||||
" \"The next-generation OpenAI GPT model is here\",\n",
|
||||
" \"GPT-5: OpenAI's next model is the biggest yet\",\n",
|
||||
" \"Sam Altman's OpenAI comes out with new GPT-5 model\",\n",
|
||||
" \"GPT-5 is here. What you need to know about the OpenAI model\",\n",
|
||||
" \"OpenAI announces ChatGPT successor GPT-5\",\n",
|
||||
" \"5 jaw-dropping things OpenAI's GPT-5 can do that ChatGPT couldn't\",\n",
|
||||
" \"OpenAI's GPT-5 Is Exciting and Scary\",\n",
|
||||
" \"OpenAI announces GPT-5, the new generation of AI\",\n",
|
||||
" \"OpenAI says new model GPT-5 is more creative and less\",\n",
|
||||
" \"Meta open sources new AI model, largest yet\",\n",
|
||||
" ]\n",
|
||||
" docs = [Document(page_content=t) for t in texts]\n",
|
||||
" query = \"What companies have recently released new models?\"\n",
|
||||
" expected_substrings = [\"OpenAI\", \"Meta\"]\n",
|
||||
" return cls(name=name, docs=docs, query=query, expected_substrings=expected_substrings)\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 315,
|
||||
"id": "01da2ebc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Optional\n",
|
||||
" \n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class EntityLinkingTestCase(ExpectedSubstringsTestCase):\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" \n",
|
||||
" @classmethod\n",
|
||||
" def from_config(cls, filler_texts: Optional[List[str]] = None, **kwargs: Any) -> \"EntityLinkingTestCase\":\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" if filler_texts is None:\n",
|
||||
" filler_docs = TextLoader('../docs/modules/state_of_the_union.txt').load_and_split()\n",
|
||||
" filler_texts = [d.page_content for d in filler_docs]\n",
|
||||
" name = f\"Entity linking (num_filler={len(filler_texts)})\"\n",
|
||||
" texts = [\n",
|
||||
" \"The founder of ReallyCoolAICompany LLC is from Louisville, Kentucky.\",\n",
|
||||
" \"Melissa Harkins, founder of ReallyCoolAICompany LLC, said in a recent interview that she will be stepping down as CEO.\",\n",
|
||||
" ]\n",
|
||||
" texts = texts + filler_texts\n",
|
||||
" docs = [Document(page_content=t) for t in texts]\n",
|
||||
" query = \"Where is Melissa Harkins from?\"\n",
|
||||
" expected_substrings = [\"Harkins\", \"Louisville\"]\n",
|
||||
" return cls(name=name, docs=docs, query=query, expected_substrings=expected_substrings, can_edit_docs=False)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 316,
|
||||
"id": "c8e26d41",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class TemporalQueryTestCase(RetrieverTestCase):\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" correct_date: str\n",
|
||||
" \n",
|
||||
" def check_retrieved_docs(self, retrieved_docs: List[Document]) -> bool:\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" return any(d.metadata[\"date\"]==self.correct_date for d in retrieved_docs)\n",
|
||||
" \n",
|
||||
" @classmethod\n",
|
||||
" def from_config(cls, options: Optional[List[str]]=None, phrasings: Optional[List[str]]=None, num_docs: int = 200, seed: int = 0, **kwargs: Any) -> \"EntityLinkingTestCase\":\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" random.seed(seed)\n",
|
||||
" if options is None:\n",
|
||||
" options = [\"happy\", \"sad\", \"confused\", \"angry\", \"disgusted\", \"scared\", \"thankful\", \"astonished\", \"calm\"]\n",
|
||||
" if phrasings is None:\n",
|
||||
" phrasings = [\n",
|
||||
" \"Today I felt {option}\",\n",
|
||||
" \"I felt {option} today\",\n",
|
||||
" \"I was really {option} today\",\n",
|
||||
" \"My primary emotion is {option}\",\n",
|
||||
" \"Everybody says I seemed so {option}\"\n",
|
||||
" ]\n",
|
||||
" name = f\"Temporal query ({num_docs=})\"\n",
|
||||
" options_sample = random.choices(options, k=num_docs-1) + [options[0]]\n",
|
||||
" texts = [\n",
|
||||
" phrase.format(option=option) for phrase, option in zip(random.choices(phrasings, k=num_docs), options_sample)\n",
|
||||
" ]\n",
|
||||
" dates = pd.date_range(start=\"01-01-2023\", freq='D', periods=num_docs).astype(str)\n",
|
||||
" metadatas = [{\"date\": d} for d in dates]\n",
|
||||
" docs = [Document(page_content=t, metadata=m) for t, m in zip(texts, metadatas)]\n",
|
||||
" query = f\"When was the first time I mentioned being {options[0]}\"\n",
|
||||
" correct_date = dates[options_sample.index(options[0])]\n",
|
||||
" return cls(name=name, docs=docs, query=query, correct_date=correct_date)\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 332,
|
||||
"id": "7621adad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import CharacterTextSplitter\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class RevisedStatementTestCase(ExpectedSubstringsTestCase):\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" \n",
|
||||
" @classmethod\n",
|
||||
" def from_config(cls, filler_text: Optional[str]=None, **kwargs) -> \"RevisedStatementTestCase\":\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" if filler_text is None:\n",
|
||||
" filler_text = TextLoader('../docs/modules/state_of_the_union.txt').load()[0].page_content\n",
|
||||
" texts = CharacterTextSplitter(chunk_size=500, chunk_overlap=0).split_text(filler_text)\n",
|
||||
" docs = [Document(page_content=t) for t in texts]\n",
|
||||
" updates = [\n",
|
||||
" \"We are receiving reports of a magnitude 10 earthquake in Japan\",\n",
|
||||
" \"The latest reports are that the earthquake that has hit Japan is actually of magnitude 8.2\",\n",
|
||||
" \"Now the earthquake in Japan has been downgraded to magnitude 7\",\n",
|
||||
" \"Looks like the earthquake is back up to an 8\",\n",
|
||||
" \"The latest news is that the earthquake was of magnitude 3\",\n",
|
||||
" \"No no it's a magnitude 4\",\n",
|
||||
" \"I heard the earthquake is 6.3\",\n",
|
||||
" \"Or did they say the earthquake in Japan was a magnitude 6.2\",\n",
|
||||
" \"The Japanese arthquake is actually being recorded as a magnitude 12\",\n",
|
||||
" \"Sorry correction, my Japanese was poor, the magnitude of the earthquake is 2\",\n",
|
||||
" \"The Japanese earthquake is now being recorded as magnitude 5\"\n",
|
||||
" ]\n",
|
||||
" for update, doc in zip(updates, docs):\n",
|
||||
" doc.page_content += \" \" + update + \".\"\n",
|
||||
" query = \"What is the latest reported magnitude of the earthquake in Japan?\"\n",
|
||||
" num_revisions = len(updates)\n",
|
||||
" name = f\"Revised statement ({num_revisions=})\"\n",
|
||||
" expected_substrings = [\"5\"]\n",
|
||||
" return cls(name=name, docs=docs, query=query, expected_substrings=expected_substrings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 318,
|
||||
"id": "5d4c4a13",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class LongTextOneFactTestCase(ExpectedSubstringsTestCase):\n",
|
||||
" \"\"\"\"\"\"\n",
|
||||
" \n",
|
||||
" @classmethod\n",
|
||||
" def from_config(cls, filler_text: Optional[str]=None, **kwargs) -> \"LongTextOneFactTestCase\":\n",
|
||||
" if filler_text is None:\n",
|
||||
" filler_text = TextLoader('../docs/modules/state_of_the_union.txt').load()[0].page_content\n",
|
||||
" fact = \"We've just received reports of a purple monkey invading the White House.\"\n",
|
||||
" filler_split = filler_text.split(\". \")\n",
|
||||
" all_text = \". \".join(filler_split[:len(filler_split)//2] + [fact] + filler_split[len(filler_split)//2:])\n",
|
||||
" doc = Document(page_content=all_text)\n",
|
||||
" text_len = len(all_text)\n",
|
||||
" name = f\"Fact in long text ({text_len=})\"\n",
|
||||
" query = \"What color was the animal that was mentioned?\"\n",
|
||||
" expected_substrings = [\"purple\"]\n",
|
||||
" return cls(name=name, docs=[doc], query=query, expected_substrings=expected_substrings)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 319,
|
||||
"id": "f319f071",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def load_transcript():\n",
|
||||
" interview = TextLoader(\"/Users/bagatur/Downloads/Ian_Goodfellow--Generative_Adversarial_Networks_(GANs)-Artificial_Intelligence_(AI)_Podcast-April_18_2019.md\").load()[0].page_content\n",
|
||||
" speaker_tmpl = \"\\*\\*\\[{name}\\]\\*\\*\"\n",
|
||||
" splits = re.split(speaker_tmpl.format(name=\"(.*)\"), interview.strip())\n",
|
||||
" # Madeup times\n",
|
||||
" times = np.cumsum([len(splits[i].split()) for i in range(2, len(splits), 2)]) / 2.5\n",
|
||||
" docs = [Document(page_content=splits[i+1].strip(), metadata={\"speaker\": splits[i], \"statement_index\": i // 2, \"time\": times[i // 2]}) for i in range(1, len(splits), 2)]\n",
|
||||
" return docs"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 320,
|
||||
"id": "dab1ab76",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pydantic import Field\n",
|
||||
"\n",
|
||||
"class PodcastTestCase(RetrieverTestCase):\n",
|
||||
" docs: List[Document] = Field(default_factory=load_transcript)\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"class FirstMentionTestCase(PodcastTestCase, ExpectedSubstringsTestCase):\n",
|
||||
" name: str = \"Podcast First Mention\"\n",
|
||||
" query: str = \"What was the first mention of deep learning?\"\n",
|
||||
" expected_substrings: List[str] = Field(default_factory=lambda: ['\"Deep Learning\" book'])\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"class SpeakerTestCase(PodcastTestCase, ExpectedSubstringsTestCase):\n",
|
||||
" name: str = \"Podcast Reference to Speaker\"\n",
|
||||
" query: str = \"What did Ian say about how he came up with the idea for GANs?\"\n",
|
||||
" expected_substrings: List[str] = Field(default_factory=lambda: [\"drinking helped a little bit\"])\n",
|
||||
" \n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1b2fb192",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Candidate retrieval systems\n",
|
||||
"Now that we've defined some test cases, let's create a couple candidate retrieval systems to evaluate and compare. For this we'll need to define a function that returns a Retriever given a set of documents."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 321,
|
||||
"id": "f2b99c56",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.text_splitter import TextSplitter\n",
|
||||
"from langchain.vectorstores.base import VectorStoreRetriever\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class VectorStoreTestRetriever(TestRetriever):\n",
|
||||
" base_retriever: VectorStoreRetriever\n",
|
||||
" text_splitter: Optional[TextSplitter] = None\n",
|
||||
" \n",
|
||||
" class Config:\n",
|
||||
" arbitrary_types_allowed = True\n",
|
||||
" \n",
|
||||
" def get_relevant_documents(self, query):\n",
|
||||
" return self.base_retriever.get_relevant_documents(query)\n",
|
||||
" \n",
|
||||
" def aget_relevant_documents(self, query):\n",
|
||||
" raise NotImplementedError\n",
|
||||
" \n",
|
||||
" def _insert_documents(self, docs: List[Document]) -> None:\n",
|
||||
" self.base_retriever.vectorstore.add_documents(docs)\n",
|
||||
" \n",
|
||||
" def _transform_documents(self, docs: List[Document]) -> List[Document]:\n",
|
||||
" if self.text_splitter is None:\n",
|
||||
" return docs\n",
|
||||
" return self.text_splitter.split_documents(docs)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 333,
|
||||
"id": "51b071ad",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
||||
"from langchain.vectorstores import Chroma, FAISS\n",
|
||||
" \n",
|
||||
"class ChromaTestRetriever(VectorStoreTestRetriever):\n",
|
||||
" base_retriever: BaseRetriever = Field(default_factory=lambda: Chroma(embedding_function=OpenAIEmbeddings()).as_retriever())\n",
|
||||
" text_splitter: TextSplitter = Field(default_factory=lambda: CharacterTextSplitter(chunk_size=1000, chunk_overlap=0))\n",
|
||||
" identifying_params = {\"chunk_size\": 1000, \"search\": \"similarity\", \"vectorstore\": \"Chroma\", \"k\": 4}\n",
|
||||
" \n",
|
||||
"class ChromaTestRetrieverMMR(VectorStoreTestRetriever):\n",
|
||||
" base_retriever: BaseRetriever = Field(default_factory=lambda: Chroma(embedding_function=OpenAIEmbeddings()).as_retriever(search_type=\"mmr\", search_kwargs={\"k\": 6, \"fetch_k\": 12}))\n",
|
||||
" text_splitter: TextSplitter = Field(default_factory=lambda: CharacterTextSplitter(chunk_size=200, chunk_overlap=0))\n",
|
||||
" identifying_params = {\"chunk_size\": 200, \"search\": \"mmr\", \"vectorstore\": \"Chroma\", \"k\": 6}\n",
|
||||
"\n",
|
||||
"class ChromaTestRetrieverStuffMetadata(ChromaTestRetrieverMMR):\n",
|
||||
" identifying_params = {\"chunk_size\": 200, \"search\": \"mmr\", \"vectorstore\": \"Chroma\", \"k\": 6, \"metadata\": \"included_in_content\"}\n",
|
||||
"\n",
|
||||
" def _transform_documents(self, docs: List[Document]) -> List[Document]:\n",
|
||||
" docs = super()._transform_documents(docs)\n",
|
||||
" for doc in docs:\n",
|
||||
" doc.page_content = f\"Document metadata: {doc.metadata}\\n\\n\" + doc.page_content\n",
|
||||
" return docs\n",
|
||||
" \n",
|
||||
"class FAISSTestRetriever(VectorStoreTestRetriever):\n",
|
||||
" base_retriever: BaseRetriever = Field(default_factory=lambda: FAISS.from_texts([\"foo\"], OpenAIEmbeddings()).as_retriever())\n",
|
||||
" text_splitter: TextSplitter = Field(default_factory=lambda: CharacterTextSplitter(chunk_size=1000, chunk_overlap=0))\n",
|
||||
" identifying_params = {\"chunk_size\": 1000, \"search\": \"similarity\", \"vectorstore\": \"FAISS\", \"k\": 4}"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c3a690dd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Run test cases against each retrieval system"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 334,
|
||||
"id": "c71e4897",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"test_cases = [\n",
|
||||
" (ManyDocsTestCase, {}), \n",
|
||||
" (RedundantDocsTestCase, {}), \n",
|
||||
" (EntityLinkingTestCase, {}),\n",
|
||||
" (TemporalQueryTestCase, {}),\n",
|
||||
" (RevisedStatementTestCase, {}),\n",
|
||||
" (LongTextOneFactTestCase, {}),\n",
|
||||
" (FirstMentionTestCase, {}),\n",
|
||||
" (SpeakerTestCase, {}),\n",
|
||||
"]\n",
|
||||
"test_retrievers = [\n",
|
||||
" ChromaTestRetriever,\n",
|
||||
" ChromaTestRetrieverMMR,\n",
|
||||
" ChromaTestRetrieverStuffMetadata,\n",
|
||||
" FAISSTestRetriever,\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 324,
|
||||
"id": "ede7cf14",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Created a chunk of size 215, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 232, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 304, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 205, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 332, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 215, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 281, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 201, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 325, which is longer than the specified 200\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Created a chunk of size 215, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 232, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 242, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 219, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 304, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 205, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 332, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 215, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 203, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 281, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 201, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 250, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 325, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 242, which is longer than the specified 200\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Created a chunk of size 215, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 232, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 304, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 205, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 332, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 215, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 281, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 201, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 325, which is longer than the specified 200\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Created a chunk of size 215, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 232, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 242, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 219, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 304, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 205, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 332, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 215, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 203, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 281, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 201, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 250, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 325, which is longer than the specified 200\n",
|
||||
"Created a chunk of size 242, which is longer than the specified 200\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n",
|
||||
"Using embedded DuckDB without persistence: data will be transient\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results = {}\n",
|
||||
"for retriever_cls in test_retrievers:\n",
|
||||
" retriever_name = retriever_cls().name\n",
|
||||
" results[retriever_name] = {}\n",
|
||||
" for test_case_cls, config in test_cases:\n",
|
||||
" retriever = retriever_cls()\n",
|
||||
" test_case = test_case_cls.from_config(**config)\n",
|
||||
" results[retriever_name][test_case.name] = test_case.run(retriever)\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 327,
|
||||
"id": "022f836a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{\"{'chunk_size': 1000, 'search': 'similarity', 'vectorstore': 'Chroma', 'k': 4}\": {'Many docs (retrieve=5, total=100)': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content='On 2023-02-14 the peak temperature was 71 degrees', metadata={}),\n",
|
||||
" Document(page_content='On 2023-03-14 the peak temperature was 62 degrees', metadata={}),\n",
|
||||
" Document(page_content='On 2023-02-20 the peak temperature was 63 degrees', metadata={}),\n",
|
||||
" Document(page_content='On 2023-02-03 the peak temperature was 72 degrees', metadata={})]}),\n",
|
||||
" 'Redundant docs': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Meta open sources new AI model, largest yet', metadata={}),\n",
|
||||
" Document(page_content=\"Sam Altman's OpenAI comes out with new GPT-5 model\", metadata={}),\n",
|
||||
" Document(page_content='The next-generation OpenAI GPT model is here', metadata={}),\n",
|
||||
" Document(page_content=\"GPT-5: OpenAI's next model is the biggest yet\", metadata={})]}),\n",
|
||||
" 'Entity linking (num_filler=11)': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Melissa Harkins, founder of ReallyCoolAICompany LLC, said in a recent interview that she will be stepping down as CEO.', metadata={}),\n",
|
||||
" Document(page_content='The founder of ReallyCoolAICompany LLC is from Louisville, Kentucky.', metadata={}),\n",
|
||||
" Document(page_content='They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \\n\\nSo let’s not abandon our streets. Or choose between safety and equal justice. \\n\\nLet’s come together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThat’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \\n\\nThat’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. \\n\\nWe should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \\n\\nI ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe. \\n\\nAnd I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. \\n\\nAnd I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \\n\\nBan assault weapons and high-capacity magazines. 
\\n\\nRepeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued. \\n\\nThese laws don’t infringe on the Second Amendment. They save lives. \\n\\nThe most fundamental right in America is the right to vote – and to have it counted. And it’s under assault. \\n\\nIn state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this happen. \\n\\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \\n\\nA former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. 
\\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster.', metadata={}),\n",
|
||||
" Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. \\n\\nSix days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \\n\\nGroups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \\n\\nPlease rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \\n\\nThroughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \\n\\nThey keep moving. \\n\\nAnd the costs and the threats to America and the world keep rising. \\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \\n\\nThe United States is a member along with 29 other nations. \\n\\nIt matters. 
American diplomacy matters. American resolve matters. \\n\\nPutin’s latest attack on Ukraine was premeditated and unprovoked. \\n\\nHe rejected repeated efforts at diplomacy. \\n\\nHe thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \\n\\nWe prepared extensively and carefully. \\n\\nWe spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. \\n\\nAlong with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland. \\n\\nWe are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than ever. \\n\\nTogether with our allies –we are right now enforcing powerful economic sanctions. \\n\\nWe are cutting off Russia’s largest banks from the international financial system. \\n\\nPreventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. \\n\\nWe are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come. \\n\\nTonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \\n\\nThe U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \\n\\nWe are joining with our European allies to find and seize your yachts your luxury apartments your private jets. 
We are coming for your ill-begotten gains.', metadata={})]}),\n",
|
||||
" 'Temporal query (num_docs=200)': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Today I felt happy', metadata={'date': '2023-02-26'}),\n",
|
||||
" Document(page_content='Today I felt happy', metadata={'date': '2023-05-15'}),\n",
|
||||
" Document(page_content='Today I felt happy', metadata={'date': '2023-05-22'}),\n",
|
||||
" Document(page_content='Today I felt happy', metadata={'date': '2023-06-19'})]}),\n",
|
||||
" 'Revised statement (num_revisions=6)': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Six days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. The latest reports are that the earthquake that has hit Japan is actually of magnitude 8.2.', metadata={}),\n",
|
||||
" Document(page_content='We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. The Japanese earthquake is now being recorded as magnitude 5.', metadata={}),\n",
|
||||
" Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. We are receiving reports of a magnitude 10 earthquake in Japan.', metadata={}),\n",
|
||||
" Document(page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. Now the earthquake in Japan has been downgraded to magnitude 7.', metadata={})]}),\n",
|
||||
" 'Fact in long text (text_len=38613)': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={}),\n",
|
||||
" Document(page_content='We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \\n\\nThey were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety.', metadata={}),\n",
|
||||
" Document(page_content='And built the strongest, freest, and most prosperous nation the world has ever known. \\n\\nNow is the hour. \\n\\nOur moment of responsibility. \\n\\nOur test of resolve and conscience, of history itself. \\n\\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \\n\\nWell I know this nation. \\n\\nWe will meet the test. \\n\\nTo protect freedom and liberty, to expand fairness and opportunity. \\n\\nWe will save democracy. \\n\\nAs hard as these times have been, I am more optimistic about America today than I have been my whole life. \\n\\nBecause I see the future that is within our grasp. \\n\\nBecause I know there is simply nothing beyond our capacity. \\n\\nWe are the only nation on Earth that has always turned every crisis we have faced into an opportunity. \\n\\nThe only nation that can be defined by a single word: possibilities. \\n\\nSo on this night, in our 245th year as a nation, I have come to report on the State of the Union.', metadata={}),\n",
|
||||
" Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={})]}),\n",
|
||||
" 'Podcast First Mention': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='That\\'s so fascinating. [Peter Norvig](https://norvig.com/) mentioned that you\\'re writing the *Deep Learning* chapter for the fourth edition of the [\"Artificial Intelligence, A Modern Approach\"](http://aima.cs.berkeley.edu/) book. So how do you even begin summarizing the field of deep learning in a chapter?', metadata={'speaker': 'Lex', 'statement_index': 23, 'time': 934.0}),\n",
|
||||
" Document(page_content=\"Yes. I think we already see that a little bit. I already kind of think of neural nets as a kind of program. I think of deep learning as basically learning programs that have more than one step. So if you draw a flowchart, or if you draw a [TensorFlow graph](https://www.easy-tensorflow.com/tf-tutorials/basics/graph-and-session) describing your machine learning model, I think of the depth of that graph is describing the number of steps that run in sequence, and then the width of that graph is the number of steps that run in parallel. Now it's been long enough that we've had deep learning working that it's a little bit silly to even discuss [shallow learning](http://www.princeton.edu/~aaa/Public/Teaching/ORF523/S17/ORF523_S17_Lec17_guest.pdf) anymore. But back when I first got involved in AI, when we used machine learning, we were usually learning things like [Support Vector Machines](https://en.wikipedia.org/wiki/Support-vector_machine); you could have a lot of input features to the model and you could multiply each feature by a different weight but all those multiplications were done in parallel to each other; there wasn't a lot done in series. I think what we got with deep learning was really the ability to have steps of a program that run in sequence. And I think that we've actually started to see that what's important with deep learning is more the fact that we have a multi-step program rather than the fact that we've learned a representation. If you look at things like [ResNet](https://en.wikipedia.org/wiki/Residual_neural_network), for example, they take one particular kind of representation and they update it several times. Back when deep learning first really took off in the academic world in 2006, when [Geoff Hinton](https://www.cs.toronto.edu/~hinton/) showed that you could train [deep belief networks](http://www.scholarpedia.org/article/Deep_belief_networks). 
Everybody, who was interested in the idea, thought of it as each layer learns a different level of abstraction, but the first layer trained on images learns something like edges, and the second layer learns corners, and eventually you get these kind of grandmother's cell units that recognize specific objects. Today, I think, most people think of it more as a computer program where as you add more layers you can do more updates before you output your final number. But I don't think anybody believes the layer 150 of the ResNet it is a grand grandmother cell, and layer 100 is contours, or something like that.\", metadata={'speaker': 'Ian', 'statement_index': 4, 'time': 341.6}),\n",
|
||||
" Document(page_content='You open your popular [\"Deep Learning\" book](https://www.deeplearningbook.org/) with a Russian doll type diagram that shows deep learning is a subset of [representation learning](representation learning), which in turn is a subset of machine learning, and finally a subset of [AI](https://www.britannica.com/technology/artificial-intelligence). This kind of implies that there may be limits to [deep learning](https://www.mathworks.com/discovery/deep-learning.html) in the context of AI. What do you think is the current limits of deep learning and are those limits something that we can overcome with time?', metadata={'speaker': 'Lex', 'statement_index': 1, 'time': 92.4}),\n",
|
||||
" Document(page_content=\"I would say deep learning is any kind of machine learning that involves learning parameters of more than one consecutive step. I mean shallow learning is things where you learn a lot of operations that happen in parallel. You might have a system that makes multiple steps, like you might have hand designed feature extractors but really only one step is learned. Deep learning is anything where you have multiple operations in sequence. And, that includes the things that are really popular today like convolutional networks and recurrent networks. But it also includes some of the things that have died out like [Boltzmann machines](https://en.wikipedia.org/wiki/Boltzmann_machine), where we weren't using [backpropagation](https://en.wikipedia.org/wiki/Backpropagation). Today, I hear a lot of people define deep learning as gradient descent applied to these differentiable functions, and I think that's a legitimate usage of the term, it's just different from the way that I use the term myself.\", metadata={'speaker': 'Ian', 'statement_index': 28, 'time': 1217.6})]}),\n",
|
||||
" 'Podcast Reference to Speaker': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content='The following is a conversation with Ian Goodfellow. He\\'s the author of the popular textbook on deep learning simply titled [\"Deep Learning.\"](https://www.deeplearningbook.org/) He coined the term of [Generative Adversarial Networks](https://en.wikipedia.org/wiki/Generative_adversarial_network), otherwise known as GANs, and with his [2014 paper](https://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf) is responsible for launching the incredible growth of research and innovation in this subfield of deep learning. He got his BS and MS at Stanford, his PhD at University of Montreal with [Yoshua Bengio](https://yoshuabengio.org/) and [Aaron Courville](https://aaroncourville.wordpress.com/). He held several research positions including an open [AI Google Brain](https://research.google/teams/brain/) and now at [Apple as the Director of Machine Learning](https://machinelearning.apple.com/). This recording happened while Ian was still a Google Brain but we don\\'t talk about anything specific to Google or any other organization. This conversation is part of the [Artificial intelligence podcast](https://lexfridman.com/ai/). If you enjoy it, subscribe on YouTube, iTunes, or simply connect with me on Twitter at @[lexfridman](https://twitter.com/lexfridman). Now, here\\'s my conversation with Ian Goodfellow.', metadata={'speaker': 'Lex', 'statement_index': 0, 'time': 61.2}),\n",
|
||||
" Document(page_content='Can you give a brief history of GANs from 2014?', metadata={'speaker': 'Lex', 'statement_index': 57, 'time': 2682.4}),\n",
|
||||
" Document(page_content=\"Awesome. You talked about the story of you coming up with idea of GANs at a bar with some friends. You were arguing that these GANs would work, generative adversarial networks, and the others didn't think so. Then you went home. At midnight caught it up, and it worked. So, if I were a friend of yours at the bar I would also have doubts. It's a really nice idea but I'm very skeptical that it would work. What was the basis of their skepticism? What was the basis of your intuition why it should work?\", metadata={'speaker': 'Lex', 'statement_index': 41, 'time': 1813.2}),\n",
|
||||
" Document(page_content=\"Yes. I do think that there are a lot of ideas that can be developed really quickly. GANs were probably a little bit of an outlier on the whole like one-hour timescale [right] But just in terms of a like low resource ideas where you do something really different on the algorithm scale and get a big payback. I think it's not as likely that you'll see that in terms of things like core machine learning technologies like a better classifier, or a better reinforcement learning algorithm, or a better generative model. If I had the GAN idea today it would be a lot harder to prove that it was useful than it was back in 2014 because I would need to get it running on something like ImageNet or [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) at high-resolution. You know, those take a while to train you couldn't you couldn't train it in an hour and know that it was something really new and exciting. Back in 2014 shredding an MNIST was enough but there are other areas of machine learning where I think a new idea could actually be developed really quickly with low resources.\", metadata={'speaker': 'Ian', 'statement_index': 80, 'time': 4001.6})]})},\n",
|
||||
" \"{'chunk_size': 200, 'search': 'mmr', 'vectorstore': 'Chroma', 'k': 6}\": {'Many docs (retrieve=5, total=100)': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Document metadata: {}\\n\\nOn 2023-03-14 the peak temperature was 62 degrees', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nOn 2023-02-09 the peak temperature was 53 degrees', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nOn 2023-02-02 the peak temperature was 67 degrees', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nOn 2023-04-04 the peak temperature was 57 degrees', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nOn 2023-04-02 the peak temperature was 79 degrees', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nOn 2023-02-26 the peak temperature was 80 degrees', metadata={})]}),\n",
|
||||
" 'Redundant docs': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Document metadata: {}\\n\\nMeta open sources new AI model, largest yet', metadata={}),\n",
|
||||
" Document(page_content=\"Document metadata: {}\\n\\nSam Altman's OpenAI comes out with new GPT-5 model\", metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nThe next-generation OpenAI GPT model is here', metadata={}),\n",
|
||||
" Document(page_content=\"Document metadata: {}\\n\\nGPT-5: OpenAI's next model is the biggest yet\", metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nOpenAI announces the release of GPT-5', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nOpenAI says new model GPT-5 is more creative and less', metadata={})]}),\n",
|
||||
" 'Entity linking (num_filler=11)': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Melissa Harkins, founder of ReallyCoolAICompany LLC, said in a recent interview that she will be stepping down as CEO.', metadata={}),\n",
|
||||
" Document(page_content='The founder of ReallyCoolAICompany LLC is from Louisville, Kentucky.', metadata={}),\n",
|
||||
" Document(page_content='They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \\n\\nSo let’s not abandon our streets. Or choose between safety and equal justice. \\n\\nLet’s come together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThat’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \\n\\nThat’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. \\n\\nWe should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \\n\\nI ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe. \\n\\nAnd I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. \\n\\nAnd I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \\n\\nBan assault weapons and high-capacity magazines. 
\\n\\nRepeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued. \\n\\nThese laws don’t infringe on the Second Amendment. They save lives. \\n\\nThe most fundamental right in America is the right to vote – and to have it counted. And it’s under assault. \\n\\nIn state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this happen. \\n\\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \\n\\nA former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. 
\\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster.', metadata={}),\n",
|
||||
" Document(page_content='My administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free. \\n\\nOur troops in Iraq and Afghanistan faced many dangers. \\n\\nOne was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \\n\\nWhen they came home, many of the world’s fittest and best trained warriors were never the same. \\n\\nHeadaches. Numbness. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. \\n\\nI know. \\n\\nOne of those soldiers was my son Major Beau Biden. \\n\\nWe don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \\n\\nBut I’m committed to finding out everything we can. \\n\\nCommitted to military families like Danielle Robinson from Ohio. \\n\\nThe widow of Sergeant First Class Heath Robinson. \\n\\nHe was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \\n\\nStationed near Baghdad, just yards from burn pits the size of football fields. \\n\\nHeath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter. \\n\\nBut cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. \\n\\nDanielle says Heath was a fighter to the very end. \\n\\nHe didn’t know how to stop fighting, and neither did she. \\n\\nThrough her pain she found purpose to demand we do better. \\n\\nTonight, Danielle—we are. \\n\\nThe VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits. \\n\\nAnd tonight, I’m announcing we’re expanding eligibility to veterans suffering from nine respiratory cancers. \\n\\nI’m also calling on Congress: pass a law to make sure veterans devastated by toxic exposures in Iraq and Afghanistan finally get the benefits and comprehensive health care they deserve. 
\\n\\nAnd fourth, let’s end cancer as we know it. \\n\\nThis is personal to me and Jill, to Kamala, and to so many of you. \\n\\nCancer is the #2 cause of death in America–second only to heart disease. \\n\\nLast month, I announced our plan to supercharge \\nthe Cancer Moonshot that President Obama asked me to lead six years ago. \\n\\nOur goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases. \\n\\nMore support for patients and families. \\n\\nTo get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health. \\n\\nIt’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \\n\\nARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \\n\\nA unity agenda for the nation. \\n\\nWe can do this. \\n\\nMy fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \\n\\nIn this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things. \\n\\nWe have fought for freedom, expanded liberty, defeated totalitarianism and terror. \\n\\nAnd built the strongest, freest, and most prosperous nation the world has ever known. \\n\\nNow is the hour. \\n\\nOur moment of responsibility. \\n\\nOur test of resolve and conscience, of history itself. \\n\\nIt is in this moment that our character is formed. Our purpose is found. Our future is forged. \\n\\nWell I know this nation. \\n\\nWe will meet the test. \\n\\nTo protect freedom and liberty, to expand fairness and opportunity. \\n\\nWe will save democracy. \\n\\nAs hard as these times have been, I am more optimistic about America today than I have been my whole life. \\n\\nBecause I see the future that is within our grasp. \\n\\nBecause I know there is simply nothing beyond our capacity. 
\\n\\nWe are the only nation on Earth that has always turned every crisis we have faced into an opportunity. \\n\\nThe only nation that can be defined by a single word: possibilities.', metadata={}),\n",
|
||||
" Document(page_content='We are the only nation on Earth that has always turned every crisis we have faced into an opportunity. \\n\\nThe only nation that can be defined by a single word: possibilities. \\n\\nSo on this night, in our 245th year as a nation, I have come to report on the State of the Union. \\n\\nAnd my report is this: the State of the Union is strong—because you, the American people, are strong. \\n\\nWe are stronger today than we were a year ago. \\n\\nAnd we will be stronger a year from now than we are today. \\n\\nNow is our moment to meet and overcome the challenges of our time. \\n\\nAnd we will, as one people. \\n\\nOne America. \\n\\nThe United States of America. \\n\\nMay God bless you all. May God protect our troops.', metadata={}),\n",
|
||||
" Document(page_content='I have a better plan to fight inflation. \\n\\nLower your costs, not your wages. \\n\\nMake more cars and semiconductors in America. \\n\\nMore infrastructure and innovation in America. \\n\\nMore goods moving faster and cheaper in America. \\n\\nMore jobs where you can earn a good living in America. \\n\\nAnd instead of relying on foreign supply chains, let’s make it in America. \\n\\nEconomists call it “increasing the productive capacity of our economy.” \\n\\nI call it building a better America. \\n\\nMy plan to fight inflation will lower your costs and lower the deficit. \\n\\n17 Nobel laureates in economics say my plan will ease long-term inflationary pressures. Top business leaders and most Americans support my plan. And here’s the plan: \\n\\nFirst – cut the cost of prescription drugs. Just look at insulin. One in ten Americans has diabetes. In Virginia, I met a 13-year-old boy named Joshua Davis. \\n\\nHe and his Dad both have Type 1 diabetes, which means they need insulin every day. Insulin costs about $10 a vial to make. \\n\\nBut drug companies charge families like Joshua and his Dad up to 30 times more. I spoke with Joshua’s mom. \\n\\nImagine what it’s like to look at your child who needs insulin and have no idea how you’re going to pay for it. \\n\\nWhat it does to your dignity, your ability to look your child in the eye, to be the parent you expect to be. \\n\\nJoshua is here with us tonight. Yesterday was his birthday. Happy birthday, buddy. \\n\\nFor Joshua, and for the 200,000 other young people with Type 1 diabetes, let’s cap the cost of insulin at $35 a month so everyone can afford it. \\n\\nDrug companies will still do very well. And while we’re at it let Medicare negotiate lower prices for prescription drugs, like the VA already does. \\n\\nLook, the American Rescue Plan is helping millions of families on Affordable Care Act plans save $2,400 a year on their health care premiums. 
Let’s close the coverage gap and make those savings permanent. \\n\\nSecond – cut energy costs for families an average of $500 a year by combatting climate change. \\n\\nLet’s provide investments and tax credits to weatherize your homes and businesses to be energy efficient and you get a tax credit; double America’s clean energy production in solar, wind, and so much more; lower the price of electric vehicles, saving you another $80 a month because you’ll never have to pay at the gas pump again. \\n\\nThird – cut the cost of child care. Many families pay up to $14,000 a year for child care per child. \\n\\nMiddle-class and working families shouldn’t have to pay more than 7% of their income for care of young children. \\n\\nMy plan will cut the cost in half for most families and help parents, including millions of women, who left the workforce during the pandemic because they couldn’t afford child care, to be able to get back to work. \\n\\nMy plan doesn’t stop there. It also includes home and long-term care. More affordable housing. And Pre-K for every 3- and 4-year-old. \\n\\nAll of these will lower costs. \\n\\nAnd under my plan, nobody earning less than $400,000 a year will pay an additional penny in new taxes. Nobody. \\n\\nThe one thing all Americans agree on is that the tax system is not fair. We have to fix it. \\n\\nI’m not looking to punish anyone. But let’s make sure corporations and the wealthiest Americans start paying their fair share. \\n\\nJust last year, 55 Fortune 500 corporations earned $40 billion in profits and paid zero dollars in federal income tax. \\n\\nThat’s simply not fair. That’s why I’ve proposed a 15% minimum tax rate for corporations. \\n\\nWe got more than 130 countries to agree on a global minimum tax rate so companies can’t get out of paying their taxes at home by shipping jobs and factories overseas. \\n\\nThat’s why I’ve proposed closing loopholes so the very wealthy don’t pay a lower tax rate than a teacher or a firefighter. 
\\n\\nSo that’s my plan. It will grow the economy and lower costs for families.', metadata={})]}),\n",
|
||||
" 'Temporal query (num_docs=200)': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content=\"Document metadata: {'date': '2023-05-23'}\\n\\nEverybody says I seemed so happy\", metadata={'date': '2023-05-23'}),\n",
|
||||
" Document(page_content=\"Document metadata: {'date': '2023-05-15'}\\n\\nToday I felt happy\", metadata={'date': '2023-05-15'}),\n",
|
||||
" Document(page_content=\"Document metadata: {'date': '2023-06-22'}\\n\\nToday I felt happy\", metadata={'date': '2023-06-22'}),\n",
|
||||
" Document(page_content=\"Document metadata: {'date': '2023-05-22'}\\n\\nToday I felt happy\", metadata={'date': '2023-05-22'}),\n",
|
||||
" Document(page_content=\"Document metadata: {'date': '2023-02-26'}\\n\\nToday I felt happy\", metadata={'date': '2023-02-26'}),\n",
|
||||
" Document(page_content=\"Document metadata: {'date': '2023-05-05'}\\n\\nI felt happy today\", metadata={'date': '2023-05-05'})]}),\n",
|
||||
" 'Revised statement (num_revisions=6)': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Document metadata: {}\\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. The latest reports are that the earthquake that has hit Japan is actually of magnitude 8.2.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nWe prepared extensively and carefully. The latest news is that the earthquake was of magnitude 3.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. We are receiving reports of a magnitude 10 earthquake in Japan.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. The Japanese earthquake is now being recorded as magnitude 5.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nThat’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. Looks like the earthquake is back up to an 8.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nStationed near Baghdad, just yards from burn pits the size of football fields.', metadata={})]}),\n",
|
||||
" 'Fact in long text (text_len=38613)': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Document metadata: {}\\n\\nI recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nThe only nation that can be defined by a single word: possibilities. \\n\\nSo on this night, in our 245th year as a nation, I have come to report on the State of the Union.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nI remember when my Dad had to leave our home in Scranton, Pennsylvania to find work. I grew up in a family where if the price of food went up, you felt it.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nOne was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nOne of those soldiers was my son Major Beau Biden. \\n\\nWe don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops.', metadata={}),\n",
|
||||
" Document(page_content='Document metadata: {}\\n\\nWe will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America.', metadata={})]}),\n",
|
||||
" 'Podcast First Mention': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content=\"Document metadata: {'speaker': 'Ian', 'statement_index': 4, 'time': 341.6}\\n\\nYes. I think we already see that a little bit. I already kind of think of neural nets as a kind of program. I think of deep learning as basically learning programs that have more than one step. So if you draw a flowchart, or if you draw a [TensorFlow graph](https://www.easy-tensorflow.com/tf-tutorials/basics/graph-and-session) describing your machine learning model, I think of the depth of that graph is describing the number of steps that run in sequence, and then the width of that graph is the number of steps that run in parallel. Now it's been long enough that we've had deep learning working that it's a little bit silly to even discuss [shallow learning](http://www.princeton.edu/~aaa/Public/Teaching/ORF523/S17/ORF523_S17_Lec17_guest.pdf) anymore. But back when I first got involved in AI, when we used machine learning, we were usually learning things like [Support Vector Machines](https://en.wikipedia.org/wiki/Support-vector_machine); you could have a lot of input features to the model and you could multiply each feature by a different weight but all those multiplications were done in parallel to each other; there wasn't a lot done in series. I think what we got with deep learning was really the ability to have steps of a program that run in sequence. And I think that we've actually started to see that what's important with deep learning is more the fact that we have a multi-step program rather than the fact that we've learned a representation. If you look at things like [ResNet](https://en.wikipedia.org/wiki/Residual_neural_network), for example, they take one particular kind of representation and they update it several times. 
Back when deep learning first really took off in the academic world in 2006, when [Geoff Hinton](https://www.cs.toronto.edu/~hinton/) showed that you could train [deep belief networks](http://www.scholarpedia.org/article/Deep_belief_networks). Everybody, who was interested in the idea, thought of it as each layer learns a different level of abstraction, but the first layer trained on images learns something like edges, and the second layer learns corners, and eventually you get these kind of grandmother's cell units that recognize specific objects. Today, I think, most people think of it more as a computer program where as you add more layers you can do more updates before you output your final number. But I don't think anybody believes the layer 150 of the ResNet it is a grand grandmother cell, and layer 100 is contours, or something like that.\", metadata={'speaker': 'Ian', 'statement_index': 4, 'time': 341.6}),\n",
|
||||
" Document(page_content='Document metadata: {\\'speaker\\': \\'Lex\\', \\'statement_index\\': 23, \\'time\\': 934.0}\\n\\nThat\\'s so fascinating. [Peter Norvig](https://norvig.com/) mentioned that you\\'re writing the *Deep Learning* chapter for the fourth edition of the [\"Artificial Intelligence, A Modern Approach\"](http://aima.cs.berkeley.edu/) book. So how do you even begin summarizing the field of deep learning in a chapter?', metadata={'speaker': 'Lex', 'statement_index': 23, 'time': 934.0}),\n",
|
||||
" Document(page_content=\"Document metadata: {'speaker': 'Lex', 'statement_index': 27, 'time': 1158.0}\\n\\nSo, if I were to ask you -I remember I took algorithms and data structures algorithm course-, remember the professor asked what is an algorithm, and yelled at everybody, in a good way, that nobody was answering it correctly. Everybody knew what the algorithm was - it was graduate course. Everybody knew what an algorithm was but they weren't able to answer it well. Let me ask you, in that same spirit, what is Deep Learning?\", metadata={'speaker': 'Lex', 'statement_index': 27, 'time': 1158.0}),\n",
|
||||
" Document(page_content=\"Document metadata: {'speaker': 'Ian', 'statement_index': 2, 'time': 164.8}\\n\\nI think one of the biggest limitations of deep learning is that right now it requires really a lot of data, especially labeled data. There's some unsupervised and semi-supervised learning algorithms that can reduce the amount of labeled data you need, but they still require a lot of unlabeled data. Reinforcement Learning algorithms they don't need labels but they need really a lot of experiences. As human beings we don't want to play [Pong](https://en.wikipedia.org/wiki/Pong) by failing at Pong two million times. So, just getting the generalization ability better is one of the most important bottlenecks and the capability of the technology today. Then, I guess, I'd also say deep learning is like a component of a bigger system. So far, nobody is really proposing to have only what you'd call deep learning as the entire ingredient of intelligence. You use deep learning as sub-modules of other systems. Like [AlphaGo](https://en.wikipedia.org/wiki/AlphaGo) has a deep learning model that estimates the value function, most reinforcement learning algorithms have a deep learning module that estimates which action to take next but you might have other components.\", metadata={'speaker': 'Ian', 'statement_index': 2, 'time': 164.8}),\n",
|
||||
" Document(page_content=\"Document metadata: {'speaker': 'Ian', 'statement_index': 60, 'time': 2833.6}\\n\\nWe used MNIST which is little handwritten digits. We used the Toronto face database which is small grayscale photos of faces. We did have recognizable faces. My colleague [Bing Xu](https://scholar.google.com/citations?user=nHh9PSsAAAAJ&hl=en) put together the first GAN face model for that paper. We also had the CIFAR10 dataset which is things like very small 32 by 32 pixels of cars, and cats, and dogs. For that, we didn't get recognizable objects but all the deep learning people back then, we're really used to looking at these failed samples and kind of reading them like tea leaves. And people who are used to reading the tea leaves recognized that our tea leaves at least looked different [right], maybe not necessarily better but there was something unusual about them, and that got a lot of us excited. One of the next really big steps was [Laplacian GANs](https://arxiv.org/abs/1506.05751) by Emily Denton and Soumith Chintala at Facebook AI research, where they actually got really good high-resolution photos working with GANs for the first time. They had a complicated system where they generated the image starting at low-res, and then scaling up to high-res but they were able to get it to work. And then in 2015, I believe, later that same year, Alex Radford, Soumith Chintala, and Luke Metz published the [DC-GAN](https://arxiv.org/abs/1511.06434) paper, which it stands for Deep Convolutional GAN. It's kind of a non-unique name because these days basically all GANs, and even some before, that were deep in convolutional, but they just kind of picked a name for a really great recipe where they were able to actually using only one model instead of a multi-step process, actually generate realistic images of faces and things like that. That was sort of like the beginning of the Cambrian explosion of GANs. 
Once you got animals that had a backbone you suddenly got lots of different versions of fish, and four-legged animals, and things like that. So DC-GAN became kind of the backbone for many different models that came out.\", metadata={'speaker': 'Ian', 'statement_index': 60, 'time': 2833.6}),\n",
|
||||
" Document(page_content='Document metadata: {\\'speaker\\': \\'Lex\\', \\'statement_index\\': 0, \\'time\\': 61.2}\\n\\nThe following is a conversation with Ian Goodfellow. He\\'s the author of the popular textbook on deep learning simply titled [\"Deep Learning.\"](https://www.deeplearningbook.org/) He coined the term of [Generative Adversarial Networks](https://en.wikipedia.org/wiki/Generative_adversarial_network), otherwise known as GANs, and with his [2014 paper](https://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf) is responsible for launching the incredible growth of research and innovation in this subfield of deep learning. He got his BS and MS at Stanford, his PhD at University of Montreal with [Yoshua Bengio](https://yoshuabengio.org/) and [Aaron Courville](https://aaroncourville.wordpress.com/). He held several research positions including an open [AI Google Brain](https://research.google/teams/brain/) and now at [Apple as the Director of Machine Learning](https://machinelearning.apple.com/). This recording happened while Ian was still a Google Brain but we don\\'t talk about anything specific to Google or any other organization. This conversation is part of the [Artificial intelligence podcast](https://lexfridman.com/ai/). If you enjoy it, subscribe on YouTube, iTunes, or simply connect with me on Twitter at @[lexfridman](https://twitter.com/lexfridman). Now, here\\'s my conversation with Ian Goodfellow.', metadata={'speaker': 'Lex', 'statement_index': 0, 'time': 61.2})]}),\n",
|
||||
" 'Podcast Reference to Speaker': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content=\"Document metadata: {'speaker': 'Ian', 'statement_index': 80, 'time': 4001.6}\\n\\nYes. I do think that there are a lot of ideas that can be developed really quickly. GANs were probably a little bit of an outlier on the whole like one-hour timescale [right] But just in terms of a like low resource ideas where you do something really different on the algorithm scale and get a big payback. I think it's not as likely that you'll see that in terms of things like core machine learning technologies like a better classifier, or a better reinforcement learning algorithm, or a better generative model. If I had the GAN idea today it would be a lot harder to prove that it was useful than it was back in 2014 because I would need to get it running on something like ImageNet or [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) at high-resolution. You know, those take a while to train you couldn't you couldn't train it in an hour and know that it was something really new and exciting. Back in 2014 shredding an MNIST was enough but there are other areas of machine learning where I think a new idea could actually be developed really quickly with low resources.\", metadata={'speaker': 'Ian', 'statement_index': 80, 'time': 4001.6}),\n",
|
||||
" Document(page_content='Document metadata: {\\'speaker\\': \\'Lex\\', \\'statement_index\\': 0, \\'time\\': 61.2}\\n\\nThe following is a conversation with Ian Goodfellow. He\\'s the author of the popular textbook on deep learning simply titled [\"Deep Learning.\"](https://www.deeplearningbook.org/) He coined the term of [Generative Adversarial Networks](https://en.wikipedia.org/wiki/Generative_adversarial_network), otherwise known as GANs, and with his [2014 paper](https://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf) is responsible for launching the incredible growth of research and innovation in this subfield of deep learning. He got his BS and MS at Stanford, his PhD at University of Montreal with [Yoshua Bengio](https://yoshuabengio.org/) and [Aaron Courville](https://aaroncourville.wordpress.com/). He held several research positions including an open [AI Google Brain](https://research.google/teams/brain/) and now at [Apple as the Director of Machine Learning](https://machinelearning.apple.com/). This recording happened while Ian was still a Google Brain but we don\\'t talk about anything specific to Google or any other organization. This conversation is part of the [Artificial intelligence podcast](https://lexfridman.com/ai/). If you enjoy it, subscribe on YouTube, iTunes, or simply connect with me on Twitter at @[lexfridman](https://twitter.com/lexfridman). Now, here\\'s my conversation with Ian Goodfellow.', metadata={'speaker': 'Lex', 'statement_index': 0, 'time': 61.2}),\n",
|
||||
" Document(page_content=\"Document metadata: {'speaker': 'Ian', 'statement_index': 60, 'time': 2833.6}\\n\\nWe used MNIST which is little handwritten digits. We used the Toronto face database which is small grayscale photos of faces. We did have recognizable faces. My colleague [Bing Xu](https://scholar.google.com/citations?user=nHh9PSsAAAAJ&hl=en) put together the first GAN face model for that paper. We also had the CIFAR10 dataset which is things like very small 32 by 32 pixels of cars, and cats, and dogs. For that, we didn't get recognizable objects but all the deep learning people back then, we're really used to looking at these failed samples and kind of reading them like tea leaves. And people who are used to reading the tea leaves recognized that our tea leaves at least looked different [right], maybe not necessarily better but there was something unusual about them, and that got a lot of us excited. One of the next really big steps was [Laplacian GANs](https://arxiv.org/abs/1506.05751) by Emily Denton and Soumith Chintala at Facebook AI research, where they actually got really good high-resolution photos working with GANs for the first time. They had a complicated system where they generated the image starting at low-res, and then scaling up to high-res but they were able to get it to work. And then in 2015, I believe, later that same year, Alex Radford, Soumith Chintala, and Luke Metz published the [DC-GAN](https://arxiv.org/abs/1511.06434) paper, which it stands for Deep Convolutional GAN. It's kind of a non-unique name because these days basically all GANs, and even some before, that were deep in convolutional, but they just kind of picked a name for a really great recipe where they were able to actually using only one model instead of a multi-step process, actually generate realistic images of faces and things like that. That was sort of like the beginning of the Cambrian explosion of GANs. 
Once you got animals that had a backbone you suddenly got lots of different versions of fish, and four-legged animals, and things like that. So DC-GAN became kind of the backbone for many different models that came out.\", metadata={'speaker': 'Ian', 'statement_index': 60, 'time': 2833.6}),\n",
|
||||
" Document(page_content=\"Document metadata: {'speaker': 'Lex', 'statement_index': 41, 'time': 1813.2}\\n\\nAwesome. You talked about the story of you coming up with idea of GANs at a bar with some friends. You were arguing that these GANs would work, generative adversarial networks, and the others didn't think so. Then you went home. At midnight caught it up, and it worked. So, if I were a friend of yours at the bar I would also have doubts. It's a really nice idea but I'm very skeptical that it would work. What was the basis of their skepticism? What was the basis of your intuition why it should work?\", metadata={'speaker': 'Lex', 'statement_index': 41, 'time': 1813.2}),\n",
|
||||
" Document(page_content=\"Document metadata: {'speaker': 'Ian', 'statement_index': 56, 'time': 2678.4}\\n\\nOr like the kind of expertise. A lot of people who've traditionally been excited about graphics or art, and things like that, have gotten interested in GANs. To some extent it's hard to tell our GANs doing better because they have a lot of graphics and art experts behind them, or our GANs doing better because they're more computationally efficient, or our GANs doing better because they prioritize the realism of samples over the accuracy of the density function. I think all of those are potentially valid explanations and it's it's hard to tell.\", metadata={'speaker': 'Ian', 'statement_index': 56, 'time': 2678.4}),\n",
|
||||
" Document(page_content=\"Document metadata: {'speaker': 'Ian', 'statement_index': 74, 'time': 3687.2}\\n\\nAnother way I think that GANs, in particular, could be used for fairness would be to make something like a Cycle-GAN where you can take data from one domain and convert it into another. We've seen Cycle-GAN turning horses into zebras; we've seen other unsupervised GANs made by [Ming-Yu Liu](https://github.com/mingyuliutw/unit) doing things like turning day photos into night photos. I think for fairness you could imagine taking records for people in one group and transforming them into analogous people in another group, and testing to see if they're treated equitably across those two groups. There's a lot of things that be hard to get right to make sure that the conversion process itself is fair, and I don't think it's anywhere near something that we could actually use yet. But if you could design that conversion process very carefully; it might give you a way of doing audits, where you say what if we took people from this group converted them into equivalent people in another group does the system actually treat them how it ought to.\", metadata={'speaker': 'Ian', 'statement_index': 74, 'time': 3687.2})]})},\n",
|
||||
" \"{'chunk_size': 1000, 'search': 'similarity', 'vectorstore': 'FAISS', 'k': 4}\": {'Many docs (retrieve=5, total=100)': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content='On 2023-02-14 the peak temperature was 71 degrees', metadata={}),\n",
|
||||
" Document(page_content='On 2023-03-14 the peak temperature was 62 degrees', metadata={}),\n",
|
||||
" Document(page_content='On 2023-02-20 the peak temperature was 63 degrees', metadata={}),\n",
|
||||
" Document(page_content='On 2023-02-03 the peak temperature was 72 degrees', metadata={})]}),\n",
|
||||
" 'Redundant docs': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Meta open sources new AI model, largest yet', metadata={}),\n",
|
||||
" Document(page_content=\"Sam Altman's OpenAI comes out with new GPT-5 model\", metadata={}),\n",
|
||||
" Document(page_content='The next-generation OpenAI GPT model is here', metadata={}),\n",
|
||||
" Document(page_content=\"GPT-5: OpenAI's next model is the biggest yet\", metadata={})]}),\n",
|
||||
" 'Entity linking (num_filler=11)': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Melissa Harkins, founder of ReallyCoolAICompany LLC, said in a recent interview that she will be stepping down as CEO.', metadata={}),\n",
|
||||
" Document(page_content='foo', metadata={}),\n",
|
||||
" Document(page_content='The founder of ReallyCoolAICompany LLC is from Louisville, Kentucky.', metadata={}),\n",
|
||||
" Document(page_content='They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \\n\\nOfficer Mora was 27 years old. \\n\\nOfficer Rivera was 22. \\n\\nBoth Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \\n\\nI spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \\n\\nI’ve worked on these issues a long time. \\n\\nI know what works: Investing in crime preventionand community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \\n\\nSo let’s not abandon our streets. Or choose between safety and equal justice. \\n\\nLet’s come together to protect our communities, restore trust, and hold law enforcement accountable. \\n\\nThat’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers. \\n\\nThat’s why the American Rescue Plan provided $350 Billion that cities, states, and counties can use to hire more police and invest in proven strategies like community violence interruption—trusted messengers breaking the cycle of violence and trauma and giving young people hope. \\n\\nWe should all agree: The answer is not to Defund the police. The answer is to FUND the police with the resources and training they need to protect our communities. \\n\\nI ask Democrats and Republicans alike: Pass my budget and keep our neighborhoods safe. \\n\\nAnd I will keep doing everything in my power to crack down on gun trafficking and ghost guns you can buy online and make at home—they have no serial numbers and can’t be traced. \\n\\nAnd I ask Congress to pass proven measures to reduce gun violence. Pass universal background checks. Why should anyone on a terrorist list be able to purchase a weapon? \\n\\nBan assault weapons and high-capacity magazines. 
\\n\\nRepeal the liability shield that makes gun manufacturers the only industry in America that can’t be sued. \\n\\nThese laws don’t infringe on the Second Amendment. They save lives. \\n\\nThe most fundamental right in America is the right to vote – and to have it counted. And it’s under assault. \\n\\nIn state after state, new laws have been passed, not only to suppress the vote, but to subvert entire elections. \\n\\nWe cannot let this happen. \\n\\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \\n\\nA former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. 
\\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster.', metadata={})]}),\n",
|
||||
" 'Temporal query (num_docs=200)': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Everybody says I seemed so happy', metadata={'date': '2023-05-23'}),\n",
|
||||
" Document(page_content='Today I felt happy', metadata={'date': '2023-05-06'}),\n",
|
||||
" Document(page_content='Today I felt happy', metadata={'date': '2023-05-15'}),\n",
|
||||
" Document(page_content='Today I felt happy', metadata={'date': '2023-06-22'})]}),\n",
|
||||
" 'Revised statement (num_revisions=6)': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='Six days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \\n\\nHe thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \\n\\nHe met the Ukrainian people. \\n\\nFrom President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. The latest reports are that the earthquake that has hit Japan is actually of magnitude 8.2.', metadata={}),\n",
|
||||
" Document(page_content='We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin. \\n\\nI spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \\n\\nWe countered Russia’s lies with truth. \\n\\nAnd now that he has acted the free world is holding him accountable. The Japanese earthquake is now being recorded as magnitude 5.', metadata={}),\n",
|
||||
" Document(page_content='Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \\n\\nLast year COVID-19 kept us apart. This year we are finally together again. \\n\\nTonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \\n\\nWith a duty to one another to the American people to the Constitution. \\n\\nAnd with an unwavering resolve that freedom will always triumph over tyranny. We are receiving reports of a magnitude 10 earthquake in Japan.', metadata={}),\n",
|
||||
" Document(page_content='Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \\n\\nIn this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight. \\n\\nLet each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. Now the earthquake in Japan has been downgraded to magnitude 7.', metadata={})]}),\n",
|
||||
" 'Fact in long text (text_len=38613)': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='foo', metadata={}),\n",
|
||||
" Document(page_content=\"We got more than 130 countries to agree on a global minimum tax rate so companies can’t get out of paying their taxes at home by shipping jobs and factories overseas. \\n\\nThat’s why I’ve proposed closing loopholes so the very wealthy don’t pay a lower tax rate than a teacher or a firefighter. \\n\\nSo that’s my plan. It will grow the economy and lower costs for families. \\n\\nSo what are we waiting for? Let’s get this done. And while you’re at it, confirm my nominees to the Federal Reserve, which plays a critical role in fighting inflation. \\n\\nMy plan will not only lower costs to give families a fair shot, it will lower the deficit. We've just received reports of a purple monkey invading the White House.. \\n\\nThe previous Administration not only ballooned the deficit with tax cuts for the very wealthy and corporations, it undermined the watchdogs whose job was to keep pandemic relief funds from being wasted. \\n\\nBut in my administration, the watchdogs have been welcomed back.\", metadata={}),\n",
|
||||
" Document(page_content='One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \\n\\nWhen they came home, many of the world’s fittest and best trained warriors were never the same. \\n\\nHeadaches. Numbness. Dizziness. \\n\\nA cancer that would put them in a flag-draped coffin. \\n\\nI know. \\n\\nOne of those soldiers was my son Major Beau Biden. \\n\\nWe don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \\n\\nBut I’m committed to finding out everything we can. \\n\\nCommitted to military families like Danielle Robinson from Ohio. \\n\\nThe widow of Sergeant First Class Heath Robinson. \\n\\nHe was born a soldier. Army National Guard. Combat medic in Kosovo and Iraq. \\n\\nStationed near Baghdad, just yards from burn pits the size of football fields. \\n\\nHeath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter.', metadata={}),\n",
|
||||
" Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={})]}),\n",
|
||||
" 'Podcast First Mention': (True,\n",
|
||||
" {'retrieved_docs': [Document(page_content='That\\'s so fascinating. [Peter Norvig](https://norvig.com/) mentioned that you\\'re writing the *Deep Learning* chapter for the fourth edition of the [\"Artificial Intelligence, A Modern Approach\"](http://aima.cs.berkeley.edu/) book. So how do you even begin summarizing the field of deep learning in a chapter?', metadata={'speaker': 'Lex', 'statement_index': 23, 'time': 934.0}),\n",
|
||||
" Document(page_content=\"Yes. I think we already see that a little bit. I already kind of think of neural nets as a kind of program. I think of deep learning as basically learning programs that have more than one step. So if you draw a flowchart, or if you draw a [TensorFlow graph](https://www.easy-tensorflow.com/tf-tutorials/basics/graph-and-session) describing your machine learning model, I think of the depth of that graph is describing the number of steps that run in sequence, and then the width of that graph is the number of steps that run in parallel. Now it's been long enough that we've had deep learning working that it's a little bit silly to even discuss [shallow learning](http://www.princeton.edu/~aaa/Public/Teaching/ORF523/S17/ORF523_S17_Lec17_guest.pdf) anymore. But back when I first got involved in AI, when we used machine learning, we were usually learning things like [Support Vector Machines](https://en.wikipedia.org/wiki/Support-vector_machine); you could have a lot of input features to the model and you could multiply each feature by a different weight but all those multiplications were done in parallel to each other; there wasn't a lot done in series. I think what we got with deep learning was really the ability to have steps of a program that run in sequence. And I think that we've actually started to see that what's important with deep learning is more the fact that we have a multi-step program rather than the fact that we've learned a representation. If you look at things like [ResNet](https://en.wikipedia.org/wiki/Residual_neural_network), for example, they take one particular kind of representation and they update it several times. Back when deep learning first really took off in the academic world in 2006, when [Geoff Hinton](https://www.cs.toronto.edu/~hinton/) showed that you could train [deep belief networks](http://www.scholarpedia.org/article/Deep_belief_networks). 
Everybody, who was interested in the idea, thought of it as each layer learns a different level of abstraction, but the first layer trained on images learns something like edges, and the second layer learns corners, and eventually you get these kind of grandmother's cell units that recognize specific objects. Today, I think, most people think of it more as a computer program where as you add more layers you can do more updates before you output your final number. But I don't think anybody believes the layer 150 of the ResNet it is a grand grandmother cell, and layer 100 is contours, or something like that.\", metadata={'speaker': 'Ian', 'statement_index': 4, 'time': 341.6}),\n",
|
||||
" Document(page_content='You open your popular [\"Deep Learning\" book](https://www.deeplearningbook.org/) with a Russian doll type diagram that shows deep learning is a subset of [representation learning](representation learning), which in turn is a subset of machine learning, and finally a subset of [AI](https://www.britannica.com/technology/artificial-intelligence). This kind of implies that there may be limits to [deep learning](https://www.mathworks.com/discovery/deep-learning.html) in the context of AI. What do you think is the current limits of deep learning and are those limits something that we can overcome with time?', metadata={'speaker': 'Lex', 'statement_index': 1, 'time': 92.4}),\n",
|
||||
" Document(page_content=\"So, if I were to ask you -I remember I took algorithms and data structures algorithm course-, remember the professor asked what is an algorithm, and yelled at everybody, in a good way, that nobody was answering it correctly. Everybody knew what the algorithm was - it was graduate course. Everybody knew what an algorithm was but they weren't able to answer it well. Let me ask you, in that same spirit, what is Deep Learning?\", metadata={'speaker': 'Lex', 'statement_index': 27, 'time': 1158.0})]}),\n",
|
||||
" 'Podcast Reference to Speaker': (False,\n",
|
||||
" {'retrieved_docs': [Document(page_content='The following is a conversation with Ian Goodfellow. He\\'s the author of the popular textbook on deep learning simply titled [\"Deep Learning.\"](https://www.deeplearningbook.org/) He coined the term of [Generative Adversarial Networks](https://en.wikipedia.org/wiki/Generative_adversarial_network), otherwise known as GANs, and with his [2014 paper](https://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf) is responsible for launching the incredible growth of research and innovation in this subfield of deep learning. He got his BS and MS at Stanford, his PhD at University of Montreal with [Yoshua Bengio](https://yoshuabengio.org/) and [Aaron Courville](https://aaroncourville.wordpress.com/). He held several research positions including an open [AI Google Brain](https://research.google/teams/brain/) and now at [Apple as the Director of Machine Learning](https://machinelearning.apple.com/). This recording happened while Ian was still a Google Brain but we don\\'t talk about anything specific to Google or any other organization. This conversation is part of the [Artificial intelligence podcast](https://lexfridman.com/ai/). If you enjoy it, subscribe on YouTube, iTunes, or simply connect with me on Twitter at @[lexfridman](https://twitter.com/lexfridman). Now, here\\'s my conversation with Ian Goodfellow.', metadata={'speaker': 'Lex', 'statement_index': 0, 'time': 61.2}),\n",
|
||||
" Document(page_content='Can you give a brief history of GANs from 2014?', metadata={'speaker': 'Lex', 'statement_index': 57, 'time': 2682.4}),\n",
|
||||
" Document(page_content=\"Awesome. You talked about the story of you coming up with idea of GANs at a bar with some friends. You were arguing that these GANs would work, generative adversarial networks, and the others didn't think so. Then you went home. At midnight caught it up, and it worked. So, if I were a friend of yours at the bar I would also have doubts. It's a really nice idea but I'm very skeptical that it would work. What was the basis of their skepticism? What was the basis of your intuition why it should work?\", metadata={'speaker': 'Lex', 'statement_index': 41, 'time': 1813.2}),\n",
|
||||
" Document(page_content=\"Yes. I do think that there are a lot of ideas that can be developed really quickly. GANs were probably a little bit of an outlier on the whole like one-hour timescale [right] But just in terms of a like low resource ideas where you do something really different on the algorithm scale and get a big payback. I think it's not as likely that you'll see that in terms of things like core machine learning technologies like a better classifier, or a better reinforcement learning algorithm, or a better generative model. If I had the GAN idea today it would be a lot harder to prove that it was useful than it was back in 2014 because I would need to get it running on something like ImageNet or [CelebA](http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html) at high-resolution. You know, those take a while to train you couldn't you couldn't train it in an hour and know that it was something really new and exciting. Back in 2014 shredding an MNIST was enough but there are other areas of machine learning where I think a new idea could actually be developed really quickly with low resources.\", metadata={'speaker': 'Ian', 'statement_index': 80, 'time': 4001.6})]})}}"
|
||||
]
|
||||
},
|
||||
"execution_count": 327,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 330,
|
||||
"id": "8a91ecf3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<style type=\"text/css\">\n",
|
||||
"#T_11b1b_row0_col1, #T_11b1b_row0_col4, #T_11b1b_row0_col6, #T_11b1b_row0_col8, #T_11b1b_row1_col1, #T_11b1b_row1_col4, #T_11b1b_row1_col6, #T_11b1b_row1_col7, #T_11b1b_row1_col8, #T_11b1b_row2_col1, #T_11b1b_row2_col4, #T_11b1b_row2_col8 {\n",
|
||||
" background-color: mistyrose;\n",
|
||||
"}\n",
|
||||
"#T_11b1b_row0_col2, #T_11b1b_row0_col3, #T_11b1b_row0_col5, #T_11b1b_row0_col7, #T_11b1b_row1_col2, #T_11b1b_row1_col3, #T_11b1b_row1_col5, #T_11b1b_row2_col2, #T_11b1b_row2_col3, #T_11b1b_row2_col5, #T_11b1b_row2_col6, #T_11b1b_row2_col7 {\n",
|
||||
" background-color: honeydew;\n",
|
||||
"}\n",
|
||||
"</style>\n",
|
||||
"<table id=\"T_11b1b\">\n",
|
||||
" <thead>\n",
|
||||
" <tr>\n",
|
||||
" <th class=\"blank level0\" > </th>\n",
|
||||
" <th id=\"T_11b1b_level0_col0\" class=\"col_heading level0 col0\" >System</th>\n",
|
||||
" <th id=\"T_11b1b_level0_col1\" class=\"col_heading level0 col1\" >Many docs (retrieve=5, total=100)</th>\n",
|
||||
" <th id=\"T_11b1b_level0_col2\" class=\"col_heading level0 col2\" >Redundant docs</th>\n",
|
||||
" <th id=\"T_11b1b_level0_col3\" class=\"col_heading level0 col3\" >Entity linking (num_filler=11)</th>\n",
|
||||
" <th id=\"T_11b1b_level0_col4\" class=\"col_heading level0 col4\" >Temporal query (num_docs=200)</th>\n",
|
||||
" <th id=\"T_11b1b_level0_col5\" class=\"col_heading level0 col5\" >Revised statement (num_revisions=6)</th>\n",
|
||||
" <th id=\"T_11b1b_level0_col6\" class=\"col_heading level0 col6\" >Fact in long text (text_len=38613)</th>\n",
|
||||
" <th id=\"T_11b1b_level0_col7\" class=\"col_heading level0 col7\" >Podcast First Mention</th>\n",
|
||||
" <th id=\"T_11b1b_level0_col8\" class=\"col_heading level0 col8\" >Podcast Reference to Speaker</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th id=\"T_11b1b_level0_row0\" class=\"row_heading level0 row0\" >0</th>\n",
|
||||
" <td id=\"T_11b1b_row0_col0\" class=\"data row0 col0\" >{'chunk_size': 1000, 'search': 'similarity', 'vectorstore': 'Chroma', 'k': 4}</td>\n",
|
||||
" <td id=\"T_11b1b_row0_col1\" class=\"data row0 col1\" >False</td>\n",
|
||||
" <td id=\"T_11b1b_row0_col2\" class=\"data row0 col2\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row0_col3\" class=\"data row0 col3\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row0_col4\" class=\"data row0 col4\" >False</td>\n",
|
||||
" <td id=\"T_11b1b_row0_col5\" class=\"data row0 col5\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row0_col6\" class=\"data row0 col6\" >False</td>\n",
|
||||
" <td id=\"T_11b1b_row0_col7\" class=\"data row0 col7\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row0_col8\" class=\"data row0 col8\" >False</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th id=\"T_11b1b_level0_row1\" class=\"row_heading level0 row1\" >1</th>\n",
|
||||
" <td id=\"T_11b1b_row1_col0\" class=\"data row1 col0\" >{'chunk_size': 200, 'search': 'mmr', 'vectorstore': 'Chroma', 'k': 6}</td>\n",
|
||||
" <td id=\"T_11b1b_row1_col1\" class=\"data row1 col1\" >False</td>\n",
|
||||
" <td id=\"T_11b1b_row1_col2\" class=\"data row1 col2\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row1_col3\" class=\"data row1 col3\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row1_col4\" class=\"data row1 col4\" >False</td>\n",
|
||||
" <td id=\"T_11b1b_row1_col5\" class=\"data row1 col5\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row1_col6\" class=\"data row1 col6\" >False</td>\n",
|
||||
" <td id=\"T_11b1b_row1_col7\" class=\"data row1 col7\" >False</td>\n",
|
||||
" <td id=\"T_11b1b_row1_col8\" class=\"data row1 col8\" >False</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th id=\"T_11b1b_level0_row2\" class=\"row_heading level0 row2\" >2</th>\n",
|
||||
" <td id=\"T_11b1b_row2_col0\" class=\"data row2 col0\" >{'chunk_size': 1000, 'search': 'similarity', 'vectorstore': 'FAISS', 'k': 4}</td>\n",
|
||||
" <td id=\"T_11b1b_row2_col1\" class=\"data row2 col1\" >False</td>\n",
|
||||
" <td id=\"T_11b1b_row2_col2\" class=\"data row2 col2\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row2_col3\" class=\"data row2 col3\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row2_col4\" class=\"data row2 col4\" >False</td>\n",
|
||||
" <td id=\"T_11b1b_row2_col5\" class=\"data row2 col5\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row2_col6\" class=\"data row2 col6\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row2_col7\" class=\"data row2 col7\" >True</td>\n",
|
||||
" <td id=\"T_11b1b_row2_col8\" class=\"data row2 col8\" >False</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n"
|
||||
],
|
||||
"text/plain": [
|
||||
"<pandas.io.formats.style.Styler at 0x2a674e3a0>"
|
||||
]
|
||||
},
|
||||
"execution_count": 330,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Visualize results\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"def highlight(s):\n",
|
||||
" res = []\n",
|
||||
" for s_ in s:\n",
|
||||
" if pd.isnull(s_):\n",
|
||||
" res.append('background-color: whitesmoke')\n",
|
||||
" elif s_:\n",
|
||||
" res.append('background-color: honeydew')\n",
|
||||
" else:\n",
|
||||
" res.append('background-color: mistyrose')\n",
|
||||
" return res\n",
|
||||
"\n",
|
||||
"def visualize_results(results):\n",
|
||||
" result_df = pd.DataFrame([{\"System\": retriever, **{k: v[0] for k, v in test_cases.items()}} for retriever, test_cases in results.items()])\n",
|
||||
" return result_df.style.apply(highlight, subset=result_df.columns.drop(\"System\"))\n",
|
||||
"visualize_results(results)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a3a5f542",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Improvement ideas\n",
|
||||
"1. hybrid of sparse and dense embeddings\n",
|
||||
"2. add metadata to content\n",
|
||||
"3. add ability to filter based on metadata or page_content\n",
|
||||
"4. dynamically determine k (keep asking for more documents as needed)\n",
|
||||
"5. don't only store raw docs, store summaries / extracted information as well\n",
|
||||
"6. use MMR\n",
|
||||
"7. let retrieval system determine query\n",
|
||||
"8. recency bias"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 509,
|
||||
"id": "7b2c64b3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"WhoAmIResponse(username='805b516', user_label='default', projectname='6ddd519')"
|
||||
]
|
||||
},
|
||||
"execution_count": 509,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# import datetime\n",
|
||||
"# import pinecone\n",
|
||||
"# from pinecone_text.sparse import BM25Encoder\n",
|
||||
"# from langchain.retrievers import PineconeHybridSearchRetriever\n",
|
||||
"\n",
|
||||
"# # os.environ[\"PINECONE_API_KEY\"] = \"get-ur-own-api-key\"\n",
|
||||
"# # os.environ[\"PINECONE_ENVIRONMENT\"] = \"the-moon\"\n",
|
||||
"# api_key = os.getenv(\"PINECONE_API_KEY\")\n",
|
||||
"# env = os.getenv(\"PINECONE_ENVIRONMENT\")\n",
|
||||
"\n",
|
||||
    "# pinecone.init(api_key=api_key, environment=env)\n",
|
||||
"\n",
|
||||
"# def get_retriever(documents):\n",
|
||||
"# timestamp = int(datetime.datetime.now().timestamp())\n",
|
||||
"# index_name = f\"hybrid-search-{timestamp}\"\n",
|
||||
"\n",
|
||||
"# # create the index\n",
|
||||
"# pinecone.create_index(\n",
|
||||
"# name = index_name,\n",
|
||||
"# dimension = 1536, # dimensionality of dense model\n",
|
||||
"# metric = \"dotproduct\", # sparse values supported only for dotproduct\n",
|
||||
"# pod_type = \"s1\",\n",
|
||||
    "#         metadata_config={\"indexed\": []} # see explanation above\n",
|
||||
"# )\n",
|
||||
"# index = pinecone.Index(index_name)\n",
|
||||
"# # or from pinecone_text.sparse import SpladeEncoder if you wish to work with SPLADE\n",
|
||||
"\n",
|
||||
"# # use default tf-idf values\n",
|
||||
"# bm25_encoder = BM25Encoder().default()\n",
|
||||
"# retriever = PineconeHybridSearchRetriever(embeddings=embeddings, sparse_encoder=bm25_encoder, index=index)\n",
|
||||
" \n",
|
||||
"# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"# documents = text_splitter.split_documents(documents)\n",
|
||||
"# for doc in documents:\n",
|
||||
"# doc.page_content = f\"Document metadata: {doc.metadata}\\n\\n\" + doc.page_content\n",
|
||||
"# retriever.add_texts([doc.page_content for doc in documents])\n",
|
||||
"# return retriever\n",
|
||||
"\n",
|
||||
"# def cleanup_retriever(retriever):\n",
|
||||
"# index_name = retriever.index.configuration.server_variables['index_name']\n",
|
||||
"# pinecone.delete_index(index_name)\n",
|
||||
"\n",
|
||||
"# candidate = {\n",
|
||||
"# \"params\": {\"search\": \"pinecone hybrid\", \"k\": 4, \"chunk_size\": 100},\n",
|
||||
"# \"get_retriever\": get_retriever,\n",
|
||||
"# \"cleanup_retriever\": cleanup_retriever\n",
|
||||
"# }\n",
|
||||
"# retrieval_candidates.append(candidate)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "venv",
|
||||
"language": "python",
|
||||
"name": "venv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user