Compare commits

...

4 Commits

Author     SHA1         Message                                             Date
Dev 2049   ea166b091e   improve                                             2023-05-18 12:39:51 -07:00
Dev 2049   f8389cf4fd   Merge branch 'master' into dev2049/retrieval_eval   2023-05-18 12:16:46 -07:00
Dev 2049   2798832e65   undo                                                2023-05-17 15:10:13 -07:00
Dev 2049   7fac949d12   wip                                                 2023-05-17 15:08:47 -07:00
7 changed files with 161 additions and 7 deletions

View File

@@ -94,6 +94,25 @@ class StuffDocumentsChain(BaseCombineDocumentsChain):
        # Call predict on the LLM.
        return await self.llm_chain.apredict(callbacks=callbacks, **inputs), {}

    def combine_docs_and_parse(
        self, docs: List[Document], callbacks: Callbacks = None, **kwargs: Any
    ) -> Tuple[Any, dict]:
        """Stuff all documents into one prompt and pass to LLM."""
        inputs = self._get_inputs(docs, **kwargs)
        # Call predict on the LLM.
        return self.llm_chain.predict_and_parse(callbacks=callbacks, **inputs), {}

    async def acombine_docs_and_parse(
        self, docs: List[Document], callbacks: Callbacks = None, **kwargs: Any
    ) -> Tuple[str, dict]:
        """Stuff all documents into one prompt and pass to LLM."""
        inputs = self._get_inputs(docs, **kwargs)
        # Call predict on the LLM.
        return (
            await self.llm_chain.apredict_and_parse(callbacks=callbacks, **inputs),
            {},
        )

    @property
    def _chain_type(self) -> str:
        return "stuff_documents_chain"

View File

@@ -0,0 +1,50 @@
from typing import List, Optional

from langchain import LLMChain, OpenAI, PromptTemplate
from langchain.base_language import BaseLanguageModel
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.evaluation.retrieval.prompts import (
    GRADE_DOCS_PROMPT,
    GRADE_SINGLE_DOC_PROMPT,
)
from langchain.schema import Document


def grade_documents(
    documents: List[Document],
    question: str,
    llm: Optional[BaseLanguageModel] = None,
) -> List[int]:
    _llm = llm or OpenAI(temperature=0)
    if len(documents) == 1:
        return [grade_single_document(documents[0], question, llm=_llm)]
    llm_chain = LLMChain(llm=_llm, prompt=GRADE_DOCS_PROMPT)
    _documents = [
        Document(page_content=d.page_content, metadata={"i": i})
        for i, d in enumerate(documents)
    ]
    document_prompt = PromptTemplate(
        template="DOCUMENT {i}:\n{page_content}", input_variables=["i", "page_content"]
    )
    eval_chain = StuffDocumentsChain(
        llm_chain=llm_chain,
        document_prompt=document_prompt,
        document_variable_name="documents",
    )
    score, _ = eval_chain.combine_docs_and_parse(_documents, question=question)
    return score


def grade_single_document(
    document: Document,
    question: str,
    llm: Optional[BaseLanguageModel] = None,
) -> int:
    _llm = llm or OpenAI(temperature=0)
    llm_chain = LLMChain(llm=_llm, prompt=GRADE_SINGLE_DOC_PROMPT)
    eval_chain = StuffDocumentsChain(
        llm_chain=llm_chain,
        document_variable_name="document",
    )
    score, _ = eval_chain.combine_docs_and_parse([document], question=question)
    return score[0]
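
A usage sketch for the new grading helpers; the import path of this new module is not shown in this view, so the example assumes grade_documents and grade_single_document are already imported, and the question, documents, and expected scores are invented. An OpenAI API key is required.

from typing import List

from langchain.schema import Document

docs: List[Document] = [
    Document(page_content="Paris has been the capital of France since 508 AD."),
    Document(page_content="Bananas are a good source of potassium."),
]

# One 1-5 relevance score per document; something like [5, 1] would be
# expected here, though actual values depend on the model.
scores = grade_documents(docs, question="What is the capital of France?")
print(scores)

# Single-document grading goes through GRADE_SINGLE_DOC_PROMPT instead.
single = grade_single_document(docs[0], question="What is the capital of France?")
print(single)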

View File

@@ -0,0 +1,63 @@
from typing import List

from langchain import PromptTemplate
from langchain.output_parsers.regex import ListRegexParser


class GradeOutputParser(ListRegexParser[int]):
    regex = r"\D*(\d+)"
    _cast = int


TEMPLATE = """\
>> INSTRUCTIONS:
Given a question and a list of documents, score how relevant each document is to the question. \
Return integer scores between 1-5, where 1 means the document is completely irrelevant to the question \
and 5 means the document answers the question exactly.

>> FORMATTING INSTRUCTIONS:
Return a comma separated list of scores, with one score for each document. Do not label
the scores or add any other text. Do not return a score outside the allowed range.

>> QUESTION:
{question}

>> CANDIDATE DOCUMENTS:
{documents}

>> RELEVANCE SCORES:
"""

GRADE_DOCS_PROMPT = PromptTemplate(
    input_variables=["question", "documents"],
    template=TEMPLATE,
    output_parser=GradeOutputParser(),
)

SINGLE_DOC_TEMPLATE = """\
>> INSTRUCTIONS:
Given a question and a document, score how relevant the document is to the question. \
Return an integer score between 1-5, where 1 means all of the document is completely irrelevant to the question \
and 5 means that some part of the document answers the question exactly.
*Remember*, a document is considered to be relevant if *ANY* part of the document is relevant. \

>> FORMATTING INSTRUCTIONS:
Return a single integer score. Do not label
the score or add any other text. Do not return a score outside the allowed range.

>> QUESTION:
{question}

>> CANDIDATE DOCUMENT:
{document}

>> RELEVANCE SCORES:
"""

GRADE_SINGLE_DOC_PROMPT = PromptTemplate(
    input_variables=["question", "document"],
    template=SINGLE_DOC_TEMPLATE,
    output_parser=GradeOutputParser(),
)
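
The regex the grading parser relies on can be checked in isolation with only the standard library; the example replies below are invented.

import re

# GradeOutputParser applies re.findall with this pattern and casts each match to int.
pattern = r"\D*(\d+)"

print(re.findall(pattern, "3, 5, 1"))           # ['3', '5', '1'] -> parsed as [3, 5, 1]
print(re.findall(pattern, "Scores: 4 and 2"))   # ['4', '2']      -> parsed as [4, 2]
print(re.findall(pattern, "no digits at all"))  # []  -> ListRegexParser raises OutputParserException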

View File

@@ -121,7 +121,7 @@ class GenerativeAgentMemory(BaseMemory):
        score = self.chain(prompt).run(memory_content=memory_content).strip()
        if self.verbose:
            logger.info(f"Importance score: {score}")
-       match = re.search(r"^\D*(\d+)", score)
+       match = re.search(r"\D*(\d+)", score)
        if match:
            return (float(match.group(1)) / 10) * self.importance_weight
        else:

View File

@@ -1,9 +1,31 @@
from __future__ import annotations

import re
-from typing import Dict, List, Optional
+from typing import Callable, Dict, Generic, List, Optional, TypeVar

-from langchain.schema import BaseOutputParser
+from langchain.schema import BaseOutputParser, OutputParserException

_PARSED_T = TypeVar("_PARSED_T")


class ListRegexParser(BaseOutputParser[List[_PARSED_T]], Generic[_PARSED_T]):
    """Class to parse output using a regex."""

    regex: str
    _cast: Callable[[str], _PARSED_T]

    def parse(self, text: str) -> List[_PARSED_T]:
        """Parse the output of an LLM call."""
        matches = re.findall(self.regex, text)
        if matches:
            return [self._cast(m) for m in matches]
        else:
            raise OutputParserException(f"Could not parse output: {text}")

    @property
    def _type(self) -> str:
        """Return the type key."""
        return "list_regex_parser"


class RegexParser(BaseOutputParser):
@@ -25,7 +47,7 @@ class RegexParser(BaseOutputParser):
            return {key: match.group(i + 1) for i, key in enumerate(self.output_keys)}
        else:
            if self.default_output_key is None:
-                raise ValueError(f"Could not parse output: {text}")
+                raise OutputParserException(f"Could not parse output: {text}")
            else:
                return {
                    key: text if key == self.default_output_key else ""
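
A sketch of reusing the new ListRegexParser outside of grading; the FloatListParser class and its regex are made up, and it only runs on this branch since ListRegexParser is introduced here.

from langchain.output_parsers.regex import ListRegexParser
from langchain.schema import OutputParserException


class FloatListParser(ListRegexParser[float]):
    regex = r"(\d+(?:\.\d+)?)"
    _cast = float


parser = FloatListParser()
print(parser.parse("0.5, 2, 3.75"))  # -> [0.5, 2.0, 3.75]

try:
    parser.parse("no numbers here")
except OutputParserException as err:
    print(err)  # Could not parse output: no numbers here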

View File

@@ -302,17 +302,17 @@ class BaseRetriever(ABC):
Memory = BaseMemory

-T = TypeVar("T")
+PARSED_T = TypeVar("PARSED_T")


-class BaseOutputParser(BaseModel, ABC, Generic[T]):
+class BaseOutputParser(BaseModel, ABC, Generic[PARSED_T]):
    """Class to parse the output of an LLM call.

    Output parsers help structure language model responses.
    """

    @abstractmethod
-    def parse(self, text: str) -> T:
+    def parse(self, text: str) -> PARSED_T:
        """Parse the output of an LLM call.

        A method which takes in a string (assumed output of a language model )
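
The TypeVar rename is purely a type-level change; subclasses still parametrize BaseOutputParser with whatever their parse method returns. A small sketch with a made-up parser class:

from typing import List

from langchain.schema import BaseOutputParser


class CommaSeparatedParser(BaseOutputParser[List[str]]):
    """Toy parser: split a comma separated completion into a list of strings."""

    def parse(self, text: str) -> List[str]:
        return [part.strip() for part in text.split(",")]


print(CommaSeparatedParser().parse("alpha, beta, gamma"))  # ['alpha', 'beta', 'gamma']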