From c2ca6612fe97476a55a0a785b01fe7e2366e01d8 Mon Sep 17 00:00:00 2001 From: Leonid Ganeline Date: Thu, 1 Feb 2024 12:05:57 -0800 Subject: [PATCH] refactor `langchain.prompts.example_selector` (#15369) The `langchain.prompts.example_selector` [still holds several artifacts](https://api.python.langchain.com/en/latest/langchain_api_reference.html#module-langchain.prompts) that belongs to `community`. If they moved to `langchain_community.example_selectors`, the `langchain.prompts` namespace would be effectively removed which is great. - moved a class and afunction to `langchain_community` Note: - Previously, the `langchain.prompts.example_selector` artifacts were moved into the `langchain_core.exampe_selectors`. See the flattened namespace (`.prompts` was removed)! Similar flattening was implemented for the `langchain_core` as the `langchain_core.exampe_selectors`. --------- Co-authored-by: Erick Friis --- .../example_selectors/__init__.py | 10 ++ .../example_selectors/ngram_overlap.py | 114 +++++++++++++++++ .../prompts/example_selector/ngram_overlap.py | 119 ++---------------- 3 files changed, 132 insertions(+), 111 deletions(-) create mode 100644 libs/community/langchain_community/example_selectors/__init__.py create mode 100644 libs/community/langchain_community/example_selectors/ngram_overlap.py diff --git a/libs/community/langchain_community/example_selectors/__init__.py b/libs/community/langchain_community/example_selectors/__init__.py new file mode 100644 index 00000000000..70654d689b4 --- /dev/null +++ b/libs/community/langchain_community/example_selectors/__init__.py @@ -0,0 +1,10 @@ +"""Logic for selecting examples to include in prompts.""" +from langchain_community.example_selectors.ngram_overlap import ( + NGramOverlapExampleSelector, + ngram_overlap_score, +) + +__all__ = [ + "NGramOverlapExampleSelector", + "ngram_overlap_score", +] diff --git a/libs/community/langchain_community/example_selectors/ngram_overlap.py b/libs/community/langchain_community/example_selectors/ngram_overlap.py new file mode 100644 index 00000000000..c8d662f8e1f --- /dev/null +++ b/libs/community/langchain_community/example_selectors/ngram_overlap.py @@ -0,0 +1,114 @@ +"""Select and order examples based on ngram overlap score (sentence_bleu score). + +https://www.nltk.org/_modules/nltk/translate/bleu_score.html +https://aclanthology.org/P02-1040.pdf +""" +from typing import Dict, List + +import numpy as np +from langchain_core.example_selectors import BaseExampleSelector +from langchain_core.prompts import PromptTemplate +from langchain_core.pydantic_v1 import BaseModel, root_validator + + +def ngram_overlap_score(source: List[str], example: List[str]) -> float: + """Compute ngram overlap score of source and example as sentence_bleu score + from NLTK package. + + Use sentence_bleu with method1 smoothing function and auto reweighting. + Return float value between 0.0 and 1.0 inclusive. + https://www.nltk.org/_modules/nltk/translate/bleu_score.html + https://aclanthology.org/P02-1040.pdf + """ + from nltk.translate.bleu_score import ( + SmoothingFunction, # type: ignore + sentence_bleu, + ) + + hypotheses = source[0].split() + references = [s.split() for s in example] + + return float( + sentence_bleu( + references, + hypotheses, + smoothing_function=SmoothingFunction().method1, + auto_reweigh=True, + ) + ) + + +class NGramOverlapExampleSelector(BaseExampleSelector, BaseModel): + """Select and order examples based on ngram overlap score (sentence_bleu score + from NLTK package). + + https://www.nltk.org/_modules/nltk/translate/bleu_score.html + https://aclanthology.org/P02-1040.pdf + """ + + examples: List[dict] + """A list of the examples that the prompt template expects.""" + + example_prompt: PromptTemplate + """Prompt template used to format the examples.""" + + threshold: float = -1.0 + """Threshold at which algorithm stops. Set to -1.0 by default. + + For negative threshold: + select_examples sorts examples by ngram_overlap_score, but excludes none. + For threshold greater than 1.0: + select_examples excludes all examples, and returns an empty list. + For threshold equal to 0.0: + select_examples sorts examples by ngram_overlap_score, + and excludes examples with no ngram overlap with input. + """ + + @root_validator(pre=True) + def check_dependencies(cls, values: Dict) -> Dict: + """Check that valid dependencies exist.""" + try: + from nltk.translate.bleu_score import ( # noqa: F401 + SmoothingFunction, + sentence_bleu, + ) + except ImportError as e: + raise ImportError( + "Not all the correct dependencies for this ExampleSelect exist." + "Please install nltk with `pip install nltk`." + ) from e + + return values + + def add_example(self, example: Dict[str, str]) -> None: + """Add new example to list.""" + self.examples.append(example) + + def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: + """Return list of examples sorted by ngram_overlap_score with input. + + Descending order. + Excludes any examples with ngram_overlap_score less than or equal to threshold. + """ + inputs = list(input_variables.values()) + examples = [] + k = len(self.examples) + score = [0.0] * k + first_prompt_template_key = self.example_prompt.input_variables[0] + + for i in range(k): + score[i] = ngram_overlap_score( + inputs, [self.examples[i][first_prompt_template_key]] + ) + + while True: + arg_max = np.argmax(score) + if (score[arg_max] < self.threshold) or abs( + score[arg_max] - self.threshold + ) < 1e-9: + break + + examples.append(self.examples[arg_max]) + score[arg_max] = self.threshold - 1.0 + + return examples diff --git a/libs/langchain/langchain/prompts/example_selector/ngram_overlap.py b/libs/langchain/langchain/prompts/example_selector/ngram_overlap.py index c439c946ed3..db1be277e46 100644 --- a/libs/langchain/langchain/prompts/example_selector/ngram_overlap.py +++ b/libs/langchain/langchain/prompts/example_selector/ngram_overlap.py @@ -1,112 +1,9 @@ -"""Select and order examples based on ngram overlap score (sentence_bleu score). +from langchain_community.example_selectors.ngram_overlap import ( + NGramOverlapExampleSelector, + ngram_overlap_score, +) -https://www.nltk.org/_modules/nltk/translate/bleu_score.html -https://aclanthology.org/P02-1040.pdf -""" -from typing import Dict, List - -import numpy as np -from langchain_core.example_selectors.base import BaseExampleSelector -from langchain_core.prompts import PromptTemplate -from langchain_core.pydantic_v1 import BaseModel, root_validator - - -def ngram_overlap_score(source: List[str], example: List[str]) -> float: - """Compute ngram overlap score of source and example as sentence_bleu score. - - Use sentence_bleu with method1 smoothing function and auto reweighting. - Return float value between 0.0 and 1.0 inclusive. - https://www.nltk.org/_modules/nltk/translate/bleu_score.html - https://aclanthology.org/P02-1040.pdf - """ - from nltk.translate.bleu_score import ( - SmoothingFunction, # type: ignore - sentence_bleu, - ) - - hypotheses = source[0].split() - references = [s.split() for s in example] - - return float( - sentence_bleu( - references, - hypotheses, - smoothing_function=SmoothingFunction().method1, - auto_reweigh=True, - ) - ) - - -class NGramOverlapExampleSelector(BaseExampleSelector, BaseModel): - """Select and order examples based on ngram overlap score (sentence_bleu score). - - https://www.nltk.org/_modules/nltk/translate/bleu_score.html - https://aclanthology.org/P02-1040.pdf - """ - - examples: List[dict] - """A list of the examples that the prompt template expects.""" - - example_prompt: PromptTemplate - """Prompt template used to format the examples.""" - - threshold: float = -1.0 - """Threshold at which algorithm stops. Set to -1.0 by default. - - For negative threshold: - select_examples sorts examples by ngram_overlap_score, but excludes none. - For threshold greater than 1.0: - select_examples excludes all examples, and returns an empty list. - For threshold equal to 0.0: - select_examples sorts examples by ngram_overlap_score, - and excludes examples with no ngram overlap with input. - """ - - @root_validator(pre=True) - def check_dependencies(cls, values: Dict) -> Dict: - """Check that valid dependencies exist.""" - try: - from nltk.translate.bleu_score import ( # noqa: F401 - SmoothingFunction, - sentence_bleu, - ) - except ImportError as e: - raise ImportError( - "Not all the correct dependencies for this ExampleSelect exist." - "Please install nltk with `pip install nltk`." - ) from e - - return values - - def add_example(self, example: Dict[str, str]) -> None: - """Add new example to list.""" - self.examples.append(example) - - def select_examples(self, input_variables: Dict[str, str]) -> List[dict]: - """Return list of examples sorted by ngram_overlap_score with input. - - Descending order. - Excludes any examples with ngram_overlap_score less than or equal to threshold. - """ - inputs = list(input_variables.values()) - examples = [] - k = len(self.examples) - score = [0.0] * k - first_prompt_template_key = self.example_prompt.input_variables[0] - - for i in range(k): - score[i] = ngram_overlap_score( - inputs, [self.examples[i][first_prompt_template_key]] - ) - - while True: - arg_max = np.argmax(score) - if (score[arg_max] < self.threshold) or abs( - score[arg_max] - self.threshold - ) < 1e-9: - break - - examples.append(self.examples[arg_max]) - score[arg_max] = self.threshold - 1.0 - - return examples +__all__ = [ + "NGramOverlapExampleSelector", + "ngram_overlap_score", +]