mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-23 19:39:58 +00:00
Synthetic data generation (#9759)
### Description

Implements synthetic data generation with the fields and preferences given by the user. Adds a showcase notebook. A corresponding prompt was proposed for langchain-hub.

### Example

```
output = chain({"fields": {"colors": ["blue", "yellow"]}, "preferences": {"style": "Make it in a style of a weather forecast."}})
print(output)
# {'fields': {'colors': ['blue', 'yellow']}, 'preferences': {'style': 'Make it in a style of a weather forecast.'}, 'text': "Good morning! Today's weather forecast brings a beautiful combination of colors to the sky, with hues of blue and yellow gently blending together like a mesmerizing painting."}
```

### Twitter handle

@deepsense_ai @matt_wosinski

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
@@ -0,0 +1,51 @@
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
||||
|
||||
from langchain.chains.llm import LLMChain
|
||||
|
||||
from langchain_experimental.synthetic_data.prompts import SENTENCE_PROMPT
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchain.chains.base import Chain
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.schema.language_model import BaseLanguageModel
|
||||
|
||||
|
||||
def create_data_generation_chain(
    llm: "BaseLanguageModel",
    prompt: Optional["PromptTemplate"] = None,
) -> "Chain":
    """Create a chain that generates synthetic sentences with provided fields.

    Args:
        llm: The language model to use.
        prompt: Prompt to feed the language model with.
            If not provided, the default ``SENTENCE_PROMPT`` is used.

    Returns:
        An ``LLMChain`` built from ``llm`` and the selected prompt.
    """
    # NOTE: the annotations above are quoted on purpose. BaseLanguageModel,
    # PromptTemplate and Chain are imported only under ``TYPE_CHECKING``, so
    # evaluating them eagerly would raise NameError when this module loads.
    prompt = prompt or SENTENCE_PROMPT
    return LLMChain(
        llm=llm,
        prompt=prompt,
    )
|
||||
|
||||
|
||||
class DatasetGenerator:
    """Generates synthetic dataset with a given language model."""

    def __init__(
        self,
        llm: "BaseLanguageModel",
        sentence_preferences: Optional[Dict[str, Any]] = None,
    ):
        """Build the generation chain and remember the sentence preferences.

        Args:
            llm: The language model handed to ``create_data_generation_chain``.
            sentence_preferences: Preferences sent with every generation
                request. Any falsy value (including ``None``) is replaced by
                an empty dict.
        """
        # ``llm`` is annotated with a quoted name because BaseLanguageModel is
        # imported only under ``TYPE_CHECKING`` and is undefined at runtime.
        self.generator = create_data_generation_chain(llm)
        self.sentence_preferences = sentence_preferences or {}

    def __call__(self, fields_collection: List[List[Any]]) -> List[Dict[str, Any]]:
        """Run the generator once per entry of ``fields_collection``.

        Args:
            fields_collection: One item per sentence to generate; each item
                is passed to the chain under the ``"fields"`` key.

        Returns:
            The chain's results, in the same order as ``fields_collection``.
        """
        return [
            self.generator(
                {"fields": fields, "preferences": self.sentence_preferences}
            )
            for fields in fields_collection
        ]
|
@@ -0,0 +1,15 @@
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
# Prompt text for synthetic sentence generation. ``{fields}`` and
# ``{preferences}`` are placeholders substituted when the prompt is formatted.
sentence_template = """Given the following fields, create a sentence about them.
Make the sentence detailed and interesting. Use every given field.
If any additional preferences are given, use them during sentence construction as well.
Fields:
{fields}
Preferences:
{preferences}
Sentence:
"""
|
||||
|
||||
# Prompt object wrapping ``sentence_template``; its two placeholders,
# ``fields`` and ``preferences``, are declared as the input variables.
SENTENCE_PROMPT = PromptTemplate(
    input_variables=["fields", "preferences"],
    template=sentence_template,
)
|
Reference in New Issue
Block a user