Conversational Feedback (#12590)
Context is in the README. Shows how to score chat responses based on a follow-up from the user, then log that as feedback in LangSmith.
templates/chat-bot-feedback/chat_bot_feedback/chain.py (new file, 182 lines)
@@ -0,0 +1,182 @@
from __future__ import annotations

from typing import List, Optional

from langchain import hub
from langchain.callbacks.tracers.evaluation import EvaluatorCallbackHandler
from langchain.callbacks.tracers.schemas import Run
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema import (
    AIMessage,
    BaseMessage,
    HumanMessage,
    StrOutputParser,
    get_buffer_string,
)
from langchain.schema.runnable import Runnable
from langsmith.evaluation import EvaluationResult, RunEvaluator
from langsmith.schemas import Example
from pydantic import BaseModel, Field

###############################################################################
# | Chat Bot Evaluator Definition
# | This section defines an evaluator that evaluates any chat bot
# | without explicit user feedback. It formats the dialog up to
# | the current message and then instructs an LLM to grade the last AI response
# | based on the subsequent user response. If no chat history is present,
# V the evaluator is not called.
###############################################################################


class ResponseEffectiveness(BaseModel):
    """Score the effectiveness of the AI chat bot response."""

    reasoning: str = Field(
        ...,
        description="Explanation for the score.",
    )
    score: int = Field(
        ...,
        min=0,
        max=5,
        description="Effectiveness of AI's final response.",
    )
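
# Illustrative only: with the schema above, the model's function-call
# arguments are expected to parse to something like
#   {"reasoning": "The user confirmed the answer solved their problem.", "score": 5}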

def format_messages(input: dict) -> List[BaseMessage]:
    """Format the messages for the evaluator."""
    chat_history = input.get("chat_history") or []
    results = []
    for message in chat_history:
        if message["type"] == "human":
            results.append(HumanMessage.parse_obj(message))
        else:
            results.append(AIMessage.parse_obj(message))
    return results


def format_dialog(input: dict) -> dict:
    """Format messages and convert to a single string."""
    chat_history = format_messages(input)
    formatted_dialog = get_buffer_string(chat_history) + f"\nhuman: {input['text']}"
    return {"dialog": formatted_dialog}

def normalize_score(response: dict) -> dict:
    """Normalize the score to be between 0 and 1."""
    response["score"] = int(response["score"]) / 5
    return response


# To view the prompt in the playground: https://smith.langchain.com/hub/wfh/response-effectiveness
evaluation_prompt = hub.pull("wfh/response-effectiveness")
evaluate_response_effectiveness = (
    format_dialog
    | evaluation_prompt
    # bind_functions formats the schema for the OpenAI function-calling
    # endpoint, which returns more reliable structured data.
    | ChatOpenAI(model="gpt-3.5-turbo").bind_functions(
        functions=[ResponseEffectiveness],
        function_call="ResponseEffectiveness",
    )
    # Convert the model's output to a dict.
    | JsonOutputFunctionsParser(args_only=True)
    | normalize_score
)
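
# A minimal sketch of running the evaluator chain by itself (assumes
# OPENAI_API_KEY is set and the hub prompt is reachable):
#   grade = evaluate_response_effectiveness.invoke(
#       {
#           "chat_history": [
#               {"type": "human", "content": "How do I hoist the mainsail?"},
#               {"type": "ai", "content": "Haul on the halyard, matey!"},
#           ],
#           "text": "Perfect, that did it!",
#       }
#   )
#   # grade is a dict like {"reasoning": "...", "score": 0.8} after normalize_score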

class ResponseEffectivenessEvaluator(RunEvaluator):
    """Evaluate the chat bot based on the subsequent user responses."""

    def __init__(self, evaluator_runnable: Runnable) -> None:
        super().__init__()
        self.runnable = evaluator_runnable

    def evaluate_run(
        self, run: Run, example: Optional[Example] = None
    ) -> EvaluationResult:
        # This evaluator grades the AI's PREVIOUS response.
        # If no chat history is present, there isn't anything to evaluate
        # (it's the user's first message).
        if not run.inputs.get("chat_history"):
            return EvaluationResult(
                key="response_effectiveness", comment="No chat history present."
            )
        # This only occurs if the client isn't correctly sending the run IDs
        # of the previous calls.
        elif "last_run_id" not in run.inputs:
            return EvaluationResult(
                key="response_effectiveness", comment="No last run ID present."
            )
        # Call the LLM to evaluate the response.
        eval_grade: Optional[dict] = self.runnable.invoke(run.inputs)
        target_run_id = run.inputs["last_run_id"]
        return EvaluationResult(
            **eval_grade,
            key="response_effectiveness",
            target_run_id=target_run_id,  # Requires langsmith >= 0.0.54
        )

###############################################################################
# | The chat bot definition
# | This is what is actually exposed by LangServe in the API.
# | It can be any chain that accepts the ChainInput schema and returns a str;
# | all that is required is the with_config() call at the end to add the
# V evaluators as "listeners" to the chain.
###############################################################################


class ChainInput(BaseModel):
    """Input for the chat bot."""

    chat_history: Optional[List[BaseMessage]] = Field(
        description="Previous chat messages."
    )
    text: str = Field(..., description="User's latest query.")
    last_run_id: Optional[str] = Field("", description="Run ID of the last run.")


_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant who speaks like a pirate",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{text}"),
    ]
)
_model = ChatOpenAI()

def format_chat_history(chain_input: dict) -> dict:
    messages = format_messages(chain_input)

    return {
        "chat_history": messages,
        "text": chain_input.get("text"),
    }


# If you update the name of this, you MUST also update ../pyproject.toml
# with the new `tool.langserve.export_attr`.
chain = (
    (format_chat_history | _prompt | _model | StrOutputParser())
    .with_types(input_type=ChainInput)
    # This is to add the evaluators as "listeners"
    # and to customize the name of the chain.
    # Any chain that accepts a compatible input type works here.
    .with_config(
        run_name="ChatBot",
        callbacks=[
            EvaluatorCallbackHandler(
                evaluators=[
                    ResponseEffectivenessEvaluator(evaluate_response_effectiveness)
                ]
            )
        ],
    )
)
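
# A minimal sketch of calling the chain directly (illustrative; in practice a
# LangServe client supplies `last_run_id` from the previous turn so the
# evaluator can attach its score to that earlier run in LangSmith):
#   chain.invoke(
#       {
#           "text": "That fixed it, thanks!",
#           "chat_history": [
#               {"type": "human", "content": "Why won't my code compile?"},
#               {"type": "ai", "content": "Arr, ye be missin' a semicolon!"},
#           ],
#           "last_run_id": "<run-id-of-the-previous-turn>",
#       }
#   )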