Merge branch 'vwp/run_collector' into vwp/drafts/unit_testing

This commit is contained in:
vowelparrot
2023-06-13 19:36:06 -07:00
10 changed files with 464 additions and 79 deletions

View File

@@ -1,4 +1,3 @@
import functools
from pathlib import Path
from typing import Tuple
from uuid import uuid4
@@ -31,77 +30,6 @@ _EVALUATORS = [
),
]
# Hard-coded question/answer example records.
# Each record is a dict with four keys:
#   "dataset_id" - identical across all ten records below
#   "inputs"     - mapping holding a single "query" string
#   "outputs"    - mapping holding a single "answer" string
#   "id"         - a distinct UUID string per record
dataset_examples = [
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {"query": "How many albums are there"},
"outputs": {"answer": "347"},
"id": "b82f3498-5f2c-4e02-9bc3-799e88c7859a",
},
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {
"query": "How many more Protected AAC audio files are there than Protected MPEG-4 video file?"
},
"outputs": {"answer": "23"},
"id": "baafdea1-ed98-4eda-9161-f953ca771377",
},
# NOTE(review): this record and the next one ask the same query but expect
# different answers ("Purchased AAC audio file" vs "MPEG audio file") --
# confirm which answer is intended.
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {"query": "What is the most common media type?"},
"outputs": {"answer": "Purchased AAC audio file"},
"id": "f1748c31-5cd4-4409-a88e-35412a8c7cb5",
},
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {"query": "What is the most common media type?"},
"outputs": {"answer": "MPEG audio file"},
"id": "fc076f71-fbff-4401-aa86-a66a46cf7de5",
},
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {"query": "What is the most common genre of songs?"},
"outputs": {"answer": "Rock"},
"id": "14f7bf41-8037-4499-b3fc-ac5429a3aee0",
},
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {"query": "Where is Mark Telus from?"},
"outputs": {"answer": "Edmonton, Canada"},
"id": "e7481287-cc6a-46e1-a42a-e071f1371127",
},
# The expected answer here is the four-character string "None", not the
# Python None object.
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {"query": "How many employees are also customers?"},
"outputs": {"answer": "None"},
"id": "070c3639-9e69-4810-a22d-9b2897e22e67",
},
# The next two records phrase the Bach query differently but carry the same
# expected answer text.
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {"query": "What are some example tracks by Bach?"},
"outputs": {
"answer": "'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace', 'Aria Mit 30 Veränderungen, BWV 988 'Goldberg Variations': Aria', and 'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude'"
},
"id": "99a9f7ee-4fe7-4bf0-bec2-bf8e1a60c798",
},
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {
"query": "What are some example tracks by composer Johann Sebastian Bach?"
},
"outputs": {
"answer": "'Concerto for 2 Violins in D Minor, BWV 1043: I. Vivace', 'Aria Mit 30 Veränderungen, BWV 988 'Goldberg Variations': Aria', and 'Suite for Solo Cello No. 1 in G Major, BWV 1007: I. Prélude'"
},
"id": "3f683e5e-314c-4d8c-98f9-6456e1c27e70",
},
{
"dataset_id": "ad07b2df-2c73-4ca7-98ca-e67bc606536f",
"inputs": {"query": "How many employees are there?"},
"outputs": {"answer": "8"},
"id": "85b3ae48-26c0-495f-95bf-7fa9cf271897",
},
]
@pytest.fixture(scope="module")
def database() -> SQLDatabase:
@@ -114,9 +42,11 @@ def chain_to_test(database: SQLDatabase) -> SQLDatabaseChain:
return SQLDatabaseChain.from_llm(llm, database)
@pytest.fixture(scope="module", params=dataset_examples)
@pytest.fixture(
scope="module", params=_CLIENT.list_examples(dataset_name="sql-qa-chinook")
)
def run_example_pair(request, chain_to_test: SQLDatabaseChain) -> Tuple[Run, Example]:
example = Example(**request.param)
example: Example = request.param
run_stack = RunStackCallbackHandler()
with tracing_v2_enabled(
session_name=f"test_chain_on_example-{_TEST_RUN_ID}", example_id=example.id

View File

@@ -201,4 +201,4 @@ async def test_arun_on_dataset(monkeypatch: pytest.MonkeyPatch) -> None:
]
for uuid_ in uuids
}
assert results == expected
assert results["results"] == expected