Bagatur
2023-10-05 18:09:35 -07:00
1068 changed files with 63594 additions and 22802 deletions

View File

@@ -0,0 +1,102 @@
"""Test the bash utility."""
import re
import subprocess
import sys
from pathlib import Path
import pytest
from langchain_experimental.llm_bash.bash import BashProcess
@pytest.mark.skipif(
sys.platform.startswith("win"), reason="Test not supported on Windows"
)
def test_pwd_command() -> None:
"""Test correct functionality."""
session = BashProcess()
commands = ["pwd"]
output = session.run(commands)
assert output == subprocess.check_output("pwd", shell=True).decode()
@pytest.mark.skip(reason="flaky on GHA, TODO to fix")
@pytest.mark.skipif(
sys.platform.startswith("win"), reason="Test not supported on Windows"
)
def test_pwd_command_persistent() -> None:
"""Test correct functionality when the bash process is persistent."""
session = BashProcess(persistent=True, strip_newlines=True)
commands = ["pwd"]
output = session.run(commands)
assert subprocess.check_output("pwd", shell=True).decode().strip() in output
session.run(["cd .."])
new_output = session.run(["pwd"])
# Assert that the new_output is a parent of the old output
assert Path(output).parent == Path(new_output)
@pytest.mark.skipif(
sys.platform.startswith("win"), reason="Test not supported on Windows"
)
def test_incorrect_command() -> None:
"""Test handling of incorrect command."""
session = BashProcess()
output = session.run(["invalid_command"])
assert output == "Command 'invalid_command' returned non-zero exit status 127."
@pytest.mark.skipif(
sys.platform.startswith("win"), reason="Test not supported on Windows"
)
def test_incorrect_command_return_err_output() -> None:
"""Test optional returning of shell output on incorrect command."""
session = BashProcess(return_err_output=True)
output = session.run(["invalid_command"])
assert re.match(
r"^/bin/sh:.*invalid_command.*(?:not found|Permission denied).*$", output
)
@pytest.mark.skipif(
sys.platform.startswith("win"), reason="Test not supported on Windows"
)
def test_create_directory_and_files(tmp_path: Path) -> None:
"""Test creation of a directory and files in a temporary directory."""
session = BashProcess(strip_newlines=True)
# create a subdirectory in the temporary directory
temp_dir = tmp_path / "test_dir"
temp_dir.mkdir()
# run the commands in the temporary directory
commands = [
f"touch {temp_dir}/file1.txt",
f"touch {temp_dir}/file2.txt",
f"echo 'hello world' > {temp_dir}/file2.txt",
f"cat {temp_dir}/file2.txt",
]
output = session.run(commands)
assert output == "hello world"
# check that the files were created in the temporary directory
output = session.run([f"ls {temp_dir}"])
assert output == "file1.txt\nfile2.txt"
@pytest.mark.skip(reason="flaky on GHA, TODO to fix")
@pytest.mark.skipif(
sys.platform.startswith("win"), reason="Test not supported on Windows"
)
def test_create_bash_persistent() -> None:
"""Test the pexpect persistent bash terminal"""
session = BashProcess(persistent=True)
response = session.run("echo hello")
response += session.run("echo world")
assert "hello" in response
assert "world" in response

View File

@@ -48,6 +48,23 @@ def test_anonymize_multiple() -> None:
        assert phrase not in anonymized_text


@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
def test_check_instances() -> None:
    """Test anonymizing multiple items in a sentence"""
    from langchain_experimental.data_anonymizer import PresidioAnonymizer

    text = (
        "This is John Smith. John Smith works in a bakery." "John Smith is a good guy"
    )
    anonymizer = PresidioAnonymizer(["PERSON"], faker_seed=42)
    anonymized_text = anonymizer.anonymize(text)
    assert anonymized_text.count("Connie Lawrence") == 3

    # New name should be generated
    anonymized_text = anonymizer.anonymize(text)
    assert anonymized_text.count("Connie Lawrence") == 0


@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
def test_anonymize_with_custom_operator() -> None:
    """Test anonymize a name with a custom operator"""
@@ -55,13 +72,13 @@ def test_anonymize_with_custom_operator() -> None:
    from langchain_experimental.data_anonymizer import PresidioAnonymizer

-    custom_operator = {"PERSON": OperatorConfig("replace", {"new_value": "<name>"})}
+    custom_operator = {"PERSON": OperatorConfig("replace", {"new_value": "NAME"})}
    anonymizer = PresidioAnonymizer(operators=custom_operator)

    text = "Jane Doe was here."
    anonymized_text = anonymizer.anonymize(text)

-    assert anonymized_text == "<name> was here."
+    assert anonymized_text == "NAME was here."


@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
@@ -91,3 +108,21 @@ def test_add_recognizer_operator() -> None:
    anonymizer.add_operators(custom_operator)
    anonymized_text = anonymizer.anonymize(text)
    assert anonymized_text == "Dear Jane Doe was here."


@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
def test_non_faker_values() -> None:
    """Test anonymizing multiple items in a sentence without faker values"""
    from langchain_experimental.data_anonymizer import PresidioAnonymizer

    text = (
        "My name is John Smith. Your name is Adam Smith. Her name is Jane Smith."
        "Our names are: John Smith, Adam Smith, Jane Smith."
    )
    expected_result = (
        "My name is <PERSON>. Your name is <PERSON_2>. Her name is <PERSON_3>."
        "Our names are: <PERSON>, <PERSON_2>, <PERSON_3>."
    )

    anonymizer = PresidioAnonymizer(add_default_faker_operators=False)
    anonymized_text = anonymizer.anonymize(text)

    assert anonymized_text == expected_result
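
Outside the test harness, the anonymizer exercised above can be used roughly like this (a sketch, assuming presidio_analyzer, presidio_anonymizer, and faker are installed; the concrete fake name depends on the faker seed):

    from langchain_experimental.data_anonymizer import PresidioAnonymizer

    anonymizer = PresidioAnonymizer(analyzed_fields=["PERSON"], faker_seed=42)
    print(anonymizer.anonymize("This is John Smith."))
    # e.g. "This is Connie Lawrence." -- a new fake name is drawn on each
    # anonymize() call, as the "New name should be generated" assertion checks

    # With add_default_faker_operators=False, placeholders such as <PERSON>
    # are used instead of fake values (see test_non_faker_values)
    plain = PresidioAnonymizer(add_default_faker_operators=False)
    print(plain.anonymize("This is John Smith."))  # e.g. "This is <PERSON>."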

View File

@@ -0,0 +1,109 @@
"""Test LLM Bash functionality."""
import sys
import pytest
from langchain.schema import OutputParserException
from langchain_experimental.llm_bash.base import LLMBashChain
from langchain_experimental.llm_bash.prompt import _PROMPT_TEMPLATE, BashOutputParser
from tests.unit_tests.fake_llm import FakeLLM
_SAMPLE_CODE = """
Unrelated text
```bash
echo hello
```
Unrelated text
"""
_SAMPLE_CODE_2_LINES = """
Unrelated text
```bash
echo hello
echo world
```
Unrelated text
"""
@pytest.fixture
def output_parser() -> BashOutputParser:
"""Output parser for testing."""
return BashOutputParser()
@pytest.mark.skipif(
sys.platform.startswith("win"), reason="Test not supported on Windows"
)
def test_simple_question() -> None:
"""Test simple question that should not need python."""
question = "Please write a bash script that prints 'Hello World' to the console."
prompt = _PROMPT_TEMPLATE.format(question=question)
queries = {prompt: "```bash\nexpr 1 + 1\n```"}
fake_llm = FakeLLM(queries=queries)
fake_llm_bash_chain = LLMBashChain.from_llm(fake_llm, input_key="q", output_key="a")
output = fake_llm_bash_chain.run(question)
assert output == "2\n"
def test_get_code(output_parser: BashOutputParser) -> None:
"""Test the parser."""
code_lines = output_parser.parse(_SAMPLE_CODE)
code = [c for c in code_lines if c.strip()]
assert code == code_lines
assert code == ["echo hello"]
code_lines = output_parser.parse(_SAMPLE_CODE + _SAMPLE_CODE_2_LINES)
assert code_lines == ["echo hello", "echo hello", "echo world"]
def test_parsing_error() -> None:
    """Test that LLM output without a bash block raises an exception."""
    question = "Please echo 'hello world' to the terminal."
    prompt = _PROMPT_TEMPLATE.format(question=question)
    queries = {
        prompt: """
```text
echo 'hello world'
```
"""
    }
    fake_llm = FakeLLM(queries=queries)
    fake_llm_bash_chain = LLMBashChain.from_llm(fake_llm, input_key="q", output_key="a")
    with pytest.raises(OutputParserException):
        fake_llm_bash_chain.run(question)


def test_get_code_lines_mixed_blocks(output_parser: BashOutputParser) -> None:
    text = """
Unrelated text
```bash
echo hello
ls && pwd && ls
```

```python
print("hello")
```

```bash
echo goodbye
```
"""
    code_lines = output_parser.parse(text)
    assert code_lines == ["echo hello", "ls && pwd && ls", "echo goodbye"]


def test_get_code_lines_simple_nested_ticks(output_parser: BashOutputParser) -> None:
    """Test that backticks w/o a newline are ignored."""
    text = """
Unrelated text
```bash
echo hello
echo "```bash is in this string```"
```
"""
    code_lines = output_parser.parse(text)
    assert code_lines == ["echo hello", 'echo "```bash is in this string```"']
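
As a usage note, the parser under test extracts only the contents of ```bash fenced blocks, one command per line; a minimal sketch based on the cases above:

    from langchain_experimental.llm_bash.prompt import BashOutputParser

    parser = BashOutputParser()
    llm_output = "Unrelated text\n```bash\necho hello\nls && pwd && ls\n```\nUnrelated text\n"
    print(parser.parse(llm_output))  # -> ['echo hello', 'ls && pwd && ls']
    # Text outside ```bash blocks (including ```python blocks) is dropped, and
    # LLM output with no ```bash block raises OutputParserException downstream.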

View File

@@ -0,0 +1,82 @@
"""Test LLM Math functionality."""
import pytest
from langchain_experimental.llm_symbolic_math.base import (
LLMSymbolicMathChain,
)
from langchain_experimental.llm_symbolic_math.prompt import (
_PROMPT_TEMPLATE,
)
from tests.unit_tests.fake_llm import FakeLLM
try:
import sympy
except ImportError:
pytest.skip("sympy not installed", allow_module_level=True)
@pytest.fixture
def fake_llm_symbolic_math_chain() -> LLMSymbolicMathChain:
"""Fake LLM Math chain for testing."""
queries = {
_PROMPT_TEMPLATE.format(question="What is 1 plus 1?"): "Answer: 2",
_PROMPT_TEMPLATE.format(
question="What is the square root of 2?"
): "```text\nsqrt(2)\n```",
_PROMPT_TEMPLATE.format(
question="What is the limit of sin(x) / x as x goes to 0?"
): "```text\nlimit(sin(x)/x,x,0)\n```",
_PROMPT_TEMPLATE.format(
question="What is the integral of e^-x from 0 to infinity?"
): "```text\nintegrate(exp(-x), (x, 0, oo))\n```",
_PROMPT_TEMPLATE.format(
question="What are the solutions to this equation x**2 - x?"
): "```text\nsolveset(x**2 - x, x)\n```",
_PROMPT_TEMPLATE.format(question="foo"): "foo",
}
fake_llm = FakeLLM(queries=queries)
return LLMSymbolicMathChain.from_llm(fake_llm, input_key="q", output_key="a")
def test_simple_question(fake_llm_symbolic_math_chain: LLMSymbolicMathChain) -> None:
"""Test simple question that should not need python."""
question = "What is 1 plus 1?"
output = fake_llm_symbolic_math_chain.run(question)
assert output == "Answer: 2"
def test_root_question(fake_llm_symbolic_math_chain: LLMSymbolicMathChain) -> None:
"""Test irrational number that should need sympy."""
question = "What is the square root of 2?"
output = fake_llm_symbolic_math_chain.run(question)
assert output == f"Answer: {sympy.sqrt(2)}"
def test_limit_question(fake_llm_symbolic_math_chain: LLMSymbolicMathChain) -> None:
"""Test question about limits that needs sympy"""
question = "What is the limit of sin(x) / x as x goes to 0?"
output = fake_llm_symbolic_math_chain.run(question)
assert output == "Answer: 1"
def test_integration_question(
fake_llm_symbolic_math_chain: LLMSymbolicMathChain,
) -> None:
"""Test question about integration that needs sympy"""
question = "What is the integral of e^-x from 0 to infinity?"
output = fake_llm_symbolic_math_chain.run(question)
assert output == "Answer: 1"
def test_solver_question(fake_llm_symbolic_math_chain: LLMSymbolicMathChain) -> None:
"""Test question about solving algebraic equations that needs sympy"""
question = "What are the solutions to this equation x**2 - x?"
output = fake_llm_symbolic_math_chain.run(question)
assert output == "Answer: {0, 1}"
def test_error(fake_llm_symbolic_math_chain: LLMSymbolicMathChain) -> None:
"""Test question that raises error."""
with pytest.raises(ValueError):
fake_llm_symbolic_math_chain.run("foo")
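
The expected answers above come from evaluating the expression inside each ```text block with SymPy; a sketch of the equivalent direct SymPy calls (assuming sympy is installed):

    import sympy
    from sympy import exp, integrate, limit, oo, sin, solveset, symbols

    x = symbols("x")
    print(sympy.sqrt(2))                   # sqrt(2)
    print(limit(sin(x) / x, x, 0))         # 1
    print(integrate(exp(-x), (x, 0, oo)))  # 1
    print(solveset(x**2 - x, x))           # {0, 1}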

View File

@@ -49,6 +49,32 @@ def test_anonymize_multiple() -> None:
        assert phrase not in anonymized_text


@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
def test_check_instances() -> None:
    """Test anonymizing multiple items in a sentence"""
    from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer

    text = (
        "This is John Smith. John Smith works in a bakery." "John Smith is a good guy"
    )
    anonymizer = PresidioReversibleAnonymizer(["PERSON"], faker_seed=42)
    anonymized_text = anonymizer.anonymize(text)

    persons = list(anonymizer.deanonymizer_mapping["PERSON"].keys())
    assert len(persons) == 1

    anonymized_name = persons[0]
    assert anonymized_text.count(anonymized_name) == 3

    anonymized_text = anonymizer.anonymize(text)
    assert anonymized_text.count(anonymized_name) == 3
    assert anonymizer.deanonymizer_mapping["PERSON"][anonymized_name] == "John Smith"

    text = "This is Jane Smith"
    anonymized_text = anonymizer.anonymize(text)

    persons = list(anonymizer.deanonymizer_mapping["PERSON"].keys())
    assert len(persons) == 2


@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
def test_anonymize_with_custom_operator() -> None:
    """Test anonymize a name with a custom operator"""
@@ -56,13 +82,13 @@ def test_anonymize_with_custom_operator() -> None:
    from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer

-    custom_operator = {"PERSON": OperatorConfig("replace", {"new_value": "<name>"})}
+    custom_operator = {"PERSON": OperatorConfig("replace", {"new_value": "NAME"})}
    anonymizer = PresidioReversibleAnonymizer(operators=custom_operator)

    text = "Jane Doe was here."
    anonymized_text = anonymizer.anonymize(text)

-    assert anonymized_text == "<name> was here."
+    assert anonymized_text == "NAME was here."


@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
@@ -88,6 +114,8 @@ def test_add_recognizer_operator() -> None:
    assert anonymized_text == "<TITLE> Jane Doe was here."

    # anonymizing with custom recognizer and operator
    anonymizer = PresidioReversibleAnonymizer(analyzed_fields=[])
    anonymizer.add_recognizer(custom_recognizer)

    custom_operator = {"TITLE": OperatorConfig("replace", {"new_value": "Dear"})}
    anonymizer.add_operators(custom_operator)

    anonymized_text = anonymizer.anonymize(text)
@@ -161,3 +189,21 @@ def test_save_load_deanonymizer_mapping() -> None:
    finally:
        os.remove("test_file.json")


@pytest.mark.requires("presidio_analyzer", "presidio_anonymizer", "faker")
def test_non_faker_values() -> None:
    """Test anonymizing multiple items in a sentence without faker values"""
    from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer

    text = (
        "My name is John Smith. Your name is Adam Smith. Her name is Jane Smith."
        "Our names are: John Smith, Adam Smith, Jane Smith."
    )
    expected_result = (
        "My name is <PERSON>. Your name is <PERSON_2>. Her name is <PERSON_3>."
        "Our names are: <PERSON>, <PERSON_2>, <PERSON_3>."
    )

    anonymizer = PresidioReversibleAnonymizer(add_default_faker_operators=False)
    anonymized_text = anonymizer.anonymize(text)

    assert anonymized_text == expected_result
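
A sketch of the reversible variant's mapping behaviour that these tests rely on (assuming the presidio and faker extras are installed; the concrete fake name depends on the faker seed):

    from langchain_experimental.data_anonymizer import PresidioReversibleAnonymizer

    anonymizer = PresidioReversibleAnonymizer(analyzed_fields=["PERSON"], faker_seed=42)
    anonymized = anonymizer.anonymize("This is John Smith.")
    # deanonymizer_mapping records which fake value replaced which original, so the
    # same person keeps the same fake name across anonymize() calls
    print(anonymizer.deanonymizer_mapping["PERSON"])  # e.g. {'Connie Lawrence': 'John Smith'}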

View File

@@ -0,0 +1,128 @@
from langchain.memory import ConversationBufferMemory
from langchain.output_parsers.list import CommaSeparatedListOutputParser
from langchain.prompts import PromptTemplate
from langchain.sql_database import SQLDatabase

from langchain_experimental.sql.base import SQLDatabaseChain, SQLDatabaseSequentialChain
from tests.unit_tests.fake_llm import FakeLLM

# Fake db to test SQL-Chain
db = SQLDatabase.from_uri("sqlite:///:memory:")


def create_fake_db(db: SQLDatabase) -> SQLDatabase:
    """Create a table in fake db to test SQL-Chain"""
    db.run(
        """
CREATE TABLE foo (baaz TEXT);
"""
    )
    db.run(
        """
INSERT INTO foo (baaz)
VALUES ('baaz');
"""
    )
    return db


db = create_fake_db(db)


def test_sql_chain_without_memory() -> None:
    queries = {"foo": "SELECT baaz from foo", "foo2": "SELECT baaz from foo"}
    llm = FakeLLM(queries=queries, sequential_responses=True)
    db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)
    assert db_chain.run("hello") == "SELECT baaz from foo"


def test_sql_chain_sequential_without_memory() -> None:
    queries = {
        "foo": "SELECT baaz from foo",
        "foo2": "SELECT baaz from foo",
        "foo3": "SELECT baaz from foo",
    }
    llm = FakeLLM(queries=queries, sequential_responses=True)
    db_chain = SQLDatabaseSequentialChain.from_llm(llm, db, verbose=True)
    assert db_chain.run("hello") == "SELECT baaz from foo"


def test_sql_chain_with_memory() -> None:
    valid_prompt_with_history = """
Only use the following tables:
{table_info}
Question: {input}

Given an input question, first create a syntactically correct
{dialect} query to run.
Always limit your query to at most {top_k} results.

Relevant pieces of previous conversation:
{history}

(You do not need to use these pieces of information if not relevant)
"""
    prompt = PromptTemplate(
        input_variables=["input", "table_info", "dialect", "top_k", "history"],
        template=valid_prompt_with_history,
    )
    queries = {"foo": "SELECT baaz from foo", "foo2": "SELECT baaz from foo"}
    llm = FakeLLM(queries=queries, sequential_responses=True)
    memory = ConversationBufferMemory()
    db_chain = SQLDatabaseChain.from_llm(
        llm, db, memory=memory, prompt=prompt, verbose=True
    )
    assert db_chain.run("hello") == "SELECT baaz from foo"


def test_sql_chain_sequential_with_memory() -> None:
    valid_query_prompt_str = """
Only use the following tables:
{table_info}
Question: {input}

Given an input question, first create a syntactically correct
{dialect} query to run.
Always limit your query to at most {top_k} results.

Relevant pieces of previous conversation:
{history}

(You do not need to use these pieces of information
if not relevant)
"""
    valid_decider_prompt_str = """Given the below input question and list of potential
tables, output a comma separated list of the
table names that may be necessary to answer this question.

Question: {query}

Table Names: {table_names}

Relevant Table Names:"""
    valid_query_prompt = PromptTemplate(
        input_variables=["input", "table_info", "dialect", "top_k", "history"],
        template=valid_query_prompt_str,
    )
    valid_decider_prompt = PromptTemplate(
        input_variables=["query", "table_names"],
        template=valid_decider_prompt_str,
        output_parser=CommaSeparatedListOutputParser(),
    )
    queries = {
        "foo": "SELECT baaz from foo",
        "foo2": "SELECT baaz from foo",
        "foo3": "SELECT baaz from foo",
    }
    llm = FakeLLM(queries=queries, sequential_responses=True)
    memory = ConversationBufferMemory(memory_key="history", input_key="query")
    db_chain = SQLDatabaseSequentialChain.from_llm(
        llm,
        db,
        memory=memory,
        decider_prompt=valid_decider_prompt,
        query_prompt=valid_query_prompt,
        verbose=True,
    )
    assert db_chain.run("hello") == "SELECT baaz from foo"