mirror of
https://github.com/hwchase17/langchain.git
synced 2025-06-06 23:24:48 +00:00
- **Description:** Adds the document loader for [AWS Athena](https://aws.amazon.com/athena/), a serverless and interactive analytics service. - **Dependencies:** Added boto3 as a dependency
111 lines
2.2 KiB
Plaintext
111 lines
2.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "MwTWzDxYgbrR"
|
|
},
|
|
"source": [
|
|
"# Athena\n",
|
|
"\n",
|
|
"This notebooks goes over how to load documents from AWS Athena"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "F0zaLR3xgWmO"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"! pip install boto3"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "076NLjfngoWJ"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain_community.document_loaders.athena import AthenaLoader"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "XpMRQwU9gu44"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"database_name = \"my_database\"\n",
|
|
"s3_output_path = \"s3://my_bucket/query_results/\"\n",
|
|
"query = \"SELECT * FROM my_table\"\n",
|
|
"profile_name = \"my_profile\"\n",
|
|
"\n",
|
|
"loader = AthenaLoader(\n",
|
|
" query=query,\n",
|
|
" database=database_name,\n",
|
|
" s3_output_uri=s3_output_path,\n",
|
|
" profile_name=profile_name,\n",
|
|
")\n",
|
|
"\n",
|
|
"documents = loader.load()\n",
|
|
"print(documents)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "5IBapL3ejoEt"
|
|
},
|
|
"source": [
|
|
"Example with metadata columns"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"id": "wMx6nI1qjryD"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"database_name = \"my_database\"\n",
|
|
"s3_output_path = \"s3://my_bucket/query_results/\"\n",
|
|
"query = \"SELECT * FROM my_table\"\n",
|
|
"profile_name = \"my_profile\"\n",
|
|
"metadata_columns = [\"_row\", \"_created_at\"]\n",
|
|
"\n",
|
|
"loader = AthenaLoader(\n",
|
|
" query=query,\n",
|
|
" database=database_name,\n",
|
|
" s3_output_uri=s3_output_path,\n",
|
|
" profile_name=profile_name,\n",
|
|
" metadata_columns=metadata_columns,\n",
|
|
")\n",
|
|
"\n",
|
|
"documents = loader.load()\n",
|
|
"print(documents)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"name": "python"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
}
|