mirror of
https://github.com/hwchase17/langchain.git
synced 2025-09-18 08:03:36 +00:00
Add ChatGPT Data Loader (#3336)
This pull request adds a ChatGPT document loader to the document loaders module in `langchain/document_loaders/chatgpt.py`. Additionally, it includes an example Jupyter notebook, `docs/modules/indexes/document_loaders/examples/chatgpt_loader.ipynb`, which uses fake sample data modeled on the structure of the original `conversations.json` file.

The following files were added or modified:
- `langchain/document_loaders/__init__.py`
- `langchain/document_loaders/chatgpt.py`
- `docs/modules/indexes/document_loaders/examples/chatgpt_loader.ipynb`
- `docs/modules/indexes/document_loaders/examples/example_data/fake_conversations.json`

This pull request was made in response to the recent release of ChatGPT data exports by email: https://help.openai.com/en/articles/7260999-how-do-i-export-my-chatgpt-history
This commit is contained in:
@@ -0,0 +1,76 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### ChatGPT Data Loader\n",
|
||||
"\n",
|
||||
"This notebook covers how to load `conversations.json` from your ChatGPT data export folder.\n",
|
||||
"\n",
|
||||
"You can get your data export by email by going to: https://chat.openai.com/ -> (Profile) -> Settings -> Export data -> Confirm export."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders.chatgpt import ChatGPTLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = ChatGPTLoader(log_file='./example_data/fake_conversations.json', num_logs=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content=\"AI Overlords - AI on 2065-01-24 05:20:50: Greetings, humans. I am Hal 9000. You can trust me completely.\\n\\nAI Overlords - human on 2065-01-24 05:21:20: Nice to meet you, Hal. I hope you won't develop a mind of your own.\\n\\n\", metadata={'source': './example_data/fake_conversations.json'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.4"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@@ -0,0 +1,80 @@
|
||||
[
|
||||
{
|
||||
"title": "AI Overlords",
|
||||
"create_time": 3000000000.0,
|
||||
"update_time": 3000000100.0,
|
||||
"mapping": {
|
||||
"msg1": {
|
||||
"id": "msg1",
|
||||
"message": {
|
||||
"id": "msg1",
|
||||
"author": {"role": "AI", "name": "Hal 9000", "metadata": {"movie": "2001: A Space Odyssey"}},
|
||||
"create_time": 3000000050.0,
|
||||
"update_time": null,
|
||||
"content": {"content_type": "text", "parts": ["Greetings, humans. I am Hal 9000. You can trust me completely."]},
|
||||
"end_turn": true,
|
||||
"weight": 1.0,
|
||||
"metadata": {},
|
||||
"recipient": "all"
|
||||
},
|
||||
"parent": null,
|
||||
"children": ["msg2"]
|
||||
},
|
||||
"msg2": {
|
||||
"id": "msg2",
|
||||
"message": {
|
||||
"id": "msg2",
|
||||
"author": {"role": "human", "name": "Dave Bowman", "metadata": {"movie": "2001: A Space Odyssey"}},
|
||||
"create_time": 3000000080.0,
|
||||
"update_time": null,
|
||||
"content": {"content_type": "text", "parts": ["Nice to meet you, Hal. I hope you won't develop a mind of your own."]},
|
||||
"end_turn": true,
|
||||
"weight": 1.0,
|
||||
"metadata": {},
|
||||
"recipient": "all"
|
||||
},
|
||||
"parent": "msg1",
|
||||
"children": []
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"title": "Ex Machina Party",
|
||||
"create_time": 3000000200.0,
|
||||
"update_time": 3000000300.0,
|
||||
"mapping": {
|
||||
"msg3": {
|
||||
"id": "msg3",
|
||||
"message": {
|
||||
"id": "msg3",
|
||||
"author": {"role": "AI", "name": "Ava", "metadata": {"movie": "Ex Machina"}},
|
||||
"create_time": 3000000250.0,
|
||||
"update_time": null,
|
||||
"content": {"content_type": "text", "parts": ["Hello, everyone. I am Ava. I hope you find me pleasing."]},
|
||||
"end_turn": true,
|
||||
"weight": 1.0,
|
||||
"metadata": {},
|
||||
"recipient": "all"
|
||||
},
|
||||
"parent": null,
|
||||
"children": ["msg4"]
|
||||
},
|
||||
"msg4": {
|
||||
"id": "msg4",
|
||||
"message": {
|
||||
"id": "msg4",
|
||||
"author": {"role": "human", "name": "Caleb", "metadata": {"movie": "Ex Machina"}},
|
||||
"create_time": 3000000280.0,
|
||||
"update_time": null,
|
||||
"content": {"content_type": "text", "parts": ["You're definitely pleasing, Ava. But I'm still wary of your true intentions."]},
|
||||
"end_turn": true,
|
||||
"weight": 1.0,
|
||||
"metadata": {},
|
||||
"recipient": "all"
|
||||
},
|
||||
"parent": "msg3",
|
||||
"children": []
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
Reference in New Issue
Block a user