mirror of
				https://github.com/hwchase17/langchain.git
				synced 2025-10-25 12:44:04 +00:00 
			
		
		
		
	Huge thanks to @leo-gan for improving the document loaders notebooks --------- Co-authored-by: Leonid Ganeline <leo.gan.57@gmail.com>
		
			
				
	
	
		
			103 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			103 lines
		
	
	
		
			2.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| {
 | |
|  "cells": [
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "id": "d9826810",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "# Copy Paste\n",
 | |
|     "\n",
 | |
|     "This notebook covers how to load a document object from something you just want to copy and paste. In this case, you don't even need to use a DocumentLoader, but rather can just construct the Document directly."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 1,
 | |
|    "id": "fd9e71a2",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "from langchain.docstore.document import Document"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 2,
 | |
|    "id": "f40d3f30",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "text = \"..... put the text you copy pasted here......\""
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 3,
 | |
|    "id": "d409bdba",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "doc = Document(page_content=text)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "markdown",
 | |
|    "id": "cc0eff72",
 | |
|    "metadata": {},
 | |
|    "source": [
 | |
|     "## Metadata\n",
 | |
|     "If you want to add metadata about the where you got this piece of text, you easily can with the metadata key."
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 4,
 | |
|    "id": "fe3aa5aa",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "metadata = {\"source\": \"internet\", \"date\": \"Friday\"}"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": 5,
 | |
|    "id": "827d4e91",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": [
 | |
|     "doc = Document(page_content=text, metadata=metadata)"
 | |
|    ]
 | |
|   },
 | |
|   {
 | |
|    "cell_type": "code",
 | |
|    "execution_count": null,
 | |
|    "id": "c986a43d",
 | |
|    "metadata": {},
 | |
|    "outputs": [],
 | |
|    "source": []
 | |
|   }
 | |
|  ],
 | |
|  "metadata": {
 | |
|   "kernelspec": {
 | |
|    "display_name": "Python 3 (ipykernel)",
 | |
|    "language": "python",
 | |
|    "name": "python3"
 | |
|   },
 | |
|   "language_info": {
 | |
|    "codemirror_mode": {
 | |
|     "name": "ipython",
 | |
|     "version": 3
 | |
|    },
 | |
|    "file_extension": ".py",
 | |
|    "mimetype": "text/x-python",
 | |
|    "name": "python",
 | |
|    "nbconvert_exporter": "python",
 | |
|    "pygments_lexer": "ipython3",
 | |
|    "version": "3.10.6"
 | |
|   }
 | |
|  },
 | |
|  "nbformat": 4,
 | |
|  "nbformat_minor": 5
 | |
| }
 |