mirror of
https://github.com/csunny/DB-GPT.git
synced 2025-09-01 17:16:51 +00:00
20 lines
524 B
Python
20 lines
524 B
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import logging
|
|
import sys
|
|
|
|
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
|
|
|
|
# Send INFO-level (and above) log records to stdout.
# basicConfig() attaches a stdout StreamHandler to the root logger when the
# root logger has no handlers yet, so a second StreamHandler on the same
# stream is not added here: with both attached, every record was written to
# stdout twice (once formatted, once as the bare message).
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# Load the documents found in the "data" directory (resolved relative to the
# current working directory).
documents = SimpleDirectoryReader("data").load_data()

# Build a vector-store index from the loaded documents.
# NOTE(review): the original comment states that documents are split into
# chunks of at most 500 tokens and that each chunk is converted to a vector.
# That happens inside llama_index and is not visible in this script --
# confirm against the installed llama_index version.
index = GPTVectorStoreIndex(documents)

# Save the index to "index.json" in the current working directory.
index.save_to_disk("index.json")
|