From 29c7ac7f735e672486bf8e1174562f269ac8d79d Mon Sep 17 00:00:00 2001 From: zanussbaum Date: Wed, 19 Apr 2023 18:12:03 -0400 Subject: [PATCH] refactor: clean up directory structure --- gpt4all/__init__.py | 0 gpt4all/eval/__init__.py | 0 .../eval/eval_figures.py | 0 .../eval/eval_self_instruct.py | 2 +- gpt4all/inference/__init__.py | 0 generate.py => gpt4all/inference/generate.py | 2 +- .../inference/inference.py | 4 +-- gpt4all/train/__init__.py | 0 train.py => gpt4all/train/train.py | 4 +-- gpt4all/utils/__init__.py | 0 data.py => gpt4all/utils/data.py | 2 +- read.py => gpt4all/utils/read.py | 0 setup.py | 34 +++++++++++++++++++ 13 files changed, 41 insertions(+), 7 deletions(-) create mode 100644 gpt4all/__init__.py create mode 100644 gpt4all/eval/__init__.py rename eval_figures.py => gpt4all/eval/eval_figures.py (100%) rename eval_self_instruct.py => gpt4all/eval/eval_self_instruct.py (98%) create mode 100644 gpt4all/inference/__init__.py rename generate.py => gpt4all/inference/generate.py (97%) rename inference.py => gpt4all/inference/inference.py (98%) create mode 100644 gpt4all/train/__init__.py rename train.py => gpt4all/train/train.py (99%) create mode 100644 gpt4all/utils/__init__.py rename data.py => gpt4all/utils/data.py (99%) rename read.py => gpt4all/utils/read.py (100%) create mode 100644 setup.py diff --git a/gpt4all/__init__.py b/gpt4all/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/gpt4all/eval/__init__.py b/gpt4all/eval/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/eval_figures.py b/gpt4all/eval/eval_figures.py similarity index 100% rename from eval_figures.py rename to gpt4all/eval/eval_figures.py diff --git a/eval_self_instruct.py b/gpt4all/eval/eval_self_instruct.py similarity index 98% rename from eval_self_instruct.py rename to gpt4all/eval/eval_self_instruct.py index e05a68e4..7206fdd5 100644 --- a/eval_self_instruct.py +++ b/gpt4all/eval/eval_self_instruct.py @@ -3,7 +3,7 @@ import torch 
import pickle import numpy as np from tqdm import tqdm -from read import read_config +from gpt4all.utils.read import read_config from argparse import ArgumentParser from peft import PeftModelForCausalLM from transformers import AutoModelForCausalLM, AutoTokenizer diff --git a/gpt4all/inference/__init__.py b/gpt4all/inference/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/generate.py b/gpt4all/inference/generate.py similarity index 97% rename from generate.py rename to gpt4all/inference/generate.py index fa1c43fa..f4184d62 100644 --- a/generate.py +++ b/gpt4all/inference/generate.py @@ -1,6 +1,6 @@ from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModelForCausalLM -from read import read_config +from gpt4all.utils.read import read_config from argparse import ArgumentParser import torch import time diff --git a/inference.py b/gpt4all/inference/inference.py similarity index 98% rename from inference.py rename to gpt4all/inference/inference.py index 8a4efb51..5e351c46 100644 --- a/inference.py +++ b/gpt4all/inference/inference.py @@ -2,9 +2,9 @@ from transformers import AutoModelForCausalLM, AutoTokenizer import torch import torch.nn as nn from argparse import ArgumentParser -from read import read_config +from gpt4all.utils.read import read_config from accelerate.utils import set_seed -from data import load_data_for_inference +from gpt4all.utils.data import load_data_for_inference from tqdm import tqdm from datasets import Dataset import torch.distributed as dist diff --git a/gpt4all/train/__init__.py b/gpt4all/train/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/train.py b/gpt4all/train/train.py similarity index 99% rename from train.py rename to gpt4all/train/train.py index 8605af11..97b6c9a8 100644 --- a/train.py +++ b/gpt4all/train/train.py @@ -3,11 +3,11 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, get_scheduler, Lla import torch from torch.optim import AdamW from 
argparse import ArgumentParser -from read import read_config +from gpt4all.utils.read import read_config from accelerate import Accelerator from accelerate.utils import DummyScheduler, DummyOptim, set_seed from peft import get_peft_model, LoraConfig, TaskType -from data import load_data +from gpt4all.utils.data import load_data from torchmetrics import MeanMetric from tqdm import tqdm import wandb diff --git a/gpt4all/utils/__init__.py b/gpt4all/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/data.py b/gpt4all/utils/data.py similarity index 99% rename from data.py rename to gpt4all/utils/data.py index 8227de00..b55a589a 100644 --- a/data.py +++ b/gpt4all/utils/data.py @@ -1,6 +1,6 @@ import glob import torch -from datasets import load_dataset, concatenate_datasets +from datasets import load_dataset import os from torch.utils.data import DataLoader from transformers import DefaultDataCollator diff --git a/read.py b/gpt4all/utils/read.py similarity index 100% rename from read.py rename to gpt4all/utils/read.py diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..6b100ed5 --- /dev/null +++ b/setup.py @@ -0,0 +1,34 @@ +from setuptools import setup, find_packages + +with open('README.md', 'r', encoding='utf-8') as f: + long_description = f.read() + +with open('requirements.txt', 'r', encoding='utf-8') as f: + requirements = [line.strip() for line in f if line.strip()] + +setup( + name='gpt4all', + version='0.0.1', + author='nomic-ai', + author_email='zach@nomic-ai', + description='an ecosystem of open-source chatbots trained on a massive collection of clean assistant data including code, stories and dialogue', + long_description=long_description, + long_description_content_type='text/markdown', + url='https://github.com/nomic-ai/gpt4all', + packages=find_packages(), + install_requires=requirements, + classifiers=[ + 'Development Status :: 3 - Alpha', + 'License :: OSI Approved :: MIT License', + 'Programming Language :: 
Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Topic :: Text Processing :: Linguistic', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + 'Intended Audience :: Science/Research', + 'Operating System :: OS Independent', + ], + python_requires='>=3.6', +) \ No newline at end of file