diff --git a/.env b/.env index 80a2a21f..f218b889 100644 --- a/.env +++ b/.env @@ -5,7 +5,7 @@ DB_HOST=localhost DB_USER=postgres DB_PORT=5432 DB_PASSWORD=admin -DB_NAME=openai +DB_NAME=QuickGpt SUPER_ADMIN_EMAIL=superadmin@email.com SUPER_ADMIN_PASSWORD=supersecretpassword diff --git a/Dockerfile.external b/Dockerfile.external index 0a8e4da5..6b3438fe 100644 --- a/Dockerfile.external +++ b/Dockerfile.external @@ -33,7 +33,7 @@ COPY --chown=worker private_gpt/ private_gpt COPY --chown=worker fern/ fern COPY --chown=worker *.yaml *.md ./ COPY --chown=worker scripts/ scripts -RUN poetry run python scripts/setup +RUN ENV PYTHONPATH="$PYTHONPATH:/private_gpt/" diff --git a/alembic/versions/14281ff34686_update_is_enabled_to_false_by_default.py b/alembic/versions/14281ff34686_update_is_enabled_to_false_by_default.py deleted file mode 100644 index 0290010e..00000000 --- a/alembic/versions/14281ff34686_update_is_enabled_to_false_by_default.py +++ /dev/null @@ -1,32 +0,0 @@ -"""Update is_enabled to false by default - -Revision ID: 14281ff34686 -Revises: b7b896502e8e -Create Date: 2024-03-18 16:33:43.133458 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = '14281ff34686' -down_revision: Union[str, None] = 'b7b896502e8e' -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - # op.create_unique_constraint('unique_user_role', 'user_roles', ['user_id', 'role_id', 'company_id']) - # ### end Alembic commands ### - pass - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - # op.drop_constraint('unique_user_role', 'user_roles', type_='unique') - pass - # ### end Alembic commands ### diff --git a/alembic/versions/7bd9152cf172_create_chat_history_and_item.py b/alembic/versions/7bd9152cf172_create_chat_history_and_item.py new file mode 100644 index 00000000..b006a073 --- /dev/null +++ b/alembic/versions/7bd9152cf172_create_chat_history_and_item.py @@ -0,0 +1,52 @@ +"""Create chat history and item + +Revision ID: 7bd9152cf172 +Revises: +Create Date: 2024-04-03 16:23:24.813222 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision: str = '7bd9152cf172' +down_revision: Union[str, None] = None +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('chat_history', + sa.Column('conversation_id', sa.Integer(), nullable=False), + sa.Column('title', sa.String(length=255), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=True), + sa.Column('updated_at', sa.DateTime(), nullable=True), + sa.Column('user_id', sa.Integer(), nullable=True), + sa.ForeignKeyConstraint(['user_id'], ['users.id'], ), + sa.PrimaryKeyConstraint('conversation_id') + ) + op.create_table('chat_items', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('sender', sa.String(length=225), nullable=False), + sa.Column('content', sa.Text(), nullable=True), + sa.Column('created_at', sa.DateTime(), nullable=True), + sa.Column('updated_at', sa.DateTime(), nullable=True), + sa.Column('like', sa.Boolean(), nullable=True), + sa.Column('conversation_id', sa.Integer(), nullable=False), + sa.ForeignKeyConstraint(['conversation_id'], ['chat_history.conversation_id'], ), + sa.PrimaryKeyConstraint('id') + ) + # op.create_unique_constraint('unique_user_role', 'user_roles', ['user_id', 'role_id', 'company_id']) + # 
### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + # op.drop_constraint('unique_user_role', 'user_roles', type_='unique') + op.drop_table('chat_items') + op.drop_table('chat_history') + # ### end Alembic commands ### diff --git a/alembic/versions/b7b896502e8e_update.py b/alembic/versions/b7b896502e8e_update.py deleted file mode 100644 index ab6a1d84..00000000 --- a/alembic/versions/b7b896502e8e_update.py +++ /dev/null @@ -1,38 +0,0 @@ -"""update - -Revision ID: b7b896502e8e -Revises: -Create Date: 2024-03-17 15:07:10.795935 - -""" -from typing import Sequence, Union - -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision: str = 'b7b896502e8e' -down_revision: Union[str, None] = None -branch_labels: Union[str, Sequence[str], None] = None -depends_on: Union[str, Sequence[str], None] = None - - -def upgrade() -> None: - # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint('document_department_association_department_id_fkey', 'document_department_association', type_='foreignkey') - op.drop_constraint('document_department_association_document_id_fkey', 'document_department_association', type_='foreignkey') - op.create_foreign_key(None, 'document_department_association', 'document', ['document_id'], ['id'], onupdate='CASCADE', ondelete='CASCADE') - op.create_foreign_key(None, 'document_department_association', 'departments', ['department_id'], ['id'], onupdate='CASCADE', ondelete='CASCADE') - # op.create_unique_constraint('unique_user_role', 'user_roles', ['user_id', 'role_id', 'company_id']) - # ### end Alembic commands ### - - -def downgrade() -> None: - # ### commands auto generated by Alembic - please adjust! 
### - # op.drop_constraint('unique_user_role', 'user_roles', type_='unique') - op.drop_constraint(None, 'document_department_association', type_='foreignkey') - op.drop_constraint(None, 'document_department_association', type_='foreignkey') - op.create_foreign_key('document_department_association_document_id_fkey', 'document_department_association', 'document', ['document_id'], ['id']) - op.create_foreign_key('document_department_association_department_id_fkey', 'document_department_association', 'departments', ['department_id'], ['id']) - # ### end Alembic commands ### diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py index d4e13a58..9a3b25ad 100644 --- a/private_gpt/components/llm/llm_component.py +++ b/private_gpt/components/llm/llm_component.py @@ -11,8 +11,7 @@ from private_gpt.paths import models_cache_path, models_path from private_gpt.settings.settings import Settings logger = logging.getLogger(__name__) - - +local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1" @singleton class LLMComponent: llm: LLM @@ -23,8 +22,7 @@ class LLMComponent: if settings.llm.tokenizer: set_global_tokenizer( AutoTokenizer.from_pretrained( - pretrained_model_name_or_path=settings.llm.tokenizer, - cache_dir=str(models_cache_path), + local_path ) ) diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py index 5369200b..ef319f38 100644 --- a/private_gpt/server/chat/chat_service.py +++ b/private_gpt/server/chat/chat_service.py @@ -129,9 +129,7 @@ class ChatService: else None ) system_prompt = ( - chat_engine_input.system_message.content - if chat_engine_input.system_message - else None + "You can only answer questions about the provided context. If you know the answer but it is not based in the provided context, don't provide the answer, just state the answer is not in the context provided." 
) chat_history = ( chat_engine_input.chat_history if chat_engine_input.chat_history else None @@ -165,9 +163,7 @@ class ChatService: else None ) system_prompt = ( - chat_engine_input.system_message.content - if chat_engine_input.system_message - else None + "You can only answer questions about the provided context. If you know the answer but it is not based in the provided context, don't provide the answer, just state the answer is not in the context provided." ) chat_history = ( chat_engine_input.chat_history if chat_engine_input.chat_history else None diff --git a/private_gpt/server/completions/completions_router.py b/private_gpt/server/completions/completions_router.py index 45e3e302..c2a9bfdb 100644 --- a/private_gpt/server/completions/completions_router.py +++ b/private_gpt/server/completions/completions_router.py @@ -22,6 +22,7 @@ completions_router = APIRouter(prefix="/v1", dependencies=[Depends(authenticated class CompletionsBody(BaseModel): + conversation_id: int prompt: str system_prompt: str | None = None use_context: bool = False @@ -33,6 +34,7 @@ class CompletionsBody(BaseModel): "json_schema_extra": { "examples": [ { + "conversation_id": 123, "prompt": "How do you fry an egg?", "system_prompt": "You are a rapper. 
Always answer with a rap.", "stream": False, @@ -92,6 +94,14 @@ class CompletionsBody(BaseModel): # ) # return chat_completion(request, chat_body) +def create_chat_item(db, sender, content, conversation_id): + chat_item_create = schemas.ChatItemCreate( + sender=sender, + content=content, + conversation_id=conversation_id + ) + return crud.chat_item.create(db, obj_in=chat_item_create) + @completions_router.post( "/chat", @@ -137,33 +147,49 @@ async def prompt_completion( docs_ids.extend(doc_id) body.context_filter = {"docs_ids": docs_ids} + chat_history = crud.chat.get_by_id( + db, id=body.conversation_id + ) + if (chat_history is None) or (chat_history.user_id != current_user.id): # reject a missing or foreign conversation before anything is stored + raise HTTPException( + status_code=404, detail="Chat history not found") + + messages = [OpenAIMessage(content=body.prompt, role="user")] + create_chat_item(db, "user", body.prompt, body.conversation_id) + + if body.system_prompt: + messages.insert(0, OpenAIMessage( + content=body.system_prompt, role="system")) + + chat_body = ChatBody( + messages=messages, + use_context=body.use_context, + stream=body.stream, + include_sources=body.include_sources, + context_filter=body.context_filter, + ) + log_audit( + model='Chat', + action='Chat', + details={ + "query": body.prompt, + 'user': current_user.username, + }, + user_id=current_user.id + ) + chat_response = await chat_completion(request, chat_body) + print(chat_response) + create_chat_item(db, "assistant", chat_response.choices[0].message.content, body.conversation_id) + + return chat_response + + except HTTPException: # let the 404 raised above reach the client instead of the generic 500 + raise + except Exception as e: print(traceback.format_exc()) logger.error(f"There was an error: {str(e)}") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Internal Server Error", - ) - - messages = [OpenAIMessage(content=body.prompt, role="user")] - if body.system_prompt: - messages.insert(0, OpenAIMessage( - content=body.system_prompt, role="system")) - - chat_body = ChatBody( - messages=messages, -
use_context=body.use_context, - stream=body.stream, - include_sources=body.include_sources, - context_filter=body.context_filter, - ) - log_audit( - model='Chat', - action='Chat', - details={ - "query": body.prompt, - 'user': current_user.username, - }, - user_id=current_user.id - ) - return await chat_completion(request, chat_body) + ) \ No newline at end of file diff --git a/private_gpt/users/api/v1/api.py b/private_gpt/users/api/v1/api.py index dd003c62..277978ea 100644 --- a/private_gpt/users/api/v1/api.py +++ b/private_gpt/users/api/v1/api.py @@ -1,5 +1,5 @@ from private_gpt.users.api import deps -from private_gpt.users.api.v1.routers import auth, roles, user_roles, users, subscriptions, companies, departments, documents, audits +from private_gpt.users.api.v1.routers import auth, roles, user_roles, users, subscriptions, companies, departments, documents, audits, chat_history from fastapi import APIRouter api_router = APIRouter(prefix="/v1") @@ -13,4 +13,5 @@ api_router.include_router(subscriptions.router) api_router.include_router(departments.router) api_router.include_router(documents.router) api_router.include_router(audits.router) +api_router.include_router(chat_history.router) diff --git a/private_gpt/users/api/v1/routers/chat_history.py b/private_gpt/users/api/v1/routers/chat_history.py new file mode 100644 index 00000000..f023cdcd --- /dev/null +++ b/private_gpt/users/api/v1/routers/chat_history.py @@ -0,0 +1,123 @@ +import logging +import traceback + +from sqlalchemy.orm import Session +from fastapi.responses import JSONResponse +from fastapi import APIRouter, Depends, HTTPException, status, Security + +from private_gpt.users.api import deps +from private_gpt.users import crud, models, schemas + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/c", tags=["Chat Histories"]) + + +@router.get("", response_model=list[schemas.ChatHistory]) +def list_chat_histories( + db: Session = Depends(deps.get_db), + skip: int = 0, + limit: int = 100, + 
current_user: models.User = Security( + deps.get_current_user, + ), +) -> list[schemas.ChatHistory]: + """ + Retrieve a list of chat histories with pagination support. + """ + try: + chat_histories = crud.chat.get_multi( + db, skip=skip, limit=limit) + return chat_histories + except Exception as e: + print(traceback.format_exc()) + logger.error(f"Error listing chat histories: {str(e)}") + raise HTTPException( + status_code=500, + detail="Internal Server Error", + ) + + +@router.post("/create", response_model=schemas.ChatHistory) +def create_chat_history( + db: Session = Depends(deps.get_db), + current_user: models.User = Security( + deps.get_current_user, + ), +) -> schemas.ChatHistory: + """ + Create a new chat history + """ + try: + chat_history_in = schemas.CreateChatHistory( + user_id= current_user.id + ) + chat_history = crud.chat.create( + db=db, obj_in=chat_history_in) + return chat_history + except Exception as e: + print(traceback.format_exc()) + logger.error(f"Error creating chat history: {str(e)}") + raise HTTPException( + status_code=500, + detail="Internal Server Error", + ) + + +@router.get("/{chat_history_id}", response_model=schemas.ChatHistory) +def read_chat_history( + chat_history_id: int, + db: Session = Depends(deps.get_db), + current_user: models.User = Security( + deps.get_current_user, + ), +) -> schemas.ChatHistory: + """ + Read a chat history by ID + """ + try: + chat_history = crud.chat.get_by_id(db, id=chat_history_id) + if chat_history is None or chat_history.user_id != current_user.id: + raise HTTPException( + status_code=404, detail="Chat history not found") + return chat_history + except Exception as e: + print(traceback.format_exc()) + logger.error(f"Error reading chat history: {str(e)}") + raise HTTPException( + status_code=500, + detail="Internal Server Error", + ) + + +@router.post("/delete") +def delete_chat_history( + chat_history_in: schemas.ChatDelete, + db: Session = Depends(deps.get_db), + current_user: models.User = 
Security( + deps.get_current_user, + ), +): + """ + Delete a chat history by ID + """ + try: + chat_history_id = chat_history_in.conversation_id + chat_history = crud.chat.get_by_id(db, id=chat_history_id) # same conversation_id lookup that read_chat_history uses + if chat_history is None or chat_history.user_id != current_user.id: + raise HTTPException( + status_code=404, detail="Chat history not found") + + crud.chat.remove(db=db, id=chat_history_id) + return JSONResponse( + status_code=status.HTTP_200_OK, + content={ + "message": "Chat history deleted successfully", + }, + ) + except Exception as e: + print(traceback.format_exc()) + logger.error(f"Error deleting chat history: {str(e)}") + raise HTTPException( + status_code=500, + detail="Internal Server Error", + ) diff --git a/private_gpt/users/crud/__init__.py b/private_gpt/users/crud/__init__.py index fd7663a9..15168e47 100644 --- a/private_gpt/users/crud/__init__.py +++ b/private_gpt/users/crud/__init__.py @@ -5,4 +5,5 @@ from .company_crud import company from .subscription_crud import subscription from .document_crud import documents from .department_crud import department -from .audit_crud import audit \ No newline at end of file +from .audit_crud import audit +from .chat_crud import chat, chat_item \ No newline at end of file diff --git a/private_gpt/users/crud/chat_crud.py b/private_gpt/users/crud/chat_crud.py new file mode 100644 index 00000000..8fd79d86 --- /dev/null +++ b/private_gpt/users/crud/chat_crud.py @@ -0,0 +1,21 @@ +from typing import Optional, List, Union, Dict, Any +from fastapi import HTTPException, status +from sqlalchemy.orm import Session +from sqlalchemy.exc import IntegrityError + +from private_gpt.users.crud.base import CRUDBase +from private_gpt.users.models.chat import ChatHistory, ChatItem +from private_gpt.users.schemas.chat import ChatHistoryCreate, ChatHistoryCreate, ChatItemCreate, ChatItemUpdate + + +class CRUDChat(CRUDBase[ChatHistory, ChatHistoryCreate, ChatHistoryCreate]): + def get_by_id(self, db: Session, *, id: int) -> 
Optional[ChatHistory]: + return db.query(self.model).filter(ChatHistory.conversation_id == id).first() + + +class CRUDChatItem(CRUDBase[ChatItem, ChatItemCreate, ChatItemUpdate]): + pass + + +chat = CRUDChat(ChatHistory) +chat_item = CRUDChatItem(ChatItem) diff --git a/private_gpt/users/models/__init__.py b/private_gpt/users/models/__init__.py index 7dfd1e98..ab639e6c 100644 --- a/private_gpt/users/models/__init__.py +++ b/private_gpt/users/models/__init__.py @@ -6,4 +6,5 @@ from .document import Document from .subscription import Subscription from .department import Department from .audit import Audit -from .document_department import document_department_association \ No newline at end of file +from .document_department import document_department_association +from .chat import ChatHistory, ChatItem \ No newline at end of file diff --git a/private_gpt/users/models/chat.py b/private_gpt/users/models/chat.py new file mode 100644 index 00000000..84c1762c --- /dev/null +++ b/private_gpt/users/models/chat.py @@ -0,0 +1,60 @@ +from datetime import datetime +from sqlalchemy.orm import relationship +from sqlalchemy import Column, Integer, String, DateTime, ForeignKey, Text, Boolean +from private_gpt.users.db.base_class import Base + + +class ChatHistory(Base): + """Models a chat history table""" + + __tablename__ = "chat_history" + + conversation_id = Column(Integer, nullable=False, primary_key=True) + title = Column(String(255), nullable=True) + created_at = Column(DateTime, default=datetime.now) + updated_at = Column(DateTime, default=datetime.now, + onupdate=datetime.now) + user_id = Column(Integer, ForeignKey("users.id")) + user = relationship("User", back_populates="chat_histories") + chat_items = relationship( + "ChatItem", back_populates="chat_history", cascade="all, delete-orphan") + + def __init__(self, user_id, chat_items=None, **kwargs): + super().__init__(**kwargs) + self.user_id = user_id + self.chat_items = chat_items or [] + self.generate_title() + + def 
generate_title(self): + user_chat_items = [ + item for item in self.chat_items if item.sender == "user"] # ChatItem defines sender; it has no role attribute + if user_chat_items: + first_user_chat_item = user_chat_items[0] + self.title = (first_user_chat_item.content or "Untitled Chat")[:30] # content is nullable, so guard the slice + else: + self.title = "Untitled Chat" + + def __repr__(self): + """Returns string representation of model instance""" + return f"" + + +class ChatItem(Base): + """Models a chat item table""" + + __tablename__ = "chat_items" + + id = Column(Integer, nullable=False, primary_key=True) + sender = Column(String(225), nullable=False) + content = Column(Text, nullable=True) + created_at = Column(DateTime, default=datetime.now) + updated_at = Column(DateTime, default=datetime.now, + onupdate=datetime.now) + like = Column(Boolean, default=True) + conversation_id = Column(Integer, ForeignKey( + "chat_history.conversation_id"), nullable=False) + chat_history = relationship("ChatHistory", back_populates="chat_items") + + def __repr__(self): + """Returns string representation of model instance""" + return f"" diff --git a/private_gpt/users/models/user.py b/private_gpt/users/models/user.py index 5b527208..dd4be3db 100644 --- a/private_gpt/users/models/user.py +++ b/private_gpt/users/models/user.py @@ -51,6 +51,7 @@ class User(Base): Integer, ForeignKey("departments.id"), nullable=False) department = relationship("Department", back_populates="users") + chat_histories = relationship("ChatHistory", back_populates="user") __table_args__ = ( UniqueConstraint('username', name='unique_username_no_spacing'), diff --git a/private_gpt/users/schemas/__init__.py b/private_gpt/users/schemas/__init__.py index 487e92c5..5d88d1d7 100644 --- a/private_gpt/users/schemas/__init__.py +++ b/private_gpt/users/schemas/__init__.py @@ -1,9 +1,25 @@ from .role import Role, RoleCreate, RoleInDB, RoleUpdate from .token import TokenSchema, TokenPayload -from .user import User, UserCreate, UserInDB, UserUpdate, UserBaseSchema, Profile, UsernameUpdate, DeleteUser, UserAdminUpdate, UserAdmin, 
PasswordUpdate +from .user import ( + User, UserCreate, UserInDB, UserUpdate, UserBaseSchema, Profile, + UsernameUpdate, DeleteUser, UserAdminUpdate, UserAdmin, PasswordUpdate +) from .user_role import UserRole, UserRoleCreate, UserRoleInDB, UserRoleUpdate -from .subscription import Subscription, SubscriptionBase, SubscriptionCreate, SubscriptionUpdate +from .subscription import ( + Subscription, SubscriptionBase, SubscriptionCreate, SubscriptionUpdate +) from .company import Company, CompanyBase, CompanyCreate, CompanyUpdate -from .documents import Document, DocumentCreate, DocumentsBase, DocumentUpdate, DocumentList, DepartmentList, DocumentEnable, DocumentDepartmentUpdate, DocumentCheckerUpdate, DocumentMakerCreate, DocumentDepartmentList, DocumentView, DocumentVerify, DocumentFilter -from .department import Department, DepartmentCreate, DepartmentUpdate, DepartmentAdminCreate, DepartmentDelete +from .documents import ( + Document, DocumentCreate, DocumentsBase, DocumentUpdate, DocumentList, + DepartmentList, DocumentEnable, DocumentDepartmentUpdate, DocumentCheckerUpdate, + DocumentMakerCreate, DocumentDepartmentList, DocumentView, DocumentVerify, + DocumentFilter +) +from .department import ( + Department, DepartmentCreate, DepartmentUpdate, DepartmentAdminCreate, DepartmentDelete +) from .audit import AuditBase, AuditCreate, AuditUpdate, Audit, GetAudit +from .chat import ( + ChatHistory, ChatHistoryBase, ChatHistoryCreate, ChatHistoryUpdate, ChatDelete, + ChatItem, ChatItemBase, ChatItemCreate, ChatItemUpdate, CreateChatHistory +) diff --git a/private_gpt/users/schemas/chat.py b/private_gpt/users/schemas/chat.py new file mode 100644 index 00000000..1ecae5d5 --- /dev/null +++ b/private_gpt/users/schemas/chat.py @@ -0,0 +1,56 @@ +from datetime import datetime +from typing import List, Optional +from pydantic import BaseModel + + +class ChatItemBase(BaseModel): + conversation_id: int + sender: str + content: Optional[str] + + +class 
ChatItemCreate(ChatItemBase): + pass + +class ChatItemUpdate(ChatItemBase): + like: Optional[bool] + + +class ChatItem(ChatItemBase): + id: int + created_at: datetime + updated_at: datetime + + class Config: + orm_mode = True + + +class ChatHistoryBase(BaseModel): + user_id: int + title: Optional[str] + + +class ChatHistoryCreate(ChatHistoryBase): + chat_items: Optional[List[ChatItemCreate]] + +class ChatHistoryUpdate(ChatHistoryBase): + updated_at: datetime + chat_items: Optional[List[ChatItemCreate]] + +class Chat(BaseModel): + conversation_id: int + +class ChatHistory(ChatHistoryBase): + conversation_id: int + created_at: datetime + updated_at: datetime + chat_items: List[ChatItem] + + class Config: + orm_mode = True + +class ChatDelete(BaseModel): + conversation_id: int + +class CreateChatHistory(BaseModel): + user_id: int diff --git a/scripts/setup b/scripts/setup index 3e02e641..cbf49577 100755 --- a/scripts/setup +++ b/scripts/setup @@ -10,7 +10,7 @@ from private_gpt.settings.settings import settings resume_download = True if __name__ == '__main__': - parser = argparse.ArgumentParser(prog='Setup: Download models from huggingface') + parser = argparse.ArgumentParser(prog='Setup: Download models from Hugging Face') parser.add_argument('--resume', default=True, action=argparse.BooleanOptionalAction, help='Enable/Disable resume_download options to restart the download progress interrupted') args = parser.parse_args() resume_download = args.resume @@ -40,10 +40,10 @@ print("LLM model downloaded!") # Download Tokenizer print(f"Downloading tokenizer {settings().llm.tokenizer}") +local_path = models_path / "tokenizer/Mistral-7B-Instruct-v0.1" AutoTokenizer.from_pretrained( - pretrained_model_name_or_path=settings().llm.tokenizer, - cache_dir=models_cache_path, + local_path ) print("Tokenizer downloaded!") -print("Setup done") +print("Setup done") \ No newline at end of file diff --git a/settings-local.yaml b/settings-local.yaml index 2c1995bc..102eee4c 100644 --- 
a/settings-local.yaml +++ b/settings-local.yaml @@ -8,16 +8,24 @@ llm: context_window: 3900 tokenizer: mistralai/Mistral-7B-Instruct-v0.2 + llamacpp: - prompt_style: "mistral" - llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF - llm_hf_model_file: mistral-7b-instruct-v0.2.Q4_K_M.gguf + prompt_style: "chatml" + llm_hf_repo_id: TheBloke/OpenHermes-2.5-Mistral-7B-GGUF + llm_hf_model_file: openhermes-2.5-mistral-7b.Q5_K_M.gguf + tfs_z: 1.0 # Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting + top_k: 40 # Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) + top_p: 0.9 # Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) + repeat_last_n: 64 # Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) + repeat_penalty: 1.1 # Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) embedding: + # Should be matching the value above in most cases mode: huggingface + ingest_mode: simple huggingface: - embedding_hf_model_name: BAAI/bge-small-en-v1.5 + embedding_hf_model_name: BAAI/bge-large-en-v1.5 vectorstore: database: qdrant