From 5556de9152c80bc498a80277a622a9606e398b90 Mon Sep 17 00:00:00 2001
From: Ikko Eltociear Ashimine <eltociear@gmail.com>
Date: Fri, 31 Mar 2023 00:53:53 +0900
Subject: [PATCH 1/7] Fix typo in TRAINING_LOG.md

Conditonal -> Conditional
---
 TRAINING_LOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/TRAINING_LOG.md b/TRAINING_LOG.md
index 31b9bb21..50469645 100644
--- a/TRAINING_LOG.md
+++ b/TRAINING_LOG.md
@@ -160,7 +160,7 @@ We realized that we had two bugs however:
 - We accidentally duplicated data and effectively trained for 2 epochs instead of 1
 - We added an eos token to every sequence, even those that we truncated (e.g. long code that exceeds the 1024).
 
-## Conditonal EOS and 1 Epoch
+## Conditional EOS and 1 Epoch
 
 Using the same parameters, we then trained a model using a "conditional" eos token where we only add an `eos` when the inputs are less than the maximum sequence length for one epoch.
 

From 6524fec7ff72a19a19030276552903a70ecc535d Mon Sep 17 00:00:00 2001
From: ParisNeo <aloui.seifeddine@gmail.com>
Date: Sat, 1 Apr 2023 01:16:16 +0200
Subject: [PATCH 2/7] Added vscode files to gitignore

---
 .gitignore | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 8addd972..02ba78ce 100644
--- a/.gitignore
+++ b/.gitignore
@@ -161,4 +161,8 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
\ No newline at end of file
+#.idea/
+
+
+# vs code
+.vscode
\ No newline at end of file

From 67e19bccb0b9b75b889d0adb50c11eb902c8b14e Mon Sep 17 00:00:00 2001
From: ParisNeo <aloui.seifeddine@gmail.com>
Date: Sat, 1 Apr 2023 01:35:50 +0200
Subject: [PATCH 3/7] added *.bin to the gitignore

---
 .gitignore | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 02ba78ce..14e10a78 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,4 +165,5 @@ cython_debug/
 
 
 # vs code
-.vscode
\ No newline at end of file
+.vscode
+*.bin
\ No newline at end of file

From 1d5f6af634ba2b4604a15e5934b03c49bdba5e14 Mon Sep 17 00:00:00 2001
From: HiraduNakamura <127570430+HiraduNakamura@users.noreply.github.com>
Date: Fri, 31 Mar 2023 20:26:09 -0400
Subject: [PATCH 4/7] Made capitalization consistent

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 02f55f92..d62c875e 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Run on M1 Mac (not sped up!)
 
 # Try it yourself
 
-Here's how to get started with the CPU quantized gpt4all model checkpoint:
+Here's how to get started with the CPU quantized GPT4All model checkpoint:
 
 1. Download the `gpt4all-lora-quantized.bin` file from [Direct Link](https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized.bin) or [[Torrent-Magnet]](https://tinyurl.com/gpt4all-lora-quantized).
 2. Clone this repository, navigate to `chat`, and place the downloaded file there.

From 78321adf45ee15211d8ad42683d9436d6ccbc4ae Mon Sep 17 00:00:00 2001
From: gourcetools <120996278+gourcetools@users.noreply.github.com>
Date: Sat, 1 Apr 2023 17:30:40 +0200
Subject: [PATCH 5/7] Create launcher.sh

The script detects the user's operating system, lists the available .bin files,
and prompts the user to select one to run, ensuring a more user-friendly experience.
---
 launcher.sh | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 launcher.sh

diff --git a/launcher.sh b/launcher.sh
new file mode 100644
index 00000000..ed7b99cd
--- /dev/null
+++ b/launcher.sh
@@ -0,0 +1,88 @@
+#!/bin/bash
+
+# Print the GPT4All ASCII-art banner followed by the project URL
+printf '%s\n' "==========================================================" \
+  " ██████  ██████  ████████ ██   ██  █████  ██      ██      " \
+  "██       ██   ██    ██    ██   ██ ██   ██ ██      ██      " \
+  "██   ███ ██████     ██    ███████ ███████ ██      ██      " \
+  "██    ██ ██         ██         ██ ██   ██ ██      ██      " \
+  " ██████  ██         ██         ██ ██   ██ ███████ ███████ " \
+  " └─> https://github.com/nomic-ai/gpt4all"
+
+# Map the macOS CPU architecture (uname -m) to the matching release binary.
+# Globals written: os_type, binary_filename.
+# Exits the script with status 1 when the architecture is not recognized.
+detect_mac_arch() {
+  case "$(uname -m)" in
+    arm64)
+      os_type="M1 Mac/OSX"
+      binary_filename="gpt4all-lora-quantized-OSX-m1"
+      ;;
+    x86_64)
+      os_type="Intel Mac/OSX"
+      binary_filename="gpt4all-lora-quantized-OSX-intel"
+      ;;
+    *)
+      echo "Unknown macOS architecture" >&2
+      exit 1
+      ;;
+  esac
+}
+
+# Detect operating system and set os_type / binary_filename.
+# WSL1 kernels report "Microsoft" and WSL2 "microsoft" in /proc/version, hence -i.
+case "$(uname -s)" in
+  Darwin*)
+    detect_mac_arch
+    ;;
+  Linux*)
+    if grep -qi microsoft /proc/version; then
+      os_type="Windows (WSL)"
+      binary_filename="gpt4all-lora-quantized-win64.exe"
+    else
+      os_type="Linux"
+      binary_filename="gpt4all-lora-quantized-linux-x86"
+    fi
+    ;;
+  CYGWIN*|MINGW32*|MSYS*|MINGW*)
+    os_type="Windows (Cygwin/MSYS/MINGW)"
+    binary_filename="gpt4all-lora-quantized-win64.exe"
+    ;;
+  *)
+    echo "Unknown operating system" >&2
+    exit 1
+    ;;
+esac
+echo "================================"
+echo "== You are using $os_type."
+
+# Change to the chat directory; stop if that fails instead of running elsewhere
+cd chat || { echo "Could not enter the 'chat' directory" >&2; exit 1; }
+
+# List .bin files and prompt user to select one. nullglob makes a
+# non-matching glob expand to nothing instead of the literal "*.bin".
+shopt -s nullglob; bin_files=(*.bin); shopt -u nullglob
+(( ${#bin_files[@]} > 0 )) || { echo "No .bin files found in the chat directory" >&2; exit 1; }
+echo "== Available .bin files:"
+for i in "${!bin_files[@]}"; do echo "   [$((i+1))] ${bin_files[i]}"; done
+
+# Prompt until the user enters a menu index between 1 and ${#bin_files[@]}.
+# Globals: bin_files (read), user_selection (written). Exits 1 if stdin closes.
+get_valid_user_input() {
+  while true; do
+    echo "==> Please enter a number:"
+    read -r user_selection || { echo "No input received" >&2; exit 1; }
+    # 10# forces base 10 so input such as "08" or "010" is not parsed as octal
+    if [[ $user_selection =~ ^[0-9]+$ ]] && (( 10#$user_selection >= 1 && 10#$user_selection <= ${#bin_files[@]} )); then
+      user_selection=$((10#$user_selection))
+      return 0
+    fi
+    echo "Invalid input. Please enter a number between 1 and ${#bin_files[@]}."
+  done
+}
+
+get_valid_user_input
+selected_bin_file=${bin_files[user_selection - 1]}
+
+# Launch the platform-specific binary, passing the selected .bin file via -m
+"./${binary_filename}" -m "${selected_bin_file}"

From 1a451445a21234cc8443603025eba5ab3572cb49 Mon Sep 17 00:00:00 2001
From: Wayner Barrios <waybarrios@gmail.com>
Date: Sat, 1 Apr 2023 23:52:25 -0400
Subject: [PATCH 6/7] DatasetDict to dataset object.

---
 data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data.py b/data.py
index 0e356f7d..ff79924c 100644
--- a/data.py
+++ b/data.py
@@ -68,7 +68,7 @@ def load_data(config, tokenizer):
         dataset = load_dataset("json", data_files=files, split="train")
 
     else:
-        dataset = load_dataset(dataset_path)
+        dataset = load_dataset(dataset_path,split='train')
 
     dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])
 

From d9a678dd3dcd806f44e8202f02f5a8a4df3079c1 Mon Sep 17 00:00:00 2001
From: Jo Liss <joliss42@gmail.com>
Date: Sun, 2 Apr 2023 19:19:02 +0300
Subject: [PATCH 7/7] Fix `git submodule` instructions

---
 README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 02f55f92..434ac75d 100644
--- a/README.md
+++ b/README.md
@@ -110,9 +110,10 @@ You can reproduce our trained model by doing the following:
 
 Clone the repo
 
-`git clone --recurse-submodules https://github.com/nomic-ai/gpt4all.git`
-
-`git submodule configure && git submodule update`
+```
+git clone --recurse-submodules https://github.com/nomic-ai/gpt4all.git
+git submodule update --init
+```
 
 Setup the environment