Replit Model (#713)

* porting over replit code model to gpt4all * replaced memory with kv_self struct * continuing debug * welp it built but lot of sus things * working model loading and somewhat working generate.. need to format response? * revert back to semi working version * finally got rid of weird formatting * figured out problem is with python bindings - this is good to go for testing * addressing PR feedback * output refactor * fixed prompt response collection * cleanup * addressing PR comments * building replit backend with new ggmlver code * chatllm replit and clean python files * cleanup * updated replit to match new llmodel api * match llmodel api and change size_t to Token * resolve PR comments * replit model commit comment
2025-10-31 13:51:43 +00:00 · 2023-06-06 17:09:00 -04:00
parent ef35eb496f
commit c4706d0c14
8 changed files with 1140 additions and 6 deletions
--- a/gpt4all-bindings/python/gpt4all/pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/pyllmodel.py
@@ -12,11 +12,10 @@ class DualStreamProcessor:
        self.output = ""

    def write(self, text):
-        cleaned_text = re.sub(r"\n(?!\n)", "", text)
        if self.stream is not None:
-            self.stream.write(cleaned_text)
+            self.stream.write(text)
            self.stream.flush()
-        self.output += cleaned_text
+        self.output += text

 # TODO: provide a config file to make this more robust
 LLMODEL_PATH = os.path.join("llmodel_DO_NOT_MODIFY", "build").replace("\\", "\\\\")
@@ -236,7 +235,6 @@ class LLModel:
        sys.stdout = old_stdout
        # Force new line
        print()
-
        return stream_processor.output

    # Empty prompt callback
@@ -247,7 +245,7 @@ class LLModel:
    # Empty response callback method that just prints response to be collected
    @staticmethod
    def _response_callback(token_id, response):
-        print(response.decode('utf-8'))
+        sys.stdout.write(response.decode('utf-8'))
        return True

    # Empty recalculate callback