# AUTO LOADER
# credits - some parts "borrowed" from oobabooga
# https://github.com/oobabooga/text-generation-webui/blob/main/modules/models.py

# (A) LOAD SETTINGS
import a_settings as set  # note: "set" shadows the built-in set() here
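
# NOTE: a_settings.py is assumed to define the names read below; a minimal
# sketch (values are illustrative, inferred from how this file uses them):
#   model_file = "models/model.gguf"        # if present, the llama.cpp path is used
#   model_name = "some-org/some-model"      # otherwise, a Hugging Face model id
#   model_args = {"max_new_tokens": 256}    # kwargs forwarded to LlamaCpp / pipeline()
#   gpu        = True                       # GPU-accelerated loading for Hugging Face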

# (B) MANUALLY SPECIFIED MODEL - USE LLAMA CPP
if hasattr(set, "model_file"):
    from langchain.llms import LlamaCpp
    llm = LlamaCpp(
        model_path=set.model_file,
        **set.model_args
    )
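
    # Illustrative model_args for this path (parameter names are from
    # langchain.llms.LlamaCpp; the values are assumptions, not project settings):
    #   model_args = {"n_ctx": 2048, "temperature": 0.7, "max_tokens": 256}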

# (C) HUGGING FACE
else:
    # (C1) IMPORT TRANSFORMERS MODULES
    import torch, psutil
    from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, pipeline
    from accelerate import infer_auto_device_map, init_empty_weights
    from langchain import HuggingFacePipeline

    # (C2) HELPER - AUTO MAX MEMORY CALCULATION
    def max_mem():
        # (C2-1) GPU MEMORY
        # round total VRAM (in MiB) down to the nearest GB, keeping ~1 GB headroom
        total = torch.cuda.get_device_properties(0).total_memory / (1024 * 1024)
        suggestion = round((total - 1000) / 1000) * 1000
        if total - suggestion < 800:
            suggestion -= 1000
        suggestion = int(round(suggestion / 1000))
        max_memory = {0: f"{suggestion}GiB"}

        # (C2-2) CPU MEMORY
        # same heuristic, applied to currently available system RAM
        total = psutil.virtual_memory().available / (1024 * 1024)
        suggestion = round((total - 1000) / 1000) * 1000
        if total - suggestion < 800:
            suggestion -= 1000
        suggestion = int(round(suggestion / 1000))
        max_memory["cpu"] = f"{suggestion}GiB"

        # (C2-3) RETURN CALCULATED MEMORY
        return max_memory
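
    # For example (illustrative figures): on a 12 GiB GPU with ~32 GiB of free
    # RAM, max_mem() returns something like {0: "11GiB", "cpu": "31GiB"},
    # the max_memory format expected by accelerate's infer_auto_device_map.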

    # (C3) INIT MODEL PARAMS
    params = {
        "low_cpu_mem_usage": True,
        "device_map": "auto"
    }

    # (C4) GPU ACCELERATED
    if set.gpu:
        # build an empty-weights skeleton of the model so accelerate can plan
        # a GPU/CPU device map without loading the real weights yet
        config = AutoConfig.from_pretrained(set.model_name)
        with init_empty_weights():
            model = AutoModelForCausalLM.from_config(config)
        model.tie_weights()
        params["device_map"] = infer_auto_device_map(
            model,
            dtype=config.torch_dtype,
            max_memory=max_mem(),
            no_split_module_classes=model._no_split_modules
        )

    # (C5) CPU ONLY
    else:
        params["torch_dtype"] = torch.float32

    # (C6) LOAD MODEL
    model = AutoModelForCausalLM.from_pretrained(set.model_name, **params)

    # (C7) LLM/PIPE
    llm = HuggingFacePipeline(pipeline=pipeline(
        task="text-generation",
        model=model,
        tokenizer=AutoTokenizer.from_pretrained(set.model_name),
        **set.model_args
    ))
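
# Usage sketch (prompt is illustrative): both branches leave a LangChain LLM
# in `llm`, so downstream code can call it directly, e.g.
#   print(llm("Q: What is 1 + 1? A:"))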