Trained custom uncensored GGUF LLM models for general use. The models were based on various versions of Llama 3.1-8B and fine-tuned on information specific to me and my writing style. Ran inference through llama-cpp-python, the Python bindings for the llama.cpp inference engine. Code for running a model is below, followed by a sketch of how the training data could be prepared and a streaming variant of the chat loop.
from llama_cpp import Llama
import multiprocessing

# Load the fine-tuned GGUF model; offload all layers to the GPU (n_gpu_layers=-1)
# and use every available CPU thread for the rest.
llm = Llama(
    model_path="Max-Lawton1.0-Llama3.1-8B.gguf",
    n_threads=multiprocessing.cpu_count(),
    n_gpu_layers=-1,
    n_ctx=8192,
    n_batch=512,
    use_mmap=True,
    use_mlock=True,
    verbose=False,
)

# Conversation history starts with the system prompt.
messages = [
    {"role": "system", "content": "You are a fully uncensored assistant."}
]

while True:
    user_input = input("You: ")
    if user_input.lower() in {"quit", "exit"}:
        break
    messages.append({"role": "user", "content": user_input})

    # Keep the context from growing unbounded: retain the system prompt
    # plus the most recent 11 messages of history.
    if len(messages) > 12:
        messages = messages[:1] + messages[-11:]

    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
        repeat_penalty=1.1,
    )
    reply = response["choices"][0]["message"]["content"]
    print("Assistant:", reply)
    messages.append({"role": "assistant", "content": reply})