8 Commits

4 changed files with 117 additions and 23 deletions

View File

@@ -22,6 +22,8 @@ LCH_MODEL_PATH=/srv/llama/ggml-vicuna-13b-4bit-rev1.bin \
./llamacpphtmld ./llamacpphtmld
``` ```
Use the `GOMAXPROCS` environment variable to control how many threads the llama.cpp engine uses.
## API usage ## API usage
The `generate` endpoint will live stream new tokens into an existing conversation until the LLM stops naturally. The `generate` endpoint will live stream new tokens into an existing conversation until the LLM stops naturally.
@@ -36,6 +38,12 @@ MIT
## Changelog ## Changelog
### 2023-04-09 v1.1.0
- New web interface style that is more mobile-friendly and shows API status messages
- Add default example prompt
- Use a longer n_ctx by default
### 2023-04-08 v1.0.0 ### 2023-04-08 v1.0.0
- Initial release - Initial release

2
api.go
View File

@@ -16,7 +16,7 @@ import "C"
// Constant LLaMA parameters // Constant LLaMA parameters
const ( const (
ParamContextSize = 512 // RAM requirements: 512 needs 800MB KV (~3216MB overall), 2048 needs 3200MB KV (~??? overall) ParamContextSize = 1024 // The mem_required is 9800MB + 3216MB/state, regardless of the n_ctx size. However it does affect the KV size for persistence
ParamTopK = 40 ParamTopK = 40
ParamTopP = 0.95 ParamTopP = 0.95
ParamTemperature = 0.08 ParamTemperature = 0.08

4
go.mod
View File

@@ -2,6 +2,4 @@ module code.ivysaur.me/llamacpphtmld
go 1.19 go 1.19
require ( require github.com/google/uuid v1.3.0
github.com/google/uuid v1.3.0 // indirect
)

126
webui.go
View File

@@ -14,18 +14,75 @@ func (this *Application) GET_Root(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(`<!DOCTYPE html> w.Write([]byte(`<!DOCTYPE html>
<html> <html>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1" /> <meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#040c3a">
<title>` + html.EscapeString(AppTitle) + `</title> <title>` + html.EscapeString(AppTitle) + `</title>
<style type="text/css"> <style type="text/css">
html { html {
font-family: sans-serif; font-family: sans-serif;
} }
textarea { html, body {
border-radius: 4px; margin: 0;
display: block; padding: 0;
width: 100%; height: 100%;
min-height: 100px;
position: relative;
}
.banner {
height: 48px;
background: linear-gradient(#06638d, #040c3a);
color: white;
position: relative;
line-height: 48px;
vertical-align: middle;
}
.logo {
font-size: 18px;
padding-left: 8px;
}
.state-wrapper {
position: absolute;
top: 0; left: 0; right: 0; bottom: 0;
pointer-events: none;
text-align: center;
}
#state {
display: none; /* inline-block; */
background: white;
color: black;
border: 2px solid black;
margin: 0 auto;
padding: 2px 4px;
opacity: 0.75;
font-size: 12px;
line-height: 18px;
vertical-align: middle;
}
.banner button {
float: right;
height: 32px;
margin: 8px 8px 8px 0;
}
textarea {
position: absolute;
left: 0;
right: 0;
top: 48px;
bottom: 0;
resize: none;
outline: none;
box-shadow: none;
border: 0;
background: #fff; background: #fff;
transition: background-color 0.5s ease-out; transition: background-color 0.5s ease-out;
} }
@@ -33,16 +90,32 @@ textarea.alert {
background: lightyellow; background: lightyellow;
transition: initial; transition: initial;
} }
button {
margin-top: 8px; @media screen and (max-width: 400px) {
padding: 4px 6px; .logo {
font-size: 0;
}
.state-wrapper {
text-align: left;
margin-left: 8px;
}
} }
</style> </style>
<body> <body>
<h2>🦙 ` + html.EscapeString(AppTitle) + `</h2> <div class="banner">
<textarea id="main" autofocus></textarea> <span class="logo">🦙 ` + html.EscapeString(AppTitle) + `</span>
<button id="generate">▶️ Generate</button> <div class="state-wrapper">
<button id="interrupt" disabled>⏸️ Interrupt</button> <span id="state"></span>
</div>
<button id="generate">▶️ Generate</button>
<button id="interrupt" disabled>⏸️ Interrupt</button>
</div>
<textarea id="main" autofocus>A chat between a curious human and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the human's questions.
### Human: What is the capital city of New Zealand?
### Assistant:</textarea>
<script type="text/javascript"> <script type="text/javascript">
function main() { function main() {
const conversationID = "` + uuid.New().String() + `"; const conversationID = "` + uuid.New().String() + `";
@@ -52,6 +125,9 @@ function main() {
const $generate = document.getElementById("generate"); const $generate = document.getElementById("generate");
const $interrupt = document.getElementById("interrupt"); const $interrupt = document.getElementById("interrupt");
const $main = document.getElementById("main"); const $main = document.getElementById("main");
const $state = document.getElementById("state");
$main.setSelectionRange($main.value.length, $main.value.length);
$generate.addEventListener('click', async function() { $generate.addEventListener('click', async function() {
const content = $main.value; const content = $main.value;
@@ -67,6 +143,16 @@ function main() {
try { try {
const controller = new AbortController(); const controller = new AbortController();
$interrupt.disabled = false;
const doInterrupt = () => {
controller.abort();
$interrupt.removeEventListener('click', doInterrupt);
};
$interrupt.addEventListener('click', doInterrupt);
$state.style.display = "inline-block";
$state.textContent = "Waiting in queue..."
const response = await fetch("/api/v1/generate", { const response = await fetch("/api/v1/generate", {
method: "POST", method: "POST",
signal: controller.signal, signal: controller.signal,
@@ -80,12 +166,7 @@ function main() {
}) })
}); });
$interrupt.disabled = false; $state.textContent = "The AI is reading your text so far..."
const doInterrupt = () => {
controller.abort();
$interrupt.removeEventListener('click', doInterrupt);
};
$interrupt.addEventListener('click', doInterrupt);
const reader = response.body.getReader(); const reader = response.body.getReader();
const decoder = new TextDecoder(); const decoder = new TextDecoder();
@@ -96,6 +177,8 @@ function main() {
break; break;
} }
$state.textContent = "The AI is writing..."
$main.value += decoder.decode(singleReadResult.value); $main.value += decoder.decode(singleReadResult.value);
$main.scrollTop = $main.scrollHeight; $main.scrollTop = $main.scrollHeight;
@@ -104,7 +187,12 @@ function main() {
} }
$state.style.display = "none";
$state.textContent = ""
} catch (ex) { } catch (ex) {
$state.textContent = "Error";
alert( alert(
"Error processing the request: " + "Error processing the request: " +
(ex instanceof Error ? ex.message : JSON.stringify(ex)) (ex instanceof Error ? ex.message : JSON.stringify(ex))