8 commits · 4 changed files with 117 additions and 23 deletions


@@ -22,6 +22,8 @@ LCH_MODEL_PATH=/srv/llama/ggml-vicuna-13b-4bit-rev1.bin \
./llamacpphtmld
```
Use the `GOMAXPROCS` environment variable to control how many threads the llama.cpp engine uses.
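A minimal sketch, assuming the server simply passes Go's scheduler setting through as the llama.cpp thread count (the `nThreads` name is illustrative, not taken from llamacpphtmld's source):
```
package main

import (
	"fmt"
	"runtime"
)

func main() {
	// GOMAXPROCS(0) reports the current value without changing it; it defaults
	// to runtime.NumCPU() unless the GOMAXPROCS environment variable overrides it.
	nThreads := runtime.GOMAXPROCS(0)
	fmt.Printf("llama.cpp would be given %d threads\n", nThreads)
}
```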
## API usage
The `generate` endpoint will live stream new tokens into an existing conversation until the LLM stops naturally.
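As a rough illustration only, a Go client for this stream might look like the sketch below. The JSON field names (`ConversationID`, `Content`) and the listen address are assumptions for the example; consult api.go for the real request schema.
```
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumed request shape; the real field names live in api.go.
	reqBody, _ := json.Marshal(map[string]string{
		"ConversationID": "11111111-2222-3333-4444-555555555555",
		"Content":        "### Human: What is the capital city of New Zealand?\n### Assistant:",
	})

	resp, err := http.Post("http://127.0.0.1:8080/api/v1/generate", "application/json", bytes.NewReader(reqBody))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Tokens arrive as a plain text stream; print each chunk as it lands.
	buf := make([]byte, 4096)
	for {
		n, readErr := resp.Body.Read(buf)
		if n > 0 {
			fmt.Print(string(buf[:n]))
		}
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			panic(readErr)
		}
	}
}
```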
@@ -36,6 +38,12 @@ MIT
## Changelog
### 2023-04-09 v1.1.0
- New web interface style that is more mobile-friendly and shows API status messages
- Add default example prompt
- Use a longer n_ctx by default
### 2023-04-08 v1.0.0
- Initial release

api.go (2 changes)

@@ -16,7 +16,7 @@ import "C"
// Constant LLaMA parameters
const (
ParamContextSize = 512 // RAM requirements: 512 needs 800MB KV (~3216MB overall), 2048 needs 3200MB KV (~??? overall)
ParamContextSize = 1024 // The mem_required is 9800MB + 3216MB/state, regardless of the n_ctx size. However, it does affect the KV size for persistence
ParamTopK = 40
ParamTopP = 0.95
ParamTemperature = 0.08

go.mod (4 changes)

@@ -2,6 +2,4 @@ module code.ivysaur.me/llamacpphtmld
go 1.19
require (
github.com/google/uuid v1.3.0 // indirect
)
require github.com/google/uuid v1.3.0

webui.go (122 changes)

@@ -14,18 +14,75 @@ func (this *Application) GET_Root(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(`<!DOCTYPE html>
<html>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="theme-color" content="#040c3a">
<title>` + html.EscapeString(AppTitle) + `</title>
<style type="text/css">
html {
font-family: sans-serif;
}
textarea {
border-radius: 4px;
display: block;
width: 100%;
min-height: 100px;
html, body {
margin: 0;
padding: 0;
height: 100%;
position: relative;
}
.banner {
height: 48px;
background: linear-gradient(#06638d, #040c3a);
color: white;
position: relative;
line-height: 48px;
vertical-align: middle;
}
.logo {
font-size: 18px;
padding-left: 8px;
}
.state-wrapper {
position: absolute;
top: 0; left: 0; right: 0; bottom: 0;
pointer-events: none;
text-align: center;
}
#state {
display: none; /* inline-block; */
background: white;
color: black;
border: 2px solid black;
margin: 0 auto;
padding: 2px 4px;
opacity: 0.75;
font-size: 12px;
line-height: 18px;
vertical-align: middle;
}
.banner button {
float: right;
height: 32px;
margin: 8px 8px 8px 0;
}
textarea {
position: absolute;
left: 0;
right: 0;
top: 48px;
bottom: 0;
resize: none;
outline: none;
box-shadow: none;
border: 0;
background: #fff;
transition: background-color 0.5s ease-out;
}
@@ -33,16 +90,32 @@ textarea.alert {
background: lightyellow;
transition: initial;
}
button {
margin-top: 8px;
padding: 4px 6px;
@media screen and (max-width: 400px) {
.logo {
font-size: 0;
}
.state-wrapper {
text-align: left;
margin-left: 8px;
}
}
</style>
<body>
<h2>🦙 ` + html.EscapeString(AppTitle) + `</h2>
<textarea id="main" autofocus></textarea>
<div class="banner">
<span class="logo">🦙 ` + html.EscapeString(AppTitle) + `</span>
<div class="state-wrapper">
<span id="state"></span>
</div>
<button id="generate">▶️ Generate</button>
<button id="interrupt" disabled>⏸️ Interrupt</button>
</div>
<textarea id="main" autofocus>A chat between a curious human and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the human's questions.
### Human: What is the capital city of New Zealand?
### Assistant:</textarea>
<script type="text/javascript">
function main() {
const conversationID = "` + uuid.New().String() + `";
@@ -52,6 +125,9 @@ function main() {
const $generate = document.getElementById("generate");
const $interrupt = document.getElementById("interrupt");
const $main = document.getElementById("main");
const $state = document.getElementById("state");
$main.setSelectionRange($main.value.length, $main.value.length);
$generate.addEventListener('click', async function() {
const content = $main.value;
@@ -67,6 +143,16 @@ function main() {
try {
const controller = new AbortController();
$interrupt.disabled = false;
const doInterrupt = () => {
controller.abort();
$interrupt.removeEventListener('click', doInterrupt);
};
$interrupt.addEventListener('click', doInterrupt);
$state.style.display = "inline-block";
$state.textContent = "Waiting in queue..."
const response = await fetch("/api/v1/generate", {
method: "POST",
signal: controller.signal,
@@ -80,12 +166,7 @@ function main() {
})
});
$interrupt.disabled = false;
const doInterrupt = () => {
controller.abort();
$interrupt.removeEventListener('click', doInterrupt);
};
$interrupt.addEventListener('click', doInterrupt);
$state.textContent = "The AI is reading your text so far..."
const reader = response.body.getReader();
const decoder = new TextDecoder();
@@ -96,6 +177,8 @@ function main() {
break;
}
$state.textContent = "The AI is writing..."
$main.value += decoder.decode(singleReadResult.value);
$main.scrollTop = $main.scrollHeight;
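The server side of this stream is not part of the hunk; as a hedged sketch only, a Go handler can feed such a fetch() reader by flushing each token as it is produced. The handler name, token channel, and listen address below are illustrative, not taken from llamacpphtmld:
```
package main

import (
	"io"
	"net/http"
)

// streamTokens is a hypothetical stand-in for the real generate handler: it
// writes each token to the response and flushes, so the browser's reader
// above sees partial output immediately.
func streamTokens(w http.ResponseWriter, tokens <-chan string) {
	flusher, ok := w.(http.Flusher)
	if !ok {
		http.Error(w, "streaming unsupported", http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	for tok := range tokens {
		if _, err := io.WriteString(w, tok); err != nil {
			return // client went away, e.g. via the Interrupt button
		}
		flusher.Flush() // hand the chunk to the browser right away
	}
}

func main() {
	http.HandleFunc("/api/v1/generate", func(w http.ResponseWriter, r *http.Request) {
		tokens := make(chan string, 3)
		tokens <- "Wellington is"
		tokens <- " the capital of"
		tokens <- " New Zealand."
		close(tokens)
		streamTokens(w, tokens) // stand-in for a real llama.cpp token stream
	})
	http.ListenAndServe("127.0.0.1:8080", nil)
}
```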
@@ -104,7 +187,12 @@ function main() {
}
$state.style.display = "none";
$state.textContent = ""
} catch (ex) {
$state.textContent = "Error";
alert(
"Error processing the request: " +
(ex instanceof Error ? ex.message : JSON.stringify(ex))