api: llama_eval only needs to evaluate new tokens

This commit is contained in:
mappu 2023-04-08 15:48:38 +12:00
parent 2c11e32018
commit 6c6a5c602e

35
api.go
View File

@@ -101,16 +101,33 @@ func (this *Application) POST_Chat(w http.ResponseWriter, r *http.Request) {
 		// Get the next token from LLaMA
-		log.Println("doing llama_eval...")
-		evalErr := C.llama_eval(lcontext,
-			&llast_n_tokens[0], C.int(i), // tokens + n_tokens is the provided batch of new tokens to process
-			C.int(i), // n_past is the number of tokens to use from previous eval calls
-			C.int(runtime.GOMAXPROCS(0)))
-		if evalErr != 0 {
-			log.Printf("llama_eval: %d", evalErr)
-			http.Error(w, "Internal error", 500)
-			return
-		}
+		if i == int(llast_n_tokens_used_size) {
+			log.Println("doing llama_eval (for the first time on all supplied input)...")
+			evalErr := C.llama_eval(lcontext,
+				&llast_n_tokens[0], C.int(i), // tokens + n_tokens is the provided batch of new tokens to process
+				C.int(0), // n_past is the number of tokens to use from previous eval calls
+				C.int(runtime.GOMAXPROCS(0)))
+			if evalErr != 0 {
+				log.Printf("llama_eval: %d", evalErr)
+				http.Error(w, "Internal error", 500)
+				return
+			}
+		} else {
+			log.Println("doing llama_eval (incrementally on the newly generated token)...")
+			evalErr := C.llama_eval(lcontext,
+				&llast_n_tokens[i-1], 1, // tokens + n_tokens is the provided batch of new tokens to process
+				C.int(i), // n_past is the number of tokens to use from previous eval calls
+				C.int(runtime.GOMAXPROCS(0)))
+			if evalErr != 0 {
+				log.Printf("llama_eval: %d", evalErr)
+				http.Error(w, "Internal error", 500)
+				return
+			}
+		}
 		if err := r.Context().Err(); err != nil {