api: llama_eval only needs to evaluate new tokens
This commit is contained in:
parent
2c11e32018
commit
6c6a5c602e
19
api.go
19
api.go
@ -101,10 +101,26 @@ func (this *Application) POST_Chat(w http.ResponseWriter, r *http.Request) {
|
|||||||
|
|
||||||
// Get the next token from LLaMA
|
// Get the next token from LLaMA
|
||||||
|
|
||||||
log.Println("doing llama_eval...")
|
if i == int(llast_n_tokens_used_size) {
|
||||||
|
|
||||||
|
log.Println("doing llama_eval (for the first time on all supplied input)...")
|
||||||
|
|
||||||
evalErr := C.llama_eval(lcontext,
|
evalErr := C.llama_eval(lcontext,
|
||||||
&llast_n_tokens[0], C.int(i), // tokens + n_tokens is the provided batch of new tokens to process
|
&llast_n_tokens[0], C.int(i), // tokens + n_tokens is the provided batch of new tokens to process
|
||||||
|
C.int(0), // n_past is the number of tokens to use from previous eval calls
|
||||||
|
C.int(runtime.GOMAXPROCS(0)))
|
||||||
|
if evalErr != 0 {
|
||||||
|
log.Printf("llama_eval: %d", evalErr)
|
||||||
|
http.Error(w, "Internal error", 500)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
log.Println("doing llama_eval (incrementally on the newly generated token)...")
|
||||||
|
|
||||||
|
evalErr := C.llama_eval(lcontext,
|
||||||
|
&llast_n_tokens[i-1], 1, // tokens + n_tokens is the provided batch of new tokens to process
|
||||||
C.int(i), // n_past is the number of tokens to use from previous eval calls
|
C.int(i), // n_past is the number of tokens to use from previous eval calls
|
||||||
C.int(runtime.GOMAXPROCS(0)))
|
C.int(runtime.GOMAXPROCS(0)))
|
||||||
if evalErr != 0 {
|
if evalErr != 0 {
|
||||||
@ -112,6 +128,7 @@ func (this *Application) POST_Chat(w http.ResponseWriter, r *http.Request) {
|
|||||||
http.Error(w, "Internal error", 500)
|
http.Error(w, "Internal error", 500)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err := r.Context().Err(); err != nil {
|
if err := r.Context().Err(); err != nil {
|
||||||
return
|
return
|
||||||
|
Loading…
Reference in New Issue
Block a user