add custom lexer, use for redis string splitting

This commit is contained in:
mappu 2024-06-29 11:21:30 +12:00
parent fc084d7190
commit 639da11ab3
3 changed files with 255 additions and 3 deletions

View File

@ -4,9 +4,10 @@ import (
"context"
"fmt"
"strconv"
"strings"
"unsafe"
"yvbolt/lexer"
"github.com/redis/go-redis/v9"
"github.com/ying32/govcl/vcl"
"github.com/ying32/govcl/vcl/types"
@ -173,8 +174,11 @@ func (ld *redisLoadedDatabase) ExecQuery(query string, resultArea *vcl.TListView
ctx := context.Background()
// Need to parse the query into separate string+args fields for the protocol
// TODO This needs to better handle quotes, escaping, ...
fields := strings.Fields(query)
fields, err := lexer.Fields(query)
if err != nil {
vcl.ShowMessage(fmt.Sprintf("Parsing the query: %v", err))
return
}
fields_boxed := box_interface(fields)

141
lexer/lexer.go Normal file
View File

@ -0,0 +1,141 @@
package lexer
import (
"fmt"
)
// isWhitespace reports whether r is one of the four ASCII whitespace bytes
// the lexer treats as a token separator: space, tab, carriage return, or
// line feed.
func isWhitespace(r byte) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	default:
		return false
	}
}
// Fields splits input into separate tokens using something kind of vaguely
// like how SQL would do it.
//
// Tokens are separated by runs of whitespace (as defined by isWhitespace),
// which are discarded. A token that starts with " or ' runs up to the next
// unescaped occurrence of the same quote character and may contain
// whitespace. Inside a quoted token a backslash escapes the following byte,
// which must not be whitespace. Nothing is unescaped: the returned tokens
// still include their quote and backslash characters.
//
// An error is returned when a quote appears in the middle of an unquoted
// token, when a backslash appears outside of quotes, when a backslash inside
// quotes is followed by whitespace, or when input ends inside a quoted token.
// On error the returned slice is nil. Input containing no tokens yields a nil
// slice and a nil error.
func Fields(input string) ([]string, error) {
	const (
		stateToplevel     = iota // outside any quoted token
		stateInQuote             // inside a quoted token
		stateInQuoteSlash        // inside a quoted token, right after a backslash
	)

	var (
		ret   []string
		state = stateToplevel
		start = -1 // index of the first byte of the token in progress; -1 when none
		quote byte // the quote character that opened the current quoted token
	)

	// The input is scanned byte by byte, and tokens are sliced straight out
	// of input rather than rebuilt one byte at a time. Converting a single
	// byte with string(c) treats it as a code point, so any byte >= 0x80
	// would be re-encoded as a two-byte rune and corrupt multi-byte UTF-8.
	// All bytes the lexer gives meaning to are ASCII and therefore never
	// occur inside a multi-byte UTF-8 sequence.
	for pos := 0; pos < len(input); pos++ {
		c := input[pos]

		switch state {
		case stateToplevel:
			switch {
			case isWhitespace(c):
				// Whitespace terminates the unquoted token, if there is one.
				if start >= 0 {
					ret = append(ret, input[start:pos])
					start = -1
				}

			case c == '"' || c == '\'':
				// A quote may only open a token, never appear part-way
				// through an unquoted one.
				if start >= 0 {
					return nil, fmt.Errorf(`Unexpected %c at char %d`, c, pos)
				}
				start = pos
				quote = c
				state = stateInQuote

			case c == '\\':
				return nil, fmt.Errorf(`Unexpected \ at char %d`, pos)

			default:
				if start < 0 {
					start = pos
				}
			}

		case stateInQuote:
			switch c {
			case quote:
				// The closing quote is part of the token and ends it
				// immediately, without waiting for whitespace.
				ret = append(ret, input[start:pos+1])
				start = -1
				state = stateToplevel
			case '\\':
				state = stateInQuoteSlash
			}

		case stateInQuoteSlash:
			if isWhitespace(c) {
				return nil, fmt.Errorf(`Unexpected whitespace after \ at char %d`, pos)
			}
			state = stateInQuote
		}
	}

	// Reached the end of input stream
	if state != stateToplevel {
		return nil, fmt.Errorf(`Unexpected end of quoted input`)
	}
	if start >= 0 {
		ret = append(ret, input[start:])
	}
	return ret, nil
}

107
lexer/lexer_test.go Normal file
View File

@ -0,0 +1,107 @@
package lexer
import (
"reflect"
"testing"
)
// TestLexer drives Fields through a table of inputs. Each entry either lists
// the exact tokens Fields must return or marks the input as one that must be
// rejected with an error.
func TestLexer(t *testing.T) {
	type testCase struct {
		input     string
		expect    []string
		expectErr bool
	}

	cases := []testCase{
		{input: "foo bar baz", expect: []string{"foo", "bar", "baz"}},

		// Quotes
		{input: `foo "bar" baz`, expect: []string{"foo", `"bar"`, "baz"}},
		{input: `foo "bar baz" quux`, expect: []string{"foo", `"bar baz"`, "quux"}},
		{input: `foo 'bar baz' quux`, expect: []string{"foo", `'bar baz'`, "quux"}},

		// Escape characters
		{input: `foo 'bar \n baz' quux`, expect: []string{"foo", `'bar \n baz'`, "quux"}},
		{input: `foo "bar\"" baz`, expect: []string{"foo", `"bar\""`, "baz"}},

		// Collapsing whitespace
		{input: " foo bar \r\t\n baz\n", expect: []string{"foo", "bar", "baz"}},

		// Errors
		{input: `foo "bar`, expectErr: true},    // mismatched quotes
		{input: `foo 'bar`, expectErr: true},    // mismatched quotes
		{input: `foo \"bar"`, expectErr: true},  // invalid top-level escape
		{input: `foo "bar\ "`, expectErr: true}, // escaping nothing
	}

	for _, tc := range cases {
		got, err := Fields(tc.input)

		switch {
		case err != nil && !tc.expectErr:
			t.Errorf("Test %q got error %v, expected nil", tc.input, err)
		case err != nil:
			// The input was rejected, as this case requires.
		case tc.expectErr:
			t.Errorf("Test %q got error <nil>, expected error", tc.input)
		case !reflect.DeepEqual(got, tc.expect):
			t.Errorf("Test %q got %v, expected %v", tc.input, got, tc.expect)
		}
	}
}