From 639da11ab396a73d010862c4b731bf375dd0d36f Mon Sep 17 00:00:00 2001 From: mappu Date: Sat, 29 Jun 2024 11:21:30 +1200 Subject: [PATCH] add custom lexer, use for redis string splitting --- db_redis.go | 10 +++- lexer/lexer.go | 141 ++++++++++++++++++++++++++++++++++++++++++++ lexer/lexer_test.go | 107 +++++++++++++++++++++++++++++++++ 3 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 lexer/lexer.go create mode 100644 lexer/lexer_test.go diff --git a/db_redis.go b/db_redis.go index da6228e..6af61ea 100644 --- a/db_redis.go +++ b/db_redis.go @@ -4,9 +4,10 @@ import ( "context" "fmt" "strconv" - "strings" "unsafe" + "yvbolt/lexer" + "github.com/redis/go-redis/v9" "github.com/ying32/govcl/vcl" "github.com/ying32/govcl/vcl/types" @@ -173,8 +174,11 @@ func (ld *redisLoadedDatabase) ExecQuery(query string, resultArea *vcl.TListView ctx := context.Background() // Need to parse the query into separate string+args fields for the protocol - // TODO This needs to better handle quotes, escaping, ... - fields := strings.Fields(query) + fields, err := lexer.Fields(query) + if err != nil { + vcl.ShowMessage(fmt.Sprintf("Parsing the query: %v", err)) + return + } fields_boxed := box_interface(fields) diff --git a/lexer/lexer.go b/lexer/lexer.go new file mode 100644 index 0000000..2c310a7 --- /dev/null +++ b/lexer/lexer.go @@ -0,0 +1,141 @@ +package lexer + +import ( + "fmt" +) + +func isWhitespace(r byte) bool { // reports whether r is an ASCII space, tab, carriage return or line feed + return (r == ' ' || r == '\t' || r == '\r' || r == '\n') +} + +// Fields splits a string into whitespace-separated tokens; a run enclosed in +// single or double quotes is kept together as one token. +// The result still includes the quote and backslash characters. 
// Fields splits input into tokens separated by runs of ASCII whitespace
// (space, tab, carriage return, line feed).
//
// A token that starts with ' or " extends to the matching closing quote and
// may contain whitespace. Inside such a token a backslash protects the byte
// that follows it, so \" does not close a double-quoted token. A closing quote
// always ends the token, even when more text follows immediately.
//
// Tokens are returned verbatim: the surrounding quotes and any backslashes are
// still present in the result. Each token is sliced directly out of input, so
// multi-byte UTF-8 sequences pass through unchanged.
//
// An error is returned when a quote appears in the middle of an unquoted
// token, when a backslash appears outside quotes, when a backslash inside
// quotes is followed by whitespace, and when input ends inside a quoted token.
// The returned slice is nil whenever the error is non-nil, and also when input
// contains no tokens.
func Fields(input string) ([]string, error) {
	var ret []string

	// start is the index of the first byte of the token in progress, or -1.
	start := -1
	// quote is the quote byte that opened the current token, or 0 outside quotes.
	var quote byte
	// escaped is set while the previous byte was a backslash inside quotes.
	escaped := false

	// Every byte with special meaning is ASCII, so scanning byte-wise never
	// splits a multi-byte UTF-8 sequence across two tokens.
	for pos := 0; pos < len(input); pos++ {
		c := input[pos]
		ws := c == ' ' || c == '\t' || c == '\r' || c == '\n'

		switch {
		case escaped:
			if ws {
				return nil, fmt.Errorf(`Unexpected whitespace after \ at char %d`, pos)
			}
			escaped = false

		case quote != 0:
			switch c {
			case quote:
				// The closing quote is part of the token.
				ret = append(ret, input[start:pos+1])
				start, quote = -1, 0
			case '\\':
				escaped = true
			}

		case ws:
			if start >= 0 {
				ret = append(ret, input[start:pos])
				start = -1
			}

		case c == '"' || c == '\'':
			// A quote may only open a token, never appear inside a bare one.
			if start >= 0 {
				return nil, fmt.Errorf("Unexpected %c at char %d", c, pos)
			}
			start, quote = pos, c

		case c == '\\':
			return nil, fmt.Errorf(`Unexpected \ at char %d`, pos)

		default:
			if start < 0 {
				start = pos
			}
		}
	}

	// Reached the end of input: an open quote (including a dangling
	// backslash inside one) is an error, a bare token is flushed.
	if quote != 0 {
		return nil, fmt.Errorf(`Unexpected end of quoted input`)
	}
	if start >= 0 {
		ret = append(ret, input[start:])
	}
	return ret, nil
}

// TestLexer is the table-driven test for Fields; it belongs to
// lexer/lexer_test.go. Cases with expectErr set only check that an error is
// reported; all other cases compare the exact token list.
func TestLexer(t *testing.T) {
	cases := []struct {
		input     string
		expect    []string
		expectErr bool
	}{
		{input: "foo bar baz", expect: []string{"foo", "bar", "baz"}},

		// Quotes
		{input: `foo "bar" baz`, expect: []string{"foo", `"bar"`, "baz"}},
		{input: `foo "bar baz" quux`, expect: []string{"foo", `"bar baz"`, "quux"}},
		{input: `foo 'bar baz' quux`, expect: []string{"foo", `'bar baz'`, "quux"}},

		// Escape characters
		{input: `foo 'bar \n baz' quux`, expect: []string{"foo", `'bar \n baz'`, "quux"}},
		{input: `foo "bar\"" baz`, expect: []string{"foo", `"bar\""`, "baz"}},

		// Collapsing whitespace
		{input: " foo bar \r\t\n baz\n", expect: []string{"foo", "bar", "baz"}},
		{input: "", expect: nil},
		{input: " \t\r\n", expect: nil},

		// Non-ASCII bytes must come back untouched
		{
			input:  "SET greeting \"h\u00e9llo w\u00f6rld\" \u65e5\u672c\u8a9e",
			expect: []string{"SET", "greeting", "\"h\u00e9llo w\u00f6rld\"", "\u65e5\u672c\u8a9e"},
		},

		// Errors
		{input: `foo "bar`, expectErr: true},    // mismatched quotes
		{input: `foo 'bar`, expectErr: true},    // mismatched quotes
		{input: `foo \"bar"`, expectErr: true},  // invalid top-level escape
		{input: `foo "bar\ "`, expectErr: true}, // escaping nothing
		{input: `foo"bar"`, expectErr: true},    // quote inside a bare token
		{input: `foo "bar\`, expectErr: true},   // input ends after a backslash
	}

	for _, tc := range cases {
		out, err := Fields(tc.input)

		if tc.expectErr {
			if err == nil {
				t.Errorf("Test %q got no error, expected error", tc.input)
			}
			continue
		}

		if err != nil {
			t.Errorf("Test %q got error %v, expected nil", tc.input, err)
			continue
		}

		if !reflect.DeepEqual(out, tc.expect) {
			t.Errorf("Test %q got %q, expected %q", tc.input, out, tc.expect)
		}
	}
}