lexer: separate tokens for top-level special characters

This commit is contained in:
mappu 2024-06-30 11:26:00 +12:00
parent 493ab846b9
commit b141aaaa6c
2 changed files with 28 additions and 1 deletion

View File

@@ -65,6 +65,15 @@ func Fields(input string) ([]string, error) {
} else if c == '\\' {
return nil, fmt.Errorf(`Unexpected \ at char %d`, pos)
} else if c == '(' || c == ')' || c == '?' || c == ',' || c == '+' || c == '*' || c == '-' || c == '/' || c == '%' || c == ';' || c == '=' {
// Tokenize separately, even if they appear touching another top-level token
// Should still be safe to re-join
if len(wip) != 0 {
ret = append(ret, wip)
wip = ""
}
ret = append(ret, string(c))
} else {
wip += string(c)
}

View File

@@ -59,6 +59,24 @@ func TestLexer(t *testing.T) {
expectErr: false,
},
// Special characters lexed as separate tokens, but only at top level
testCase{
input: `3+5*(2.3/6);`,
expect: []string{"3", `+`, "5", "*", "(", "2.3", "/", "6", ")", ";"},
expectErr: false,
},
testCase{
input: `SELECT "3+5*(2.3/6)" AS expression;`,
expect: []string{"SELECT", `"3+5*(2.3/6)"`, "AS", "expression", ";"},
expectErr: false,
},
testCase{
input: `INSERT INTO foo (bar, baz) VALUES (?, ?);`,
expect: []string{"INSERT", "INTO", "foo", "(", "bar", ",", "baz", ")", "VALUES", "(", "?", ",", "?", ")", ";"},
expectErr: false,
},
// Errors
testCase{
@@ -99,7 +117,7 @@ func TestLexer(t *testing.T) {
}
if !reflect.DeepEqual(out, tc.expect) {
t.Errorf("Test %q got %v, expected %v", tc.input, out, tc.expect)
t.Errorf("Test %q\n- got: %#v\n- expected %#v", tc.input, out, tc.expect)
}
}