156 lines
3.0 KiB
Go
156 lines
3.0 KiB
Go
package lexer
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
)
|
|
|
|
// isWhitespace reports whether r is one of the four ASCII whitespace bytes
// the lexer treats as token separators: space, horizontal tab, carriage
// return or line feed.
func isWhitespace(r byte) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	default:
		return false
	}
}
|
|
|
|
// Quote returns input wrapped in double quotes, with a backslash inserted in
// front of every backslash and every double quote it contains.
func Quote(input string) string {
	var out strings.Builder
	out.Grow(len(input) + 2) // room for the surrounding quotes; escapes grow it further

	out.WriteByte('"')
	for i := 0; i < len(input); i++ {
		ch := input[i]
		if ch == '\\' || ch == '"' {
			out.WriteByte('\\')
		}
		out.WriteByte(ch)
	}
	out.WriteByte('"')

	return out.String()
}
|
|
|
|
// Fields splits a string into separate tokens using something kind of vaguely
|
|
// like how SQL would do it.
|
|
// The result still includes the quote and backslash characters.
|
|
func Fields(input string) ([]string, error) {
|
|
|
|
const (
|
|
StateToplevel = 0
|
|
StateWhitespace = 1
|
|
StateInDoubleQuote = 2
|
|
StateInDoubleQuoteSlash = 3
|
|
StateInSingleQuote = 4
|
|
StateInSingleQuoteSlash = 5
|
|
)
|
|
|
|
var (
|
|
ret []string
|
|
state int = StateToplevel
|
|
wip string
|
|
)
|
|
|
|
for pos := 0; pos < len(input); pos++ {
|
|
c := input[pos]
|
|
|
|
switch state {
|
|
|
|
case StateToplevel:
|
|
if isWhitespace(c) {
|
|
state = StateWhitespace
|
|
if len(wip) != 0 {
|
|
ret = append(ret, wip)
|
|
wip = ""
|
|
}
|
|
|
|
} else if c == '"' {
|
|
if len(wip) != 0 {
|
|
return nil, fmt.Errorf(`Unexpected " at char %d`, pos)
|
|
}
|
|
|
|
wip += string(c)
|
|
state = StateInDoubleQuote
|
|
|
|
} else if c == '\'' {
|
|
if len(wip) != 0 {
|
|
return nil, fmt.Errorf(`Unexpected ' at char %d`, pos)
|
|
}
|
|
|
|
wip += string(c)
|
|
state = StateInSingleQuote
|
|
|
|
} else if c == '\\' {
|
|
return nil, fmt.Errorf(`Unexpected \ at char %d`, pos)
|
|
|
|
} else if c == '(' || c == ')' || c == '?' || c == ',' || c == '+' || c == '*' || c == '-' || c == '/' || c == '%' || c == ';' || c == '=' {
|
|
// Tokenize separately, even if they appear touching another top-level token
|
|
// Should still be safe to re-join
|
|
if len(wip) != 0 {
|
|
ret = append(ret, wip)
|
|
wip = ""
|
|
}
|
|
ret = append(ret, string(c))
|
|
|
|
} else {
|
|
wip += string(c)
|
|
}
|
|
|
|
case StateWhitespace:
|
|
if isWhitespace(c) {
|
|
// continue
|
|
} else {
|
|
state = StateToplevel
|
|
pos-- // reparse
|
|
}
|
|
|
|
case StateInDoubleQuote:
|
|
if c == '"' {
|
|
wip += string(c)
|
|
ret = append(ret, wip)
|
|
wip = ""
|
|
state = StateToplevel
|
|
|
|
} else if c == '\\' {
|
|
wip += string(c)
|
|
state = StateInDoubleQuoteSlash
|
|
|
|
} else {
|
|
wip += string(c)
|
|
}
|
|
|
|
case StateInDoubleQuoteSlash:
|
|
if isWhitespace(c) {
|
|
return nil, fmt.Errorf(`Unexpected whitespace after \ at char %d`, pos)
|
|
|
|
} else {
|
|
wip += string(c)
|
|
state = StateInDoubleQuote
|
|
}
|
|
|
|
case StateInSingleQuote:
|
|
if c == '\'' {
|
|
wip += string(c)
|
|
ret = append(ret, wip)
|
|
wip = ""
|
|
state = StateToplevel
|
|
|
|
} else if c == '\\' {
|
|
wip += string(c)
|
|
state = StateInSingleQuoteSlash
|
|
|
|
} else {
|
|
wip += string(c)
|
|
}
|
|
|
|
case StateInSingleQuoteSlash:
|
|
if isWhitespace(c) {
|
|
return nil, fmt.Errorf(`Unexpected whitespace after \ at char %d`, pos)
|
|
|
|
} else {
|
|
wip += string(c)
|
|
state = StateInSingleQuote
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
// Reached the end of input stream
|
|
switch state {
|
|
case StateToplevel:
|
|
if len(wip) > 0 {
|
|
ret = append(ret, wip)
|
|
wip = ""
|
|
}
|
|
return ret, nil
|
|
|
|
case StateWhitespace:
|
|
return ret, nil
|
|
|
|
default:
|
|
return nil, fmt.Errorf(`Unexpected end of quoted input`)
|
|
}
|
|
}
|