// Source: yvbolt/lexer/lexer.go (Go, 156 lines, 3.0 KiB)

package lexer
import (
"fmt"
// 2024-06-28 23:56:26 +00:00
"strings"
)
// isWhitespace reports whether r is one of the four ASCII whitespace bytes
// recognised by the lexer: space, tab, carriage return or line feed.
func isWhitespace(r byte) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	default:
		return false
	}
}
// 2024-06-28 23:56:26 +00:00
// Quote returns input wrapped in double quotes, with every backslash and every
// double quote inside it preceded by a backslash. All other bytes are copied
// through unchanged.
func Quote(input string) string {
	var out strings.Builder
	out.Grow(len(input) + 2)

	out.WriteByte('"')
	for i := 0; i < len(input); i++ {
		b := input[i]
		// Only the escape character itself and the delimiter need escaping.
		if b == '\\' || b == '"' {
			out.WriteByte('\\')
		}
		out.WriteByte(b)
	}
	out.WriteByte('"')

	return out.String()
}
// Fields splits a string into separate tokens using something kind of vaguely
// like how SQL would do it.
//
// Tokens are separated by runs of ASCII whitespace (space, tab, CR, LF). Each
// of the characters ( ) ? , + * - / % ; = is always emitted as its own
// single-character token, even when it touches a neighbouring bare word.
// A double- or single-quoted string is emitted as one token; inside it a
// backslash escapes the following character, which must not be whitespace.
//
// Every token is a verbatim substring of input: the result still includes the
// quote and backslash characters, and bytes outside the ASCII range (for
// example multi-byte UTF-8 sequences) are passed through unchanged.
//
// An error is returned if a quote character directly follows a bare word, if
// a backslash appears outside a quoted string, if a backslash inside a quoted
// string is followed by whitespace, or if input ends inside a quoted string.
// Character positions in error messages are zero-based byte offsets.
func Fields(input string) ([]string, error) {
	const (
		stateToplevel = iota
		stateInQuote
		stateInQuoteSlash
	)

	// Characters that always form a token of their own at top level.
	const operators = "()?,+*-/%;="

	var (
		ret   []string
		state = stateToplevel
		// start is the byte offset at which the token currently being
		// scanned begins, or -1 when no token is in progress. Tokens are
		// always contiguous, so they can be sliced straight out of input
		// instead of being rebuilt byte by byte (converting a single byte
		// with string(b) would re-encode bytes >= 0x80 and corrupt UTF-8).
		start = -1
		// quote is the delimiter that opened the current quoted string.
		quote byte
	)

	// flush emits the in-progress token, if any, ending just before end.
	flush := func(end int) {
		if start >= 0 {
			ret = append(ret, input[start:end])
			start = -1
		}
	}

	for pos := 0; pos < len(input); pos++ {
		c := input[pos]

		switch state {
		case stateToplevel:
			switch {
			case isWhitespace(c):
				flush(pos)

			case c == '"' || c == '\'':
				// A quoted string may only begin on a token boundary.
				if start >= 0 {
					return nil, fmt.Errorf(`Unexpected %c at char %d`, c, pos)
				}
				start = pos
				quote = c
				state = stateInQuote

			case c == '\\':
				return nil, fmt.Errorf(`Unexpected \ at char %d`, pos)

			case strings.IndexByte(operators, c) >= 0:
				// Tokenize separately, even if they appear touching
				// another top-level token. Should still be safe to
				// re-join.
				flush(pos)
				ret = append(ret, input[pos:pos+1])

			default:
				if start < 0 {
					start = pos
				}
			}

		case stateInQuote:
			switch c {
			case quote:
				// Include the closing delimiter in the token.
				flush(pos + 1)
				state = stateToplevel
			case '\\':
				state = stateInQuoteSlash
			}

		case stateInQuoteSlash:
			if isWhitespace(c) {
				return nil, fmt.Errorf(`Unexpected whitespace after \ at char %d`, pos)
			}
			state = stateInQuote
		}
	}

	// Reached the end of input stream
	if state != stateToplevel {
		return nil, fmt.Errorf(`Unexpected end of quoted input`)
	}
	flush(len(input))
	return ret, nil
}