package lexer import ( "fmt" "strings" ) func isWhitespace(r byte) bool { return (r == ' ' || r == '\t' || r == '\r' || r == '\n') } func Quote(input string) string { return `"` + strings.ReplaceAll(strings.ReplaceAll(input, `\`, `\\`), `"`, `\"`) + `"` } // Fields splits a string into separate tokens using something kind of vaguely // like how SQL would do it. // The result still includes the quote and backslash characters. func Fields(input string) ([]string, error) { const ( StateToplevel = 0 StateWhitespace = 1 StateInDoubleQuote = 2 StateInDoubleQuoteSlash = 3 StateInSingleQuote = 4 StateInSingleQuoteSlash = 5 ) var ( ret []string state int = StateToplevel wip string ) for pos := 0; pos < len(input); pos++ { c := input[pos] switch state { case StateToplevel: if isWhitespace(c) { state = StateWhitespace if len(wip) != 0 { ret = append(ret, wip) wip = "" } } else if c == '"' { if len(wip) != 0 { return nil, fmt.Errorf(`Unexpected " at char %d`, pos) } wip += string(c) state = StateInDoubleQuote } else if c == '\'' { if len(wip) != 0 { return nil, fmt.Errorf(`Unexpected ' at char %d`, pos) } wip += string(c) state = StateInSingleQuote } else if c == '\\' { return nil, fmt.Errorf(`Unexpected \ at char %d`, pos) } else if c == '(' || c == ')' || c == '?' || c == ',' || c == '+' || c == '*' || c == '-' || c == '/' || c == '%' || c == ';' || c == '=' { // Tokenize separately, even if they appear touching another top-level token // Should still be safe to re-join if len(wip) != 0 { ret = append(ret, wip) wip = "" } ret = append(ret, string(c)) } else { wip += string(c) } case StateWhitespace: if isWhitespace(c) { // continue } else { state = StateToplevel pos-- // reparse } case StateInDoubleQuote: if c == '"' { wip += string(c) ret = append(ret, wip) wip = "" state = StateToplevel } else if c == '\\' { wip += string(c) state = StateInDoubleQuoteSlash } else { wip += string(c) } case StateInDoubleQuoteSlash: if isWhitespace(c) { return nil, fmt.Errorf(`Unexpected whitespace after \ at char %d`, pos) } else { wip += string(c) state = StateInDoubleQuote } case StateInSingleQuote: if c == '\'' { wip += string(c) ret = append(ret, wip) wip = "" state = StateToplevel } else if c == '\\' { wip += string(c) state = StateInSingleQuoteSlash } else { wip += string(c) } case StateInSingleQuoteSlash: if isWhitespace(c) { return nil, fmt.Errorf(`Unexpected whitespace after \ at char %d`, pos) } else { wip += string(c) state = StateInSingleQuote } } } // Reached the end of input stream switch state { case StateToplevel: if len(wip) > 0 { ret = append(ret, wip) wip = "" } return ret, nil case StateWhitespace: return ret, nil default: return nil, fmt.Errorf(`Unexpected end of quoted input`) } }