From afac87e1a9d1238ba15c8d3380d5c5fecec550e0 Mon Sep 17 00:00:00 2001 From: mappu Date: Mon, 11 Dec 2023 19:32:37 +1300 Subject: [PATCH] lexer: parse fields with a state machine --- lexer.go | 107 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 99 insertions(+), 8 deletions(-) diff --git a/lexer.go b/lexer.go index 56063cd..d00dd4d 100644 --- a/lexer.go +++ b/lexer.go @@ -31,6 +31,99 @@ func (l *lexer) Peek() (Token, error) { return tok, nil } +func (l *lexer) Fields(line string) ([]string, error) { + const ( + STATE_START = 0 + STATE_IN_WORD = 1 + STATE_IN_QUOTED_STRING = 3 + STATE_BACKSLASH = 4 + STATE_LINE_COMMENT = 5 + ) + + var ( + ret = []string{} + state = STATE_START + buff = "" + ) + + for _, c := range line { + + switch state { + case STATE_START: + if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == ',' { + continue + } else if c == ';' { + state = STATE_LINE_COMMENT + } else if c == '"' { + state = STATE_IN_QUOTED_STRING + } else { + buff = string(c) + state = STATE_IN_WORD + } + + case STATE_IN_WORD: + if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == ',' { + ret = append(ret, buff) + buff = "" + state = STATE_START + } else if c == ';' { + ret = append(ret, buff) + buff = "" + state = STATE_LINE_COMMENT + } else { + buff += string(c) + } + + case STATE_IN_QUOTED_STRING: + if c == '"' { + ret = append(ret, buff) + buff = "" + state = STATE_START + } else if c == '\\' { + state = STATE_BACKSLASH + } else { + buff += string(c) + } + + case STATE_BACKSLASH: + if c == 'n' { + buff += "\n" + } else if c == 'r' { + buff += "\r" + } else if c == 't' { + buff += "\t" + } else if c == 's' { + buff += " " + } else if c == '\\' { + buff += `\` + } else { + return nil, fmt.Errorf("Unknown backslash escape sequence") + } + state = STATE_IN_QUOTED_STRING + + case STATE_LINE_COMMENT: + if c == '\n' { + state = STATE_START + } // ignore all else + + default: + panic("impossible state") + } + } + + if state == STATE_IN_QUOTED_STRING { + return nil, fmt.Errorf("unterminated string literal") + } + + if state == STATE_IN_WORD { + ret = append(ret, buff) + buff = "" + state = STATE_START + } + + return ret, nil +} + func (l *lexer) Next() (Token, error) { if l.peek != nil { ret := l.peek @@ -46,21 +139,19 @@ func (l *lexer) Next() (Token, error) { return nil, err } - // Strip leading spaces - line = strings.TrimLeft(line, " \t\r\n") + // - // Strip trailing line-comments (;) - line, _, _ = strings.Cut(line, `;`) + fields, err := l.Fields(line) + if err != nil { + return nil, err + } - if len(line) == 0 { + if len(fields) == 0 { // This line only contained comments // Continue to the next line return l.Next() } - fields := strings.Fields(line) - // FIXME commas!? - switch strings.ToLower(fields[0]) { case "section": return SectionToken{fields[1]}, nil