From 6ec99a8e161dda68a4fbb7dff6bf2150ef3df061 Mon Sep 17 00:00:00 2001 From: Laurent ULRICH Date: Mon, 21 Mar 2022 14:45:07 +0000 Subject: [PATCH] making code more generic --- csv-parser.go | 88 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 82 insertions(+), 6 deletions(-) diff --git a/csv-parser.go b/csv-parser.go index e0358cf..ba55a06 100644 --- a/csv-parser.go +++ b/csv-parser.go @@ -1,5 +1,8 @@ package csvparser +import ( + "strings" +) type EncloserId int const ( @@ -7,18 +10,91 @@ const ( SingleQuotes RoundBrackets SquareBrackets - + CurlyBrackets ) +type Encloser struct { + Open rune + Close rune +} + +var EnclosersRunes = map[EncloserId]Encloser{ + DoubleQuotes: Encloser{'"', '"'}, + SingleQuotes: Encloser{'\'', '\''}, + RoundBrackets: Encloser{'(', ')'}, + SquareBrackets: Encloser{'[', ']'}, + CurlyBrackets: Encloser{'{', '}'}, +} type CsvParser struct { - enclosers []EncloserId - delimiter rune + Enclosers []EncloserId + Delimiter rune + DelimiterString string } func (parser *CsvParser) Init() { - parser.enclosers = append(parser.enclosers, DoubleQuotes, SquareBrackets) - parser.delimiter = ' ' + parser.Enclosers = []EncloserId{DoubleQuotes, SquareBrackets} + parser.Delimiter = ' ' + parser.DelimiterString = string(parser.Delimiter) } -func (parser *CsvParser) Parse(line string) { +func (parser *CsvParser) Parse(line string) error { + for len(line) > 0 { + line = strings.TrimLeft(line, parser.DelimiterString) + line = strings.TrimRight(line, parser.DelimiterString) + + if len(line) == 0 { + break + } + // Search for an encloser + for id := range parser.Enclosers { + if line[0] == EnclosersRunes[id].Open { + endChar := EnclosersRunes[id].Close + endOfFieldFound := false + l := len(line) + for i := 1; i < l; i++ { + if line[i] == endChar && line[i-1] != '\\' { + fields = append(fields, line[1:i]) + line = line[i+1:] + endOfFieldFound = true + break + } + } + if !endOfFieldFound { + return nil, errors.New("#ERR: bad format") + } + break; + } + } + if line[0] == '"' || line[0] == '[' { + var endChar byte = '"' + if line[0] == '[' { + endChar = ']' + } + endOfFieldFound := false + l := len(line) + for i := 1; i < l; i++ { + if line[i] == endChar && line[i-1] != '\\' { + fields = append(fields, line[1:i]) + line = line[i+1:] + endOfFieldFound = true + break + } + } + if !endOfFieldFound { + return nil, errors.New("#ERR: bad format") + } + } else { + nextSpace := strings.IndexAny(line, " \t") + if nextSpace != -1 { + fields = append(fields, line[:nextSpace]) + line = line[nextSpace:] + } else { + fields = append(fields, line) + line = "" + break + } + } + } + return fields, nil + }