From d6958291fb986cd0d67f15e0d8b68e7b3f49d675 Mon Sep 17 00:00:00 2001 From: Laurent ULRICH Date: Mon, 21 Mar 2022 20:37:26 +0000 Subject: [PATCH] Parser implemented but absolutely not tested --- csv-parser.go | 103 +++++++++++++++++++++++--------------------------- 1 file changed, 47 insertions(+), 56 deletions(-) diff --git a/csv-parser.go b/csv-parser.go index ba55a06..0ee8417 100644 --- a/csv-parser.go +++ b/csv-parser.go @@ -2,11 +2,14 @@ package csvparser import ( "strings" + "errors" ) + type EncloserId int const ( - DoubleQuotes EncloserId = iota + None EncloserId = iota + DoubleQuotes SingleQuotes RoundBrackets SquareBrackets @@ -14,8 +17,8 @@ const ( ) type Encloser struct { - Open rune - Close rune + Open byte + Close byte } var EnclosersRunes = map[EncloserId]Encloser{ @@ -27,74 +30,62 @@ var EnclosersRunes = map[EncloserId]Encloser{ } type CsvParser struct { - Enclosers []EncloserId - Delimiter rune - DelimiterString string + Enclosers []EncloserId + Delimiter string + Fields []string + Line string } func (parser *CsvParser) Init() { parser.Enclosers = []EncloserId{DoubleQuotes, SquareBrackets} - parser.Delimiter = ' ' - parser.DelimiterString = string(parser.Delimiter) + parser.Delimiter = " \t" } + +func (parser *CsvParser) ExtractEnclosedFieldValue(line string, endChar byte) (string, error) { + lineLen := len(line) + for i := 1; i < lineLen; i++ { + if line[i] == endChar && line[i-1] != '\\' { + parser.Fields = append(parser.Fields, line[1:i]) + return line[i+1:], nil + } + } + return line, errors.New("Encloser close not found") +} + func (parser *CsvParser) Parse(line string) error { + var err error = nil for len(line) > 0 { - line = strings.TrimLeft(line, parser.DelimiterString) - line = strings.TrimRight(line, parser.DelimiterString) + line = strings.TrimLeft(line, parser.Delimiter) + line = strings.TrimRight(line, parser.Delimiter) if len(line) == 0 { break } // Search for an encloser - for id := range parser.Enclosers { + encloserId := None + 
for _, id := range parser.Enclosers { if line[0] == EnclosersRunes[id].Open { - endChar := EnclosersRunes[id].Close - endOfFieldFound := false - l := len(line) - for i := 1; i < l; i++ { - if line[i] == endChar && line[i-1] != '\\' { - fields = append(fields, line[1:i]) - line = line[i+1:] - endOfFieldFound = true - break - } - } - if !endOfFieldFound { - return nil, errors.New("#ERR: bad format") - } - break; - } - } - if line[0] == '"' || line[0] == '[' { - var endChar byte = '"' - if line[0] == '[' { - endChar = ']' - } - endOfFieldFound := false - l := len(line) - for i := 1; i < l; i++ { - if line[i] == endChar && line[i-1] != '\\' { - fields = append(fields, line[1:i]) - line = line[i+1:] - endOfFieldFound = true - break - } - } - if !endOfFieldFound { - return nil, errors.New("#ERR: bad format") - } - } else { - nextSpace := strings.IndexAny(line, " \t") - if nextSpace != -1 { - fields = append(fields, line[:nextSpace]) - line = line[nextSpace:] - } else { - fields = append(fields, line) - line = "" + encloserId = id break } } - } - return fields, nil + if encloserId != None { + line, err = parser.ExtractEnclosedFieldValue(line, EnclosersRunes[encloserId].Close) + if err != nil { + return err + } + } else { + nextSpace := strings.IndexAny(line, parser.Delimiter) + if nextSpace != -1 { + parser.Fields = append(parser.Fields, line[:nextSpace]) + line = line[nextSpace:] + } else { + parser.Fields = append(parser.Fields, line) + line = "" + break + } + } + } + return nil }