From 614ec535aad3a2b8b7893e36b97aa23673d9e905 Mon Sep 17 00:00:00 2001 From: laurentu Date: Wed, 7 Aug 2024 15:46:14 +0200 Subject: [PATCH] =?UTF-8?q?R=C3=A9-=C3=A9criture=20de=20l'algo?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- csvparser.go | 83 +++++++++++++++++++++++++---------------------- csvparser_test.go | 8 +++++ 2 files changed, 52 insertions(+), 39 deletions(-) diff --git a/csvparser.go b/csvparser.go index 28704c7..880e735 100644 --- a/csvparser.go +++ b/csvparser.go @@ -37,50 +37,55 @@ func (p *CsvParser) Initialize(delimiter string, enclosers []string, lineFormat } func (p *CsvParser) Parse(line string) (map[string]string, error) { - inEnclosedField := false - currentEncloserEnd := ' ' - escape := false currentFieldIndex := 0 + valueStart := 0 + escape := false + enclosed := false ret := make(map[string]string) - valueStart := 0 indexMax := len(line) - 1 + maxFieldIndex := len(p.fields) - 1 + delimiter := p.delimiter for index, r := range line { - if index == indexMax { - if currentFieldIndex < len(p.fields) && p.fields[currentFieldIndex] != "ignore" { - if inEnclosedField && r == currentEncloserEnd { - ret[p.fields[currentFieldIndex]] = line[valueStart:index] - } else { - ret[p.fields[currentFieldIndex]] = line[valueStart : index+1] - } - } - } - if r == '\\' { - escape = !escape - } else if inEnclosedField { - if r == currentEncloserEnd && !escape { - inEnclosedField = false - } - } else if r == p.delimiter { - - if currentFieldIndex < len(p.fields) && p.fields[currentFieldIndex] != "ignore" { - ret[p.fields[currentFieldIndex]] = line[valueStart:index] - valueStart = index + 1 - } - currentFieldIndex++ - } else { - for _, encloser := range p.enclosers { - runes := []rune(encloser) - if r == runes[0] { - // opening encloser - inEnclosedField = true - currentEncloserEnd = runes[1] - valueStart++ - break - } - } - } - } + if r == '\\' { + // Check if EOL before continue + escape=true + continue + } + if escape { + escape=false + continue + } + if r == delimiter { + ret[p.fields[currentFieldIndex]] = line[valueStart : index] + currentFieldIndex++ + if currentFieldIndex > maxFieldIndex { + break + } + valueStart = index + 1 + if enclosed { + enclosed=false + delimiter = p.delimiter + // Omit next delimiter + escape=true + } + continue + } + if index >= indexMax { + ret[p.fields[currentFieldIndex]] = line[valueStart:] + continue + } + for _, encloser := range p.enclosers { + runes := []rune(encloser) + if r == runes[0] { + // opening encloser + enclosed = true + delimiter = runes[1] + valueStart++ + break + } + } + } return ret, nil } diff --git a/csvparser_test.go b/csvparser_test.go index 95aa266..9b8966d 100644 --- a/csvparser_test.go +++ b/csvparser_test.go @@ -48,6 +48,14 @@ func TestCorrectLines(t *testing.T) { } fmt.Println("event is", event) + line = "John Doe \"John Doe\"\\\\" + fmt.Println("parsing:", line) + event, err = csvParser.Parse(line) + if err != nil { + t.Fatalf("Parsing of empty line failed %v %v", err, event) + } + fmt.Println("event is", event) + line = "John Doe I don't know him" fmt.Println("parsing:", line) event, err = csvParser.Parse(line)