From 892e9a62ec1c9d624d657b5e05400d760eec2f05 Mon Sep 17 00:00:00 2001
From: Laurent Ulrich
Date: Mon, 8 Jul 2024 14:47:34 +0200
Subject: [PATCH] =?UTF-8?q?Probl=C3=A8me=20de=20reconnaissance=20des=20cha?=
 =?UTF-8?q?mps=20=3F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 csvparser.go      | 47 ++++++++++++++++++++++++++++++++---------------
 csvparser_test.go | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+), 15 deletions(-)
 create mode 100644 csvparser_test.go

diff --git a/csvparser.go b/csvparser.go
index 35f98ad..83293b4 100644
--- a/csvparser.go
+++ b/csvparser.go
@@ -6,13 +6,13 @@ import (
 	"unicode/utf8"
 )
 
-type Parser struct {
+type CsvParser struct {
 	enclosers []string
 	delimiter rune
 	fields    []string
 }
 
-func (p *Parser) Initialize(delimiter string, enclosers []string, lineFormat string) error {
+func (p *CsvParser) Initialize(delimiter string, enclosers []string, lineFormat string) error {
 	if utf8.RuneCountInString(delimiter) != 1 {
 		return fmt.Errorf("delimiter shoud be one character")
 	}
@@ -36,23 +36,40 @@ func (p *Parser) Initialize(delimiter string, enclosers []string, lineFormat str
 	return nil
 }
 
-func (p *Parser) Parse(line string) (map[string]string, error) {
+func (p *CsvParser) Parse(line string) (map[string]string, error) {
+	inEnclosedField := false
+	currentEncloserEnd := ' '
+	escape := false
 	currentFieldIndex := 0
+
 	ret := make(map[string]string)
-	value := ""
+	valueStart := 0
+	valueEnd := 0
 	for index, r := range line {
-		if r == p.delimiter {
+		if r == '\\' {
+			escape = !escape
+		} else if inEnclosedField {
+			if r == currentEncloserEnd && !escape {
+				if currentFieldIndex < len(p.fields) {
+					ret[p.fields[currentFieldIndex]] = line[valueStart : valueEnd-valueStart]
+				}
+				currentFieldIndex++
+				inEnclosedField = false
+			}
+		} else if r == p.delimiter {
+			valueEnd = index
+			if currentFieldIndex < len(p.fields) {
+				ret[p.fields[currentFieldIndex]] = line[valueStart : valueEnd-valueStart]
+			}
 			currentFieldIndex++
-			ret[p.fields[currentFieldIndex]] = value
-			continue
-		}
-		for _, encloser := range p.enclosers {
-			runes := []rune(encloser)
-			if r == runes[0] {
-				// opening encloser
-				tmpStr := line[index:]
-				for tmpIndex, tmpR := range tmpStr {
-
+		} else {
+			for _, encloser := range p.enclosers {
+				runes := []rune(encloser)
+				if r == runes[0] {
+					// opening encloser
+					inEnclosedField = true
+					currentEncloserEnd = runes[1]
+					break
 				}
 			}
 		}
diff --git a/csvparser_test.go b/csvparser_test.go
new file mode 100644
index 0000000..52a002d
--- /dev/null
+++ b/csvparser_test.go
@@ -0,0 +1,39 @@
+package csvparser
+
+import (
+	"fmt"
+	"testing"
+)
+
+func TestCorrectLines(t *testing.T) {
+	var csvParser CsvParser
+	csvParser.Initialize(" ", []string{"\"\"", "[]"}, "firstname lastname complete_name")
+	line := ""
+	fmt.Println("parsing:", line)
+	event, err := csvParser.Parse(line)
+	if err != nil {
+		t.Fatalf("Parsing of empty line failed %v %v", err, event)
+	}
+	fmt.Println("event is", event)
+	line = "John Doe \"John Doe\""
+	fmt.Println("parsing:", line)
+	event, err = csvParser.Parse(line)
+	if err != nil {
+		t.Fatalf("Parsing of empty line failed %v %v", err, event)
+	}
+	fmt.Println("event is", event)
+	line = "John Doe"
+	fmt.Println("parsing:", line)
+	event, err = csvParser.Parse(line)
+	if err != nil {
+		t.Fatalf("Parsing of empty line failed %v %v", err, event)
+	}
+	fmt.Println("event is", event)
+	line = "John Doe I don't know him"
+	fmt.Println("parsing:", line)
+	event, err = csvParser.Parse(line)
+	if err != nil {
+		t.Fatalf("Parsing of empty line failed %v %v", err, event)
+	}
+	fmt.Println("event is", event)
+}