Problème de reconnaissance des champs ?

This commit is contained in:
Laurent Ulrich 2024-07-08 14:47:34 +02:00
parent f9b8ec213a
commit 892e9a62ec
2 changed files with 71 additions and 15 deletions

View File

@ -6,13 +6,13 @@ import (
"unicode/utf8"
)
type Parser struct {
// CsvParser splits a delimited line into values keyed by field name.
type CsvParser struct {
// enclosers holds the encloser pairs; Parse reads the first rune of each
// entry as the opening character and the second rune as the closing one.
enclosers []string
// delimiter is the single rune that separates values outside an enclosed field.
delimiter rune
// fields holds the field names, in positional order, used as keys of the
// map built by Parse.
// NOTE(review): presumably filled from Initialize's lineFormat — confirm.
fields []string
}
func (p *Parser) Initialize(delimiter string, enclosers []string, lineFormat string) error {
func (p *CsvParser) Initialize(delimiter string, enclosers []string, lineFormat string) error {
if utf8.RuneCountInString(delimiter) != 1 {
return fmt.Errorf("delimiter shoud be one character")
}
@ -36,23 +36,40 @@ func (p *Parser) Initialize(delimiter string, enclosers []string, lineFormat str
return nil
}
func (p *Parser) Parse(line string) (map[string]string, error) {
func (p *CsvParser) Parse(line string) (map[string]string, error) {
inEnclosedField := false
currentEncloserEnd := ' '
escape := false
currentFieldIndex := 0
ret := make(map[string]string)
value := ""
valueStart := 0
valueEnd := 0
for index, r := range line {
if r == p.delimiter {
if r == '\\' {
escape = !escape
} else if inEnclosedField {
if r == currentEncloserEnd && !escape {
if currentFieldIndex < len(p.fields) {
ret[p.fields[currentFieldIndex]] = line[valueStart : valueEnd-valueStart]
}
currentFieldIndex++
inEnclosedField = false
}
} else if r == p.delimiter {
valueEnd = index
if currentFieldIndex < len(p.fields) {
ret[p.fields[currentFieldIndex]] = line[valueStart : valueEnd-valueStart]
}
currentFieldIndex++
ret[p.fields[currentFieldIndex]] = value
continue
}
for _, encloser := range p.enclosers {
runes := []rune(encloser)
if r == runes[0] {
// opening encloser
tmpStr := line[index:]
for tmpIndex, tmpR := range tmpStr {
} else {
for _, encloser := range p.enclosers {
runes := []rune(encloser)
if r == runes[0] {
// opening encloser
inEnclosedField = true
currentEncloserEnd = runes[1]
break
}
}
}

39
csvparser_test.go Normal file
View File

@ -0,0 +1,39 @@
package csvparser
import (
"fmt"
"testing"
)
// TestCorrectLines feeds a handful of well-formed lines to the parser and
// fails if initialization or any call to Parse reports an error.
//
// Only the absence of an error is checked; each parsed event is printed for
// inspection rather than compared with an expected value.
func TestCorrectLines(t *testing.T) {
	var csvParser CsvParser
	// A parser that failed to initialize would make every result below
	// meaningless, so stop immediately instead of ignoring the error.
	if err := csvParser.Initialize(" ", []string{"\"\"", "[]"}, "firstname lastname complete_name"); err != nil {
		t.Fatalf("Initialize failed: %v", err)
	}

	lines := []string{
		// Empty input.
		"",
		// Last value is enclosed in quotes and contains the delimiter.
		"John Doe \"John Doe\"",
		// Fewer space-separated tokens than names in the line format.
		"John Doe",
		// More space-separated tokens than names in the line format.
		"John Doe I don't know him",
	}

	for _, line := range lines {
		fmt.Println("parsing:", line)
		event, err := csvParser.Parse(line)
		if err != nil {
			// Quote the offending line so the failure identifies which input broke.
			t.Fatalf("Parsing of %q failed %v %v", line, err, event)
		}
		fmt.Println("event is", event)
	}
}