3 Commits

Author SHA1 Message Date
3e4b170099 ne renvoie plus les ignore 2024-08-07 15:51:27 +02:00
614ec535aa Ré-écriture de l'algo 2024-08-07 15:46:14 +02:00
5f6ef91d86 Ignorer le champ ignore 2024-08-07 13:46:53 +02:00
3 changed files with 57 additions and 46 deletions

View File

@@ -37,57 +37,59 @@ func (p *CsvParser) Initialize(delimiter string, enclosers []string, lineFormat
} }
func (p *CsvParser) Parse(line string) (map[string]string, error) { func (p *CsvParser) Parse(line string) (map[string]string, error) {
inEnclosedField := false
currentEncloserEnd := ' '
escape := false
currentFieldIndex := 0 currentFieldIndex := 0
valueStart := 0
escape := false
enclosed := false
ret := make(map[string]string) ret := make(map[string]string)
valueStart := 0
indexMax := len(line) - 1 indexMax := len(line) - 1
maxFieldIndex := len(p.fields) - 1
delimiter := p.delimiter
for index, r := range line { for index, r := range line {
if index == indexMax { if r == '\\' {
if currentFieldIndex < len(p.fields) { // Check if EOL before continue
//fmt.Println("start:", valueStart, "end:", index) escape=true
//fmt.Println("Found a field value for:", p.fields[currentFieldIndex], line[valueStart:index]) continue
if inEnclosedField && r == currentEncloserEnd { }
ret[p.fields[currentFieldIndex]] = line[valueStart:index] if escape {
} else { escape=false
ret[p.fields[currentFieldIndex]] = line[valueStart : index+1] continue
} }
if r == delimiter {
//fmt.Println("Index is:", index) if p.fields[currentFieldIndex] != "ignore" {
} ret[p.fields[currentFieldIndex]] = line[valueStart : index]
} }
if r == '\\' { currentFieldIndex++
escape = !escape if currentFieldIndex > maxFieldIndex {
} else if inEnclosedField { break
if r == currentEncloserEnd && !escape { }
inEnclosedField = false valueStart = index + 1
} if enclosed {
} else if r == p.delimiter { enclosed=false
delimiter = p.delimiter
if currentFieldIndex < len(p.fields) { // Omit next delimiter
//fmt.Println("start:", valueStart, "end:", index) escape=true
//fmt.Println("Found a field value for:", p.fields[currentFieldIndex], line[valueStart:index]) }
ret[p.fields[currentFieldIndex]] = line[valueStart:index] continue
//fmt.Println("Index is:", index) }
valueStart = index + 1 if index >= indexMax {
} if p.fields[currentFieldIndex] != "ignore" {
currentFieldIndex++ ret[p.fields[currentFieldIndex]] = line[valueStart:]
} else { }
for _, encloser := range p.enclosers { continue
runes := []rune(encloser) }
if r == runes[0] {
// opening encloser
inEnclosedField = true
currentEncloserEnd = runes[1]
valueStart++
break
}
}
}
}
for _, encloser := range p.enclosers {
runes := []rune(encloser)
if r == runes[0] {
// opening encloser
enclosed = true
delimiter = runes[1]
valueStart++
break
}
}
}
return ret, nil return ret, nil
} }

View File

@@ -48,6 +48,14 @@ func TestCorrectLines(t *testing.T) {
} }
fmt.Println("event is", event) fmt.Println("event is", event)
line = "John Doe \"John Doe\"\\\\"
fmt.Println("parsing:", line)
event, err = csvParser.Parse(line)
if err != nil {
t.Fatalf("Parsing of empty line failed %v %v", err, event)
}
fmt.Println("event is", event)
line = "John Doe I don't know him" line = "John Doe I don't know him"
fmt.Println("parsing:", line) fmt.Println("parsing:", line)
event, err = csvParser.Parse(line) event, err = csvParser.Parse(line)

1
go.mod
View File

@@ -1,3 +1,4 @@
module git.passke.org/laurentu/csv-parser module git.passke.org/laurentu/csv-parser
toolchain go1.22.1
go 1.22 go 1.22