2 Commits

Author SHA1 Message Date
614ec535aa Ré-écriture de l'algo 2024-08-07 15:46:14 +02:00
5f6ef91d86 Ignorer le champs ignore 2024-08-07 13:46:53 +02:00
3 changed files with 53 additions and 46 deletions

View File

@@ -37,57 +37,55 @@ func (p *CsvParser) Initialize(delimiter string, enclosers []string, lineFormat
} }
func (p *CsvParser) Parse(line string) (map[string]string, error) { func (p *CsvParser) Parse(line string) (map[string]string, error) {
inEnclosedField := false
currentEncloserEnd := ' '
escape := false
currentFieldIndex := 0 currentFieldIndex := 0
valueStart := 0
escape := false
enclosed := false
ret := make(map[string]string) ret := make(map[string]string)
valueStart := 0
indexMax := len(line) - 1 indexMax := len(line) - 1
maxFieldIndex := len(p.fields) - 1
delimiter := p.delimiter
for index, r := range line { for index, r := range line {
if index == indexMax { if r == '\\' {
if currentFieldIndex < len(p.fields) { // Check if EOL before continue
//fmt.Println("start:", valueStart, "end:", index) escape=true
//fmt.Println("Found a field value for:", p.fields[currentFieldIndex], line[valueStart:index]) continue
if inEnclosedField && r == currentEncloserEnd { }
ret[p.fields[currentFieldIndex]] = line[valueStart:index] if escape {
} else { escape=false
ret[p.fields[currentFieldIndex]] = line[valueStart : index+1] continue
} }
if r == delimiter {
//fmt.Println("Index is:", index) ret[p.fields[currentFieldIndex]] = line[valueStart : index]
} currentFieldIndex++
} if currentFieldIndex > maxFieldIndex {
if r == '\\' { break
escape = !escape }
} else if inEnclosedField { valueStart = index + 1
if r == currentEncloserEnd && !escape { if enclosed {
inEnclosedField = false enclosed=false
} delimiter = p.delimiter
} else if r == p.delimiter { // Omit next delimiter
escape=true
if currentFieldIndex < len(p.fields) { }
//fmt.Println("start:", valueStart, "end:", index) continue
//fmt.Println("Found a field value for:", p.fields[currentFieldIndex], line[valueStart:index]) }
ret[p.fields[currentFieldIndex]] = line[valueStart:index] if index >= indexMax {
//fmt.Println("Index is:", index) ret[p.fields[currentFieldIndex]] = line[valueStart:]
valueStart = index + 1 continue
} }
currentFieldIndex++
} else {
for _, encloser := range p.enclosers {
runes := []rune(encloser)
if r == runes[0] {
// opening encloser
inEnclosedField = true
currentEncloserEnd = runes[1]
valueStart++
break
}
}
}
}
for _, encloser := range p.enclosers {
runes := []rune(encloser)
if r == runes[0] {
// opening encloser
enclosed = true
delimiter = runes[1]
valueStart++
break
}
}
}
return ret, nil return ret, nil
} }

View File

@@ -48,6 +48,14 @@ func TestCorrectLines(t *testing.T) {
} }
fmt.Println("event is", event) fmt.Println("event is", event)
line = "John Doe \"John Doe\"\\\\"
fmt.Println("parsing:", line)
event, err = csvParser.Parse(line)
if err != nil {
t.Fatalf("Parsing of empty line failed %v %v", err, event)
}
fmt.Println("event is", event)
line = "John Doe I don't know him" line = "John Doe I don't know him"
fmt.Println("parsing:", line) fmt.Println("parsing:", line)
event, err = csvParser.Parse(line) event, err = csvParser.Parse(line)

1
go.mod
View File

@@ -1,3 +1,4 @@
module git.passke.org/laurentu/csv-parser module git.passke.org/laurentu/csv-parser
toolchain go1.22.1
go 1.22 go 1.22