Penser à gérer le cas de délimiteurs multiples qui s'enchaînent et qui ne doivent être pris en compte que pour un seul
This commit is contained in:
parent
3e4b170099
commit
9f83b3ce1f
116
csvparser.go
116
csvparser.go
|
@ -7,28 +7,22 @@ import (
|
|||
)
|
||||
|
||||
type CsvParser struct {
|
||||
enclosers []string
|
||||
delimiter rune
|
||||
fields []string
|
||||
enclosers []string
|
||||
delimiters string
|
||||
fields []string
|
||||
}
|
||||
|
||||
func (p *CsvParser) Initialize(delimiter string, enclosers []string, lineFormat string) error {
|
||||
if utf8.RuneCountInString(delimiter) != 1 {
|
||||
return fmt.Errorf("delimiter shoud be one character")
|
||||
func (p *CsvParser) Initialize(delimiters string, enclosers []string, lineFormat string) error {
|
||||
if utf8.RuneCountInString(delimiter) == 0 {
|
||||
return fmt.Errorf("delimiter shoud be at least one character")
|
||||
}
|
||||
p.enclosers = make([]string, 0)
|
||||
p.delimiters = delimiters
|
||||
for _, encloser := range enclosers {
|
||||
if utf8.RuneCountInString(encloser) != 2 {
|
||||
return fmt.Errorf("encolser should have to characters")
|
||||
}
|
||||
}
|
||||
p.enclosers = enclosers
|
||||
p.delimiter = []rune(delimiter)[0]
|
||||
for _, pair := range enclosers {
|
||||
if utf8.RuneCountInString(pair) != 2 {
|
||||
return fmt.Errorf("encoloser should contain two characters: %s", pair)
|
||||
}
|
||||
}
|
||||
// line format is in the form of: field1 field2 ignore ...
|
||||
// if field name is ignore, it is parsed but not retained
|
||||
p.fields = strings.Split(lineFormat, " ")
|
||||
|
@ -40,56 +34,60 @@ func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
|||
currentFieldIndex := 0
|
||||
valueStart := 0
|
||||
escape := false
|
||||
enclosed := false
|
||||
enclosed := false
|
||||
|
||||
ret := make(map[string]string)
|
||||
indexMax := len(line) - 1
|
||||
maxFieldIndex := len(p.fields) - 1
|
||||
delimiter := p.delimiter
|
||||
maxFieldIndex := len(p.fields) - 1
|
||||
delimiters := p.delimiters
|
||||
for index, r := range line {
|
||||
if r == '\\' {
|
||||
// Check if EOL before continue
|
||||
escape=true
|
||||
continue
|
||||
}
|
||||
if escape {
|
||||
escape=false
|
||||
continue
|
||||
}
|
||||
if r == delimiter {
|
||||
if p.fields[currentFieldIndex] != "ignore" {
|
||||
ret[p.fields[currentFieldIndex]] = line[valueStart : index]
|
||||
}
|
||||
currentFieldIndex++
|
||||
if currentFieldIndex > maxFieldIndex {
|
||||
break
|
||||
}
|
||||
valueStart = index + 1
|
||||
if enclosed {
|
||||
enclosed=false
|
||||
delimiter = p.delimiter
|
||||
// Omit next delimiter
|
||||
escape=true
|
||||
}
|
||||
continue
|
||||
}
|
||||
if index >= indexMax {
|
||||
if p.fields[currentFieldIndex] != "ignore" {
|
||||
ret[p.fields[currentFieldIndex]] = line[valueStart:]
|
||||
}
|
||||
continue
|
||||
}
|
||||
if r == '\\' {
|
||||
// Check if EOL before continue
|
||||
escape = true
|
||||
continue
|
||||
}
|
||||
if escape {
|
||||
escape = false
|
||||
continue
|
||||
}
|
||||
for _, d := range delimiters {
|
||||
delimiter = true
|
||||
}
|
||||
if delimiter {
|
||||
if p.fields[currentFieldIndex] != "ignore" {
|
||||
ret[p.fields[currentFieldIndex]] = line[valueStart:index]
|
||||
}
|
||||
currentFieldIndex++
|
||||
if currentFieldIndex > maxFieldIndex {
|
||||
break
|
||||
}
|
||||
valueStart = index + 1
|
||||
if enclosed {
|
||||
enclosed = false
|
||||
delimiter = p.delimiter
|
||||
// Omit next delimiter
|
||||
escape = true
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
for _, encloser := range p.enclosers {
|
||||
runes := []rune(encloser)
|
||||
if r == runes[0] {
|
||||
// opening encloser
|
||||
enclosed = true
|
||||
delimiter = runes[1]
|
||||
valueStart++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if index >= indexMax {
|
||||
if p.fields[currentFieldIndex] != "ignore" {
|
||||
ret[p.fields[currentFieldIndex]] = line[valueStart:]
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
for _, encloser := range p.enclosers {
|
||||
runes := []rune(encloser)
|
||||
if r == runes[0] {
|
||||
// opening encloser
|
||||
enclosed = true
|
||||
delimiters = runes[1]
|
||||
valueStart++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret, nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue