80 lines
1.9 KiB
Go
80 lines
1.9 KiB
Go
package csvparser
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// CsvParser splits delimiter-separated lines into named fields.
//
// The zero value carries no configuration; call Initialize to set the
// delimiter, the encloser pairs and the field names before calling Parse.
type CsvParser struct {
	// enclosers lists the encloser pairs. Each entry is a two-character
	// string: the opening character followed by the closing character
	// (for example `""` or `[]`).
	enclosers []string
	// delimiter is the single character that separates fields.
	delimiter rune
	// fields holds the field names in column order; the value found in
	// column i is reported under fields[i].
	fields []string
}
|
|
|
|
func (p *CsvParser) Initialize(delimiter string, enclosers []string, lineFormat string) error {
|
|
if utf8.RuneCountInString(delimiter) != 1 {
|
|
return fmt.Errorf("delimiter shoud be one character")
|
|
}
|
|
p.enclosers = make([]string, 0)
|
|
for _, encloser := range enclosers {
|
|
if utf8.RuneCountInString(encloser) != 2 {
|
|
return fmt.Errorf("encolser should have to characters")
|
|
}
|
|
}
|
|
p.enclosers = enclosers
|
|
p.delimiter = []rune(delimiter)[0]
|
|
for _, pair := range enclosers {
|
|
if utf8.RuneCountInString(pair) != 2 {
|
|
return fmt.Errorf("encoloser should contain two characters: %s", pair)
|
|
}
|
|
}
|
|
// line format is in the form of: field1 field2 ignore ...
|
|
// if field name is ignore, it is parsed but not retained
|
|
p.fields = strings.Split(lineFormat, " ")
|
|
|
|
return nil
|
|
}
|
|
|
|
func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
|
inEnclosedField := false
|
|
currentEncloserEnd := ' '
|
|
escape := false
|
|
currentFieldIndex := 0
|
|
|
|
ret := make(map[string]string)
|
|
valueStart := 0
|
|
valueEnd := 0
|
|
for index, r := range line {
|
|
if r == '\\' {
|
|
escape = !escape
|
|
} else if inEnclosedField {
|
|
if r == currentEncloserEnd && !escape {
|
|
if currentFieldIndex < len(p.fields) {
|
|
ret[p.fields[currentFieldIndex]] = line[valueStart : valueEnd-valueStart]
|
|
}
|
|
currentFieldIndex++
|
|
inEnclosedField = false
|
|
}
|
|
} else if r == p.delimiter {
|
|
valueEnd = index
|
|
if currentFieldIndex < len(p.fields) {
|
|
ret[p.fields[currentFieldIndex]] = line[valueStart : valueEnd-valueStart]
|
|
}
|
|
currentFieldIndex++
|
|
} else {
|
|
for _, encloser := range p.enclosers {
|
|
runes := []rune(encloser)
|
|
if r == runes[0] {
|
|
// opening encloser
|
|
inEnclosedField = true
|
|
currentEncloserEnd = runes[1]
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return ret, nil
|
|
}
|