csv-parser/csvparser.go

94 lines
2.4 KiB
Go

package csvparser
import (
"fmt"
"strings"
"unicode/utf8"
)
// CsvParser splits single lines of delimiter-separated text into named
// values. Configure it with Initialize before calling Parse.
type CsvParser struct {
	// enclosers holds two-rune strings: the opening rune followed by the
	// closing rune of a quoting pair, e.g. `""` or "[]".
	enclosers []string
	// delimiter is the rune that separates fields.
	delimiter rune
	// fields lists the field names in column order.
	fields []string
}

// Initialize configures the parser.
//
// delimiter must be exactly one character. Each entry of enclosers must be
// exactly two characters: the opening and the closing character of a quoting
// pair. lineFormat is a whitespace-separated list of field names in column
// order ("field1 field2 ignore ..."); a column named "ignore" is parsed but
// not retained by Parse.
//
// On error the parser is left exactly as it was before the call.
func (p *CsvParser) Initialize(delimiter string, enclosers []string, lineFormat string) error {
	if utf8.RuneCountInString(delimiter) != 1 {
		return fmt.Errorf("delimiter should be one character, got %q", delimiter)
	}
	for _, pair := range enclosers {
		if utf8.RuneCountInString(pair) != 2 {
			return fmt.Errorf("encloser should contain two characters: %q", pair)
		}
	}

	// Assign only after every argument has been validated so that a failed
	// call cannot leave the parser half-configured.
	p.delimiter, _ = utf8.DecodeRuneInString(delimiter)
	// Copy the slice: later changes to the caller's slice must not be able to
	// invalidate the pairs that were validated above.
	p.enclosers = append([]string(nil), enclosers...)
	// strings.Fields (rather than splitting on a single space) avoids phantom
	// empty field names for an empty format or for repeated, leading or
	// trailing spaces.
	p.fields = strings.Fields(lineFormat)
	return nil
}

// Parse splits line on the configured delimiter and returns the values keyed
// by the field names given to Initialize.
//
// Rules applied while scanning:
//   - A delimiter inside an encloser pair does not split the field.
//   - When a field starts with an opening encloser and its closing encloser
//     is the last character of the field, both enclosers are stripped from
//     the value. Enclosers appearing elsewhere are kept verbatim.
//   - A backslash makes the character that follows it literal with respect to
//     closing an enclosure. Backslashes are not removed from the value, and an
//     escaped delimiter outside an enclosure still splits.
//   - An enclosure that is never closed runs to the end of the line.
//   - Values beyond the declared fields are dropped; fields named "ignore"
//     are not stored; missing trailing fields are simply absent.
//   - An empty line yields an empty map.
//
// The returned error is currently always nil.
func (p *CsvParser) Parse(line string) (map[string]string, error) {
	const ignoredFieldName = "ignore"

	ret := make(map[string]string, len(p.fields))
	if line == "" {
		return ret, nil
	}

	var (
		fieldIndex  int  // index into p.fields of the field being scanned
		fieldStart  int  // byte offset of the first rune of the current field
		valueStart  int  // byte offset where the retained value begins
		closerStart = -1 // byte offset of the encloser that closed a wrapped field
		closerEnd   = -1 // byte offset just past that closing encloser
		wrapped     bool // current field began with an opening encloser
		inEnclosed  bool // currently between an opening and a closing encloser
		encloserEnd rune // closing rune that ends the current enclosure
		escaped     bool // previous rune was an unescaped backslash
	)

	// emit stores the current field, whose raw text ends at byte offset end.
	emit := func(end int) {
		if fieldIndex >= len(p.fields) {
			return // more values than declared fields
		}
		name := p.fields[fieldIndex]
		if name == ignoredFieldName {
			return
		}
		// Strip the closing encloser only when it is the very last rune of a
		// field that also started with the opening encloser.
		if wrapped && closerEnd == end {
			end = closerStart
		}
		ret[name] = line[valueStart:end]
	}

	// Decode manually so the byte width of every rune is known; offsets into
	// line are byte offsets, not rune counts.
	for index := 0; index < len(line); {
		r, width := utf8.DecodeRuneInString(line[index:])
		next := index + width

		// The escape flag covers exactly one rune after a backslash.
		wasEscaped := escaped
		escaped = r == '\\' && !wasEscaped

		switch {
		case r == '\\':
			// Only the escape flag changes; the backslash stays in the value.
		case inEnclosed:
			if r == encloserEnd && !wasEscaped {
				inEnclosed = false
				if wrapped {
					closerStart, closerEnd = index, next
				}
			}
		case r == p.delimiter:
			emit(index)
			fieldIndex++
			fieldStart, valueStart = next, next
			wrapped = false
			closerStart, closerEnd = -1, -1
		default:
			if closer, ok := p.closerFor(r); ok {
				inEnclosed = true
				encloserEnd = closer
				// Only an encloser at the very start of a field is removed
				// from the value; one in the middle just protects delimiters.
				if index == fieldStart {
					wrapped = true
					valueStart = next
				}
			}
		}
		index = next
	}

	// The last field has no delimiter after it.
	emit(len(line))
	return ret, nil
}

// closerFor reports the closing rune paired with r when r is a configured
// opening encloser.
func (p *CsvParser) closerFor(r rune) (rune, bool) {
	for _, pair := range p.enclosers {
		open, size := utf8.DecodeRuneInString(pair)
		if r == open {
			closer, _ := utf8.DecodeRuneInString(pair[size:])
			return closer, true
		}
	}
	return 0, false
}