118 lines
2.6 KiB
Go
118 lines
2.6 KiB
Go
package csvparser
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// CsvParser splits delimited text lines into named fields.
//
// A parser must be configured with Initialize before Parse is called.
type CsvParser struct {
	// enclosers holds two-rune strings: the opening rune followed by the
	// closing rune (for example `""`, "[]" or "{}").
	enclosers []string
	// delimiters holds every rune that separates two fields.
	delimiters string
	// mergeDelimiters makes a run of consecutive delimiters count as one.
	mergeDelimiters bool
	// fields holds the field names in the order they appear on a line.
	fields []string
}

// Initialize configures the parser.
//
//   - delimiters: string containing every delimiter character.
//   - mergeDelimiters: if true, successive delimiters are considered as one.
//   - enclosers: each string is an encloser pair made of exactly two
//     characters, e.g. `""`, "[]", "{}".
//   - lineFormat: whitespace-separated field names, in line order. A field
//     named "ignore" is parsed but not retained.
//
// It returns an error if delimiters is empty, if an encloser is not exactly
// two characters long, or if lineFormat names no field. The parser is left
// unchanged when an error is returned.
func (p *CsvParser) Initialize(delimiters string, mergeDelimiters bool, enclosers []string, lineFormat string) error {
	if utf8.RuneCountInString(delimiters) == 0 {
		return fmt.Errorf("delimiter should be at least one character")
	}
	for _, encloser := range enclosers {
		if utf8.RuneCountInString(encloser) != 2 {
			return fmt.Errorf("encloser %q should have two characters", encloser)
		}
	}
	// strings.Fields tolerates repeated or surrounding whitespace, which
	// would otherwise produce empty field names.
	fields := strings.Fields(lineFormat)
	if len(fields) == 0 {
		return fmt.Errorf("line format should name at least one field")
	}

	// Everything is valid: only now touch the parser state.
	p.delimiters = delimiters
	p.mergeDelimiters = mergeDelimiters
	p.enclosers = enclosers
	p.fields = fields
	return nil
}

// parserState groups per-line parsing flags.
//
// NOTE(review): nothing in this part of the file references this type; it is
// kept untouched in case other code in the package uses it.
type parserState struct {
	delimiter   bool
	enclosed    bool
	encloserEnd rune
}

// Parse splits line according to the configuration given to Initialize and
// returns the retained values keyed by field name.
//
// Rules applied while scanning:
//   - A backslash escapes the following character, which is then never
//     treated as a delimiter or closing encloser. Values are returned
//     verbatim, backslashes included.
//   - A field whose first character is an opening encloser extends to the
//     matching closing character; delimiters inside it are plain text. Any
//     text between the closing character and the next delimiter is dropped.
//     An unterminated encloser takes the rest of the line.
//   - With mergeDelimiters set, delimiters in front of a field are skipped,
//     so a run of delimiters never yields an empty field.
//   - Values beyond the last configured field are ignored; fields with no
//     value on the line are absent from the result. A trailing delimiter
//     does not produce an empty field.
//
// It returns an error if the parser has not been initialized.
func (p *CsvParser) Parse(line string) (map[string]string, error) {
	if len(p.fields) == 0 {
		return nil, fmt.Errorf("parser is not initialized")
	}

	ret := make(map[string]string, len(p.fields))
	pos := 0
	for _, name := range p.fields {
		if p.mergeDelimiters {
			pos = p.skipDelimiters(line, pos)
		}
		if pos >= len(line) {
			break
		}
		value, next := p.readField(line, pos)
		// "ignore" fields consume a value without retaining it.
		if name != "ignore" {
			ret[name] = value
		}
		pos = next
	}
	return ret, nil
}

// skipDelimiters returns the first byte offset at or after pos that does not
// hold a delimiter.
func (p *CsvParser) skipDelimiters(line string, pos int) int {
	for pos < len(line) {
		r, size := utf8.DecodeRuneInString(line[pos:])
		if !strings.ContainsRune(p.delimiters, r) {
			break
		}
		pos += size
	}
	return pos
}

// readField reads the field starting at byte offset start (start < len(line))
// and returns its value and the offset where the next field begins.
func (p *CsvParser) readField(line string, start int) (string, int) {
	first, size := utf8.DecodeRuneInString(line[start:])
	// A delimiter wins over an encloser if a rune is configured as both.
	if !strings.ContainsRune(p.delimiters, first) {
		if closer, ok := p.closerFor(first); ok {
			return p.readEnclosed(line, start+size, closer)
		}
	}
	end, next := p.scanToDelimiter(line, start)
	return line[start:end], next
}

// readEnclosed reads an enclosed value whose content starts at byte offset
// start and ends before the first unescaped closer.
func (p *CsvParser) readEnclosed(line string, start int, closer rune) (string, int) {
	escaped := false
	for i := start; i < len(line); {
		r, size := utf8.DecodeRuneInString(line[i:])
		switch {
		case escaped:
			escaped = false
		case r == '\\':
			escaped = true
		case r == closer:
			// Drop whatever sits between the closer and the delimiter.
			_, next := p.scanToDelimiter(line, i+size)
			return line[start:i], next
		}
		i += size
	}
	// Unterminated encloser: be lenient and take the rest of the line.
	return line[start:], len(line)
}

// scanToDelimiter returns the offset of the first unescaped delimiter at or
// after start and the offset just past it; both are len(line) if none exists.
func (p *CsvParser) scanToDelimiter(line string, start int) (int, int) {
	escaped := false
	for i := start; i < len(line); {
		r, size := utf8.DecodeRuneInString(line[i:])
		switch {
		case escaped:
			escaped = false
		case r == '\\':
			escaped = true
		case strings.ContainsRune(p.delimiters, r):
			return i, i + size
		}
		i += size
	}
	return len(line), len(line)
}

// closerFor reports the closing rune paired with the opening rune r, if r
// opens one of the configured enclosers.
func (p *CsvParser) closerFor(r rune) (rune, bool) {
	for _, encloser := range p.enclosers {
		open, size := utf8.DecodeRuneInString(encloser)
		if r == open {
			closer, _ := utf8.DecodeRuneInString(encloser[size:])
			return closer, true
		}
	}
	return 0, false
}
|