// Package csvparser splits delimited text lines into named fields.
package csvparser

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

const (
	// ignoredField is the field name that marks a column which is parsed
	// but not stored in the result.
	ignoredField = "ignore"

	// escapeRune makes the rune that follows it lose any special meaning
	// (delimiter, encloser or escape).
	escapeRune = '\\'
)

// CsvParser splits a line into named fields according to a configurable set
// of delimiters, optional enclosers and a line format. It must be configured
// with Initialize before Parse is called.
type CsvParser struct {
	// enclosers holds the encloser pairs; each entry is exactly two runes:
	// the opening rune followed by the closing rune.
	enclosers []string
	// delimiters holds every rune that separates two values.
	delimiters string
	// mergeDelimiters makes a run of successive delimiters count as one.
	mergeDelimiters bool
	// fields holds the field names in column order.
	fields []string
}

// Initialize configures the parser.
//
//   - delimiters: string with all delimiter characters (at least one).
//   - mergeDelimiters: if true, all successive delimiters are considered as
//     one, so empty values between delimiters are never produced.
//   - enclosers: each string is an encloser pair of exactly two characters,
//     opening then closing: `""`, `[]`, `{}`...
//   - lineFormat: whitespace-separated field names, one per column, in the
//     form "field1 field2 ignore ...". A column named "ignore" is parsed but
//     not retained.
//
// It returns an error, and leaves the parser untouched, when delimiters is
// empty, when an encloser is not exactly two characters long, or when
// lineFormat does not name any field.
func (p *CsvParser) Initialize(delimiters string, mergeDelimiters bool, enclosers []string, lineFormat string) error {
	if delimiters == "" {
		return fmt.Errorf("delimiters should contain at least one character")
	}
	for _, encloser := range enclosers {
		if utf8.RuneCountInString(encloser) != 2 {
			return fmt.Errorf("encloser %q should have exactly two characters", encloser)
		}
	}
	// Fields (rather than Split on " ") so repeated spaces do not create
	// empty field names.
	fields := strings.Fields(lineFormat)
	if len(fields) == 0 {
		return fmt.Errorf("line format should name at least one field")
	}

	// Assign only once everything is validated so a failed call does not
	// leave the parser half configured.
	p.delimiters = delimiters
	p.mergeDelimiters = mergeDelimiters
	// Copy so later changes to the caller's slice do not affect the parser.
	p.enclosers = append([]string(nil), enclosers...)
	p.fields = fields
	return nil
}

// parserState is not referenced by the code in this file; Parse keeps its
// scanning state in local variables. The type is retained unchanged in case
// other files of the package use it.
type parserState struct {
	delimiter   bool
	enclosed    bool
	encloserEnd rune
}

// Parse splits line into values and returns them keyed by the field names
// given to Initialize. Columns named "ignore" are skipped.
//
// Parsing rules:
//   - A backslash escapes the next rune: that rune is never treated as a
//     delimiter or an encloser. Values are returned as raw substrings of
//     line, so the backslash itself stays in the value.
//   - A value whose first rune is an opening encloser runs up to the
//     matching closing encloser; delimiters in between belong to the value
//     and the enclosers themselves are stripped. Enclosers do not nest.
//   - With mergeDelimiters, empty values between delimiters are dropped, so
//     leading, trailing and repeated delimiters produce no field. Without
//     it, every delimiter ends a (possibly empty) value.
//   - Parsing stops as soon as every field has a value; the remainder of the
//     line is ignored. Fields missing from a short line are absent from the
//     returned map.
//
// It returns a nil map and an error when the parser has not been
// initialized, when an encloser is not closed before the end of the line, or
// when a closing encloser is followed by anything other than a delimiter.
func (p *CsvParser) Parse(line string) (map[string]string, error) {
	if len(p.fields) == 0 {
		return nil, fmt.Errorf("parser is not initialized")
	}

	ret := make(map[string]string, len(p.fields))
	fieldIndex := 0
	// store records value for the current column and reports whether every
	// column has now been filled.
	store := func(value string) bool {
		if name := p.fields[fieldIndex]; name != ignoredField {
			ret[name] = value
		}
		fieldIndex++
		return fieldIndex == len(p.fields)
	}

	var (
		valueStart  int    // byte offset where the current value begins
		escaped     bool   // previous rune was an unescaped backslash
		enclosed    bool   // currently between an opening and a closing encloser
		encloserEnd rune   // closing rune expected while enclosed
		closed      bool   // an enclosed value is complete and awaits its delimiter
		closedValue string // the completed enclosed value
	)

	// Decode manually instead of ranging so the exact byte width of each
	// rune is known, even for invalid UTF-8 and multi-byte delimiters.
	for i := 0; i < len(line); {
		r, size := utf8.DecodeRuneInString(line[i:])
		next := i + size

		switch {
		case closed:
			if !strings.ContainsRune(p.delimiters, r) {
				return nil, fmt.Errorf("unexpected character %q after closing encloser at offset %d", r, i)
			}
			closed = false
			if store(closedValue) {
				return ret, nil
			}
			valueStart = next
		case escaped:
			// Escaped rune: plain content, whatever it is.
			escaped = false
		case r == escapeRune:
			escaped = true
		case enclosed:
			if r == encloserEnd {
				enclosed = false
				closed = true
				closedValue = line[valueStart:i]
			}
		case strings.ContainsRune(p.delimiters, r):
			// In merge mode an empty value means this delimiter directly
			// follows another one (or starts the line): collapse it.
			if !(p.mergeDelimiters && i == valueStart) {
				if store(line[valueStart:i]) {
					return ret, nil
				}
			}
			valueStart = next
		case i == valueStart:
			// Enclosers are only recognized as the first rune of a value.
			if end, ok := p.closingEncloser(r); ok {
				enclosed = true
				encloserEnd = end
				valueStart = next
			}
		}
		i = next
	}

	switch {
	case enclosed:
		return nil, fmt.Errorf("unterminated encloser: missing closing %q", encloserEnd)
	case closed:
		store(closedValue)
	case valueStart < len(line):
		store(line[valueStart:])
	case !p.mergeDelimiters && len(line) > 0:
		// The line ends with a delimiter: it is followed by an empty value.
		store("")
	}
	return ret, nil
}

// closingEncloser returns the closing rune paired with r and true when r is
// the opening rune of one of the configured enclosers.
func (p *CsvParser) closingEncloser(r rune) (rune, bool) {
	for _, encloser := range p.enclosers {
		open, size := utf8.DecodeRuneInString(encloser)
		if r == open {
			end, _ := utf8.DecodeRuneInString(encloser[size:])
			return end, true
		}
	}
	return 0, false
}