package csvparser import ( "fmt" "strings" "unicode/utf8" ) type CsvParser struct { enclosers []string delimiter rune fields []string } func (p *CsvParser) Initialize(delimiter string, enclosers []string, lineFormat string) error { if utf8.RuneCountInString(delimiter) != 1 { return fmt.Errorf("delimiter shoud be one character") } p.enclosers = make([]string, 0) for _, encloser := range enclosers { if utf8.RuneCountInString(encloser) != 2 { return fmt.Errorf("encolser should have to characters") } } p.enclosers = enclosers p.delimiter = []rune(delimiter)[0] for _, pair := range enclosers { if utf8.RuneCountInString(pair) != 2 { return fmt.Errorf("encoloser should contain two characters: %s", pair) } } // line format is in the form of: field1 field2 ignore ... // if field name is ignore, it is parsed but not retained p.fields = strings.Split(lineFormat, " ") return nil } func (p *CsvParser) Parse(line string) (map[string]string, error) { inEnclosedField := false currentEncloserEnd := ' ' escape := false currentFieldIndex := 0 ret := make(map[string]string) valueStart := 0 indexMax := len(line) - 1 for index, r := range line { if index == indexMax { if currentFieldIndex < len(p.fields) && p.fields[currentFieldIndex] != "ignore" { if inEnclosedField && r == currentEncloserEnd { ret[p.fields[currentFieldIndex]] = line[valueStart:index] } else { ret[p.fields[currentFieldIndex]] = line[valueStart : index+1] } } } if r == '\\' { escape = !escape } else if inEnclosedField { if r == currentEncloserEnd && !escape { inEnclosedField = false } } else if r == p.delimiter { if currentFieldIndex < len(p.fields) && p.fields[currentFieldIndex] != "ignore" { ret[p.fields[currentFieldIndex]] = line[valueStart:index] valueStart = index + 1 } currentFieldIndex++ } else { for _, encloser := range p.enclosers { runes := []rune(encloser) if r == runes[0] { // opening encloser inEnclosedField = true currentEncloserEnd = runes[1] valueStart++ break } } } } return ret, nil }