// ignoredField is the field name that marks a column as parsed but not
// retained in the map returned by Parse.
const ignoredField = "ignore"

// Parser splits a delimited line into named values.
//
// A Parser must be configured with Initialize before Parse is called.
type Parser struct {
	// enclosers holds the configured {open, close} rune pairs. A value that
	// starts with an open rune extends up to the matching close rune and may
	// contain the delimiter.
	enclosers [][]rune
	// delimiter is the single rune that separates values.
	delimiter rune
	// fields holds the field names, in line order, taken from the line format.
	fields []string
}

// Initialize configures the parser.
//
// delimiter must be exactly one character. Every entry of enclosers must be
// exactly two characters: the opening one followed by the closing one (for
// example `""` or `[]`). lineFormat is a whitespace-separated list of field
// names in the order they appear in a line ("field1 field2 ignore ..."); a
// field named "ignore" is parsed but not retained by Parse.
//
// All arguments are validated before the parser is modified, so a failed call
// leaves the previous configuration untouched.
func (p *Parser) Initialize(delimiter string, enclosers []string, lineFormat string) error {
	if utf8.RuneCountInString(delimiter) != 1 {
		return fmt.Errorf("delimiter should be 1 char length")
	}
	pairs := make([][]rune, 0, len(enclosers))
	for _, pair := range enclosers {
		if utf8.RuneCountInString(pair) != 2 {
			return fmt.Errorf("encloser should contain two characters: %s", pair)
		}
		pairs = append(pairs, []rune(pair))
	}

	p.delimiter, _ = utf8.DecodeRuneInString(delimiter)
	p.enclosers = pairs
	// strings.Fields tolerates repeated spaces in the format and yields no
	// field at all for an empty format.
	p.fields = strings.Fields(lineFormat)

	return nil
}

// Parse splits line into values and returns them keyed by the field names
// given to Initialize. Fields named "ignore" are left out of the result.
//
// Runs of delimiters are treated as a single separator, so leading, trailing
// and repeated delimiters never produce empty values. A value that starts
// with an opening encloser extends to the matching closing encloser; the
// enclosers themselves are stripped. A closing encloser preceded by a
// backslash does not terminate the value, and the backslash is kept.
//
// Parse returns an error when an enclosed value is not terminated or when the
// number of values differs from the number of fields in the line format. The
// returned map is nil on error.
func (p *Parser) Parse(line string) (map[string]string, error) {
	values, err := p.split(line)
	if err != nil {
		return nil, err
	}
	if len(values) != len(p.fields) {
		return nil, fmt.Errorf("line has %d values, line format declares %d fields", len(values), len(p.fields))
	}

	ret := make(map[string]string, len(values))
	for i, name := range p.fields {
		if name == ignoredField {
			continue
		}
		ret[name] = values[i]
	}

	return ret, nil
}

// split cuts line into its raw values, honouring the delimiter and enclosers.
func (p *Parser) split(line string) ([]string, error) {
	var values []string
	for rest := line; len(rest) > 0; {
		r, size := utf8.DecodeRuneInString(rest)
		if r == p.delimiter {
			rest = rest[size:]
			continue
		}
		if closer, ok := p.closerFor(r); ok {
			value, consumed, found := readEnclosed(rest[size:], closer)
			if !found {
				return nil, fmt.Errorf("closing encloser %q not found", closer)
			}
			values = append(values, value)
			rest = rest[size+consumed:]
			continue
		}
		// Plain value: runs up to the next delimiter or the end of the line.
		end := strings.IndexRune(rest, p.delimiter)
		if end < 0 {
			end = len(rest)
		}
		values = append(values, rest[:end])
		rest = rest[end:]
	}
	return values, nil
}

// closerFor returns the closing rune paired with open, if open is a
// configured opening encloser.
func (p *Parser) closerFor(open rune) (rune, bool) {
	for _, pair := range p.enclosers {
		if len(pair) == 2 && pair[0] == open {
			return pair[1], true
		}
	}
	return 0, false
}

// readEnclosed scans s, which starts right after an opening encloser, for the
// first closer that is not immediately preceded by a backslash. It returns the
// text before that closer and the number of bytes consumed including the
// closer; found is false when no such closer exists.
func readEnclosed(s string, closer rune) (value string, consumed int, found bool) {
	escaped := false
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		if r == closer && !escaped {
			return s[:i], i + size, true
		}
		escaped = r == '\\'
		i += size
	}
	return "", 0, false
}