This commit is contained in:
Laurent Ulrich 2024-07-06 21:03:35 +02:00
parent 7d521b3d11
commit f5c589127a
4 changed files with 50 additions and 172 deletions

View File

@ -1,102 +0,0 @@
package csvparser
import (
"strings"
"errors"
)
// EncloserId identifies a kind of field encloser, i.e. a pair of opening and
// closing characters that may surround a field value.
type EncloserId int
// Known encloser kinds. None is the zero value and has no associated
// characters; the opening and closing characters of the other kinds are
// listed in EnclosersRunes.
const (
None EncloserId = iota
DoubleQuotes
SingleQuotes
RoundBrackets
SquareBrackets
CurlyBrackets
)
// Encloser holds the pair of characters that delimit an enclosed field.
// Both characters are single bytes.
type Encloser struct {
// Open is the byte that starts an enclosed field.
Open byte
// Close is the byte that ends an enclosed field.
Close byte
}
// EnclosersRunes maps each encloser kind to its opening and closing
// characters. Despite the name, the characters are stored as bytes (see
// Encloser), not runes. None has no entry, so looking it up yields the zero
// Encloser.
var EnclosersRunes = map[EncloserId]Encloser{
DoubleQuotes: Encloser{'"', '"'},
SingleQuotes: Encloser{'\'', '\''},
RoundBrackets: Encloser{'(', ')'},
SquareBrackets: Encloser{'[', ']'},
CurlyBrackets: Encloser{'{', '}'},
}
// Parser splits a line of text into fields. It carries both the
// configuration (Enclosers, Delimiter) and the state of the current parse
// (Fields, Line).
type Parser struct {
// Enclosers lists the encloser kinds recognised at the start of a field.
Enclosers []EncloserId
// Delimiter is a set of characters; any one of them separates two fields.
Delimiter string
// Fields receives the values extracted by the most recent call to Parse.
Fields []string
// Line is the part of the input that has not been consumed yet.
Line string
}
// Init applies the default configuration: a space or a tab separates fields,
// and a field may be enclosed in double quotes or in square brackets.
// Fields and Line are left untouched.
func (parser *Parser) Init() {
	parser.Delimiter = " \t"
	parser.Enclosers = []EncloserId{
		DoubleQuotes,
		SquareBrackets,
	}
}
// ExtractEnclosedFieldValue consumes one enclosed field from parser.Line.
//
// parser.Line must start right after the opening character (the caller has
// already stripped it). The method looks for the first occurrence of endChar
// that is not immediately preceded by a backslash, appends everything before
// it to parser.Fields and advances parser.Line past the closing character.
// Backslashes are kept in the extracted value; no unescaping is performed.
//
// A closing character found at the very beginning of parser.Line yields an
// empty field (e.g. the input `""`).
//
// Known limitation: a backslash that is itself escaped (`\\`) is still
// treated as escaping the following endChar.
//
// It returns an error, leaving parser.Line and parser.Fields unchanged, when
// no unescaped endChar is found.
func (parser *Parser) ExtractEnclosedFieldValue(endChar byte) error {
	// Start at 0, not 1: the opening character is already gone, so index 0
	// may legitimately hold the closing character of an empty field.
	for i := 0; i < len(parser.Line); i++ {
		if parser.Line[i] != endChar {
			continue
		}
		if i > 0 && parser.Line[i-1] == '\\' {
			// Escaped closing character: part of the value.
			continue
		}
		parser.Fields = append(parser.Fields, parser.Line[:i])
		parser.Line = parser.Line[i+1:]
		return nil
	}
	return errors.New("Encloser close not found")
}
// Parse splits CsvLine into fields and stores them in parser.Fields,
// replacing the result of any previous call.
//
// Fields are separated by one or more characters of parser.Delimiter;
// leading and trailing delimiters are ignored. A field that starts with the
// opening character of one of parser.Enclosers extends up to the matching
// closing character and may therefore contain delimiters.
//
// An error is returned when an enclosed field is not terminated; in that
// case parser.Fields holds the fields extracted so far.
//
// Usage:
//
//	var parser csvparser.Parser
//	parser.Init()
//	err := parser.Parse("a b c d")
func (parser *Parser) Parse(CsvLine string) error {
	parser.Fields = make([]string, 0)
	parser.Line = CsvLine

	for len(parser.Line) > 0 {
		// Drop the delimiters surrounding what is left of the line.
		parser.Line = strings.TrimRight(strings.TrimLeft(parser.Line, parser.Delimiter), parser.Delimiter)
		if parser.Line == "" {
			break
		}

		// Does the next field start with a known opening character?
		found := None
		for _, candidate := range parser.Enclosers {
			if EnclosersRunes[candidate].Open == parser.Line[0] {
				found = candidate
				break
			}
		}

		if found == None {
			// Plain field: it runs up to the next delimiter or the end of line.
			cut := strings.IndexAny(parser.Line, parser.Delimiter)
			if cut == -1 {
				parser.Fields = append(parser.Fields, parser.Line)
				parser.Line = ""
				break
			}
			parser.Fields = append(parser.Fields, parser.Line[:cut])
			parser.Line = parser.Line[cut:]
			continue
		}

		// Enclosed field: skip the opening character and read up to the close.
		parser.Line = parser.Line[1:]
		if err := parser.ExtractEnclosedFieldValue(EnclosersRunes[found].Close); err != nil {
			return err
		}
	}
	return nil
}

View File

@ -1,68 +0,0 @@
package csvparser
import(
"testing"
)
// TestParse checks that lines using the default configuration (space and tab
// delimiters, double-quote and square-bracket enclosers) are all split into
// the same three fields.
func TestParse(t *testing.T) {
	inputs := []string{
		// Standard line, one space between fields.
		"field1 field2 field3",
		// Leading, trailing and repeated spaces as delimiters.
		"  field1   field2  field3 ",
		// Spaces mixed with tabs as delimiters.
		" \tfield1\t field2 \t field3\t ",
		// One enclosed field.
		" \"field1\" field2 field3 ",
		// Enclosed fields using two different enclosers.
		" \"field1\" field2 [field3] ",
	}
	want := []string{"field1", "field2", "field3"}

	var parser Parser
	parser.Init()
	for _, in := range inputs {
		if err := parser.Parse(in); err != nil {
			t.Errorf("Parse(%q): unexpected error: %v", in, err)
			continue
		}
		// Skip the element-wise comparison on a count mismatch: indexing
		// parser.Fields with the expected indices could panic.
		if len(parser.Fields) != len(want) {
			t.Errorf("Parse(%q): got %d fields %q, want %d", in, len(parser.Fields), parser.Fields, len(want))
			continue
		}
		for i, w := range want {
			if got := parser.Fields[i]; got != w {
				t.Errorf("Parse(%q): field %d = %q, want %q", in, i, got, w)
			}
		}
	}
}
// TestParseWithEscape checks that a backslash-escaped closing character does
// not terminate an enclosed field, and that the backslash is kept in the
// extracted value.
func TestParseWithEscape(t *testing.T) {
	inputs := []string{
		// Enclosed fields containing escaped characters.
		"\"\\\"field1 and more\" field2 [\\[field3] ",
	}
	want := []string{
		"\\\"field1 and more",
		"field2",
		"\\[field3",
	}

	var parser Parser
	parser.Init()
	for _, in := range inputs {
		if err := parser.Parse(in); err != nil {
			t.Errorf("Parse(%q): unexpected error: %v", in, err)
			continue
		}
		// Skip the element-wise comparison on a count mismatch: indexing
		// parser.Fields with the expected indices could panic.
		if len(parser.Fields) != len(want) {
			t.Errorf("Parse(%q): got %d fields %q, want %d", in, len(parser.Fields), parser.Fields, len(want))
			continue
		}
		for i, w := range want {
			if got := parser.Fields[i]; got != w {
				t.Errorf("Parse(%q): field %d = %q, want %q", in, i, got, w)
			}
		}
	}
}

48
csvparser.go Normal file
View File

@ -0,0 +1,48 @@
package csvparser
import (
"fmt"
"strings"
"unicode/utf8"
)
// Parser holds the configuration set by Initialize.
type Parser struct {
// enclosers holds one rune slice per encloser string passed to Initialize,
// which requires each string to be exactly two runes long (presumably the
// opening and the closing character — confirm once Parse uses them).
enclosers [][]rune
// delimiter is the single rune given to Initialize as field separator.
delimiter rune
// fields holds the names obtained by splitting the line format on single
// spaces. According to the comment in Initialize, a field named "ignore" is
// meant to be parsed but not retained.
fields []string
}
// Initialize configures the parser.
//
// delimiter must contain exactly one character (rune). Each entry of
// enclosers must contain exactly two characters. lineFormat lists the field
// names separated by single spaces, in the form "field1 field2 ignore ...";
// a field named "ignore" is meant to be parsed but not retained.
//
// All arguments are validated before the parser is modified, so a call that
// returns an error leaves the parser exactly as it was.
func (p *Parser) Initialize(delimiter string, enclosers []string, lineFormat string) error {
	if utf8.RuneCountInString(delimiter) != 1 {
		return fmt.Errorf("delimiter should be 1 char length")
	}

	pairs := make([][]rune, 0, len(enclosers))
	for _, pair := range enclosers {
		if utf8.RuneCountInString(pair) != 2 {
			return fmt.Errorf("encloser should contain two characters: %q", pair)
		}
		pairs = append(pairs, []rune(pair))
	}

	// Validation passed: commit the new configuration.
	p.delimiter, _ = utf8.DecodeRuneInString(delimiter)
	p.enclosers = pairs
	p.fields = strings.Split(lineFormat, " ")
	return nil
}
// Parse is meant to split line into fields and return them keyed by the
// names configured through Initialize.
//
// NOTE(review): tokenization is not implemented yet. The previous body only
// iterated over the runes of line and over p.enclosers without acting on
// them, and did not compile (assignment to an undeclared variable, unused
// loop index). Until the splitting logic is written, Parse always returns an
// empty, non-nil map and a nil error.
func (p *Parser) Parse(line string) (map[string]string, error) {
	ret := make(map[string]string)
	return ret, nil
}

4
go.mod
View File

@ -1,3 +1,3 @@
module git.orange.fruit.ovh/laurentu/csv-parser
module git.passke.org/laurentu/csv-parser
go 1.17
go 1.22