Parser implemented but absolutely not tested

This commit is contained in:
Laurent ULRICH 2022-03-21 20:37:26 +00:00
parent 6ec99a8e16
commit d6958291fb
1 changed files with 47 additions and 56 deletions

View File

@ -2,11 +2,14 @@ package csvparser
import ( import (
"strings" "strings"
"errors"
) )
type EncloserId int type EncloserId int
const ( const (
DoubleQuotes EncloserId = iota None EncloserId = iota
DoubleQuotes
SingleQuotes SingleQuotes
RoundBrackets RoundBrackets
SquareBrackets SquareBrackets
@ -14,8 +17,8 @@ const (
) )
type Encloser struct { type Encloser struct {
Open rune Open byte
Close rune Close byte
} }
var EnclosersRunes = map[EncloserId]Encloser{ var EnclosersRunes = map[EncloserId]Encloser{
@ -27,74 +30,62 @@ var EnclosersRunes = map[EncloserId]Encloser{
} }
type CsvParser struct { type CsvParser struct {
Enclosers []EncloserId Enclosers []EncloserId
Delimiter rune Delimiter string
DelimiterString string Fields []string
Line string
} }
func (parser *CsvParser) Init() { func (parser *CsvParser) Init() {
parser.Enclosers = []EncloserId{DoubleQuotes, SquareBrackets} parser.Enclosers = []EncloserId{DoubleQuotes, SquareBrackets}
parser.Delimiter = ' ' parser.Delimiter = " \t"
parser.DelimiterString = string(parser.Delimiter)
} }
// ExtractEnclosedFieldValue reads one enclosed field from the start of line.
//
// line is expected to begin with the opening encloser character: line[0] is
// skipped without being inspected. endChar is the matching closing character.
// The text between the two is appended to parser.Fields exactly as written
// (escape sequences are kept, not unescaped), and the remainder of line that
// follows the closing character is returned.
//
// A backslash escapes the byte that follows it, so `\"` does not terminate a
// double-quoted field, whereas `\\"` does: the first backslash escapes the
// second one and the quote is a real closer.
//
// When no unescaped closing character is found, line is returned unchanged
// together with a non-nil error, and parser.Fields is left untouched.
func (parser *CsvParser) ExtractEnclosedFieldValue(line string, endChar byte) (string, error) {
	for i := 1; i < len(line); i++ {
		switch line[i] {
		case endChar:
			// Escaped occurrences never reach this case: the backslash
			// branch below steps over the byte it escapes.
			parser.Fields = append(parser.Fields, line[1:i])
			return line[i+1:], nil
		case '\\':
			// Consume the escaped byte as a pair. Looking only at the
			// previous byte would misread `\\"` as an escaped quote.
			i++
		}
	}
	return line, errors.New("Encloser close not found")
}
func (parser *CsvParser) Parse(line string) error { func (parser *CsvParser) Parse(line string) error {
var err error = nil
for len(line) > 0 { for len(line) > 0 {
line = strings.TrimLeft(line, parser.DelimiterString) line = strings.TrimLeft(line, parser.Delimiter)
line = strings.TrimRight(line, parser.DelimiterString) line = strings.TrimRight(line, parser.Delimiter)
if len(line) == 0 { if len(line) == 0 {
break break
} }
// Search for an encloser // Search for an encloser
for id := range parser.Enclosers { encloserId := None
for _, id := range parser.Enclosers {
if line[0] == EnclosersRunes[id].Open { if line[0] == EnclosersRunes[id].Open {
endChar := EnclosersRunes[id].Close encloserId = id
endOfFieldFound := false
l := len(line)
for i := 1; i < l; i++ {
if line[i] == endChar && line[i-1] != '\\' {
fields = append(fields, line[1:i])
line = line[i+1:]
endOfFieldFound = true
break
}
}
if !endOfFieldFound {
return nil, errors.New("#ERR: bad format")
}
break;
}
}
if line[0] == '"' || line[0] == '[' {
var endChar byte = '"'
if line[0] == '[' {
endChar = ']'
}
endOfFieldFound := false
l := len(line)
for i := 1; i < l; i++ {
if line[i] == endChar && line[i-1] != '\\' {
fields = append(fields, line[1:i])
line = line[i+1:]
endOfFieldFound = true
break
}
}
if !endOfFieldFound {
return nil, errors.New("#ERR: bad format")
}
} else {
nextSpace := strings.IndexAny(line, " \t")
if nextSpace != -1 {
fields = append(fields, line[:nextSpace])
line = line[nextSpace:]
} else {
fields = append(fields, line)
line = ""
break break
} }
} }
} if encloserId != None {
return fields, nil line, err = parser.ExtractEnclosedFieldValue(line, EnclosersRunes[encloserId].Close)
if err != nil {
return err
}
} else {
nextSpace := strings.IndexAny(line, parser.Delimiter)
if nextSpace != -1 {
parser.Fields = append(parser.Fields, line[:nextSpace])
line = line[nextSpace:]
} else {
parser.Fields = append(parser.Fields, line)
line = ""
break
}
}
}
return nil
} }