progression
This commit is contained in:
parent
9f83b3ce1f
commit
f92b89e01e
36
csvparser.go
36
csvparser.go
|
@ -9,14 +9,22 @@ import (
|
||||||
// CsvParser holds the configuration used to split a delimited text line
// into named fields. It is configured by Initialize and consumed by Parse.
type CsvParser struct {
	// enclosers lists the accepted encloser pairs; Initialize requires each
	// entry to be exactly two runes (opening rune, then closing rune).
	enclosers []string
	// delimiters contains every rune that terminates a field.
	delimiters string
	// mergeDelimiters is stored by Initialize from its bool argument; when
	// true, successive delimiters are meant to be treated as a single one.
	// It must be a bool: declaring it as string makes the assignment in
	// Initialize fail to compile.
	mergeDelimiters bool
	// fields holds, in line order, the name under which each value is
	// stored by Parse; the name "ignore" makes Parse drop that value.
	fields []string
}
|
||||||
|
|
||||||
func (p *CsvParser) Initialize(delimiters string, enclosers []string, lineFormat string) error {
|
/*
|
||||||
if utf8.RuneCountInString(delimiter) == 0 {
|
* delimiters: string with all delimiter chars
|
||||||
|
* mergeDelimiters: if true, all successive delimiters are considered as one
|
||||||
|
* enclosers: array of strings, each string holds one encloser pair (opening then closing character): "", [], {}...
|
||||||
|
* lineFormat: layout of a line, one entry per field: either the field name or "ignore" to skip that field
|
||||||
|
*/
|
||||||
|
func (p *CsvParser) Initialize(delimiters string, mergeDelimiters bool, enclosers []string, lineFormat string) error {
|
||||||
|
if utf8.RuneCountInString(delimiters) == 0 {
|
||||||
return fmt.Errorf("delimiter shoud be at least one character")
|
return fmt.Errorf("delimiter shoud be at least one character")
|
||||||
}
|
}
|
||||||
p.delimiters = delimiters
|
p.delimiters = delimiters
|
||||||
|
p.mergeDelimiters = mergeDelimiters
|
||||||
for _, encloser := range enclosers {
|
for _, encloser := range enclosers {
|
||||||
if utf8.RuneCountInString(encloser) != 2 {
|
if utf8.RuneCountInString(encloser) != 2 {
|
||||||
return fmt.Errorf("encolser should have to characters")
|
return fmt.Errorf("encolser should have to characters")
|
||||||
|
@ -30,9 +38,15 @@ func (p *CsvParser) Initialize(delimiters string, enclosers []string, lineFormat
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// parserState groups the scanning flags of the parser into one value.
// NOTE(review): nothing in the visible code references this type yet; Parse
// still keeps its own local delimiter/enclosed variables — confirm the
// intended use before relying on it.
type parserState struct {
	// delimiter and enclosed share their names with the locals used in Parse.
	delimiter, enclosed bool
	// encloserEnd presumably holds the closing rune of the encloser that is
	// currently open — TODO confirm against the code that sets it.
	encloserEnd rune
}
|
||||||
|
|
||||||
func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
||||||
currentFieldIndex := 0
|
currentFieldIndex := 0
|
||||||
valueStart := 0
|
valueStart := -1
|
||||||
escape := false
|
escape := false
|
||||||
enclosed := false
|
enclosed := false
|
||||||
|
|
||||||
|
@ -40,6 +54,7 @@ func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
||||||
indexMax := len(line) - 1
|
indexMax := len(line) - 1
|
||||||
maxFieldIndex := len(p.fields) - 1
|
maxFieldIndex := len(p.fields) - 1
|
||||||
delimiters := p.delimiters
|
delimiters := p.delimiters
|
||||||
|
delimiter := false
|
||||||
for index, r := range line {
|
for index, r := range line {
|
||||||
if r == '\\' {
|
if r == '\\' {
|
||||||
// Check if EOL before continue
|
// Check if EOL before continue
|
||||||
|
@ -51,8 +66,17 @@ func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
for _, d := range delimiters {
|
for _, d := range delimiters {
|
||||||
delimiter = true
|
if r == d {
|
||||||
|
delimiter = true
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if delimiter && valueStart > -1 {
|
||||||
|
if p.fields[currentFieldIndex] != "ignore" {
|
||||||
|
ret[p.fields[currentFieldIndex]] = line[valueStart:index]
|
||||||
|
}
|
||||||
|
currentFieldIndex++
|
||||||
|
}
|
||||||
if delimiter {
|
if delimiter {
|
||||||
if p.fields[currentFieldIndex] != "ignore" {
|
if p.fields[currentFieldIndex] != "ignore" {
|
||||||
ret[p.fields[currentFieldIndex]] = line[valueStart:index]
|
ret[p.fields[currentFieldIndex]] = line[valueStart:index]
|
||||||
|
@ -64,7 +88,7 @@ func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
||||||
valueStart = index + 1
|
valueStart = index + 1
|
||||||
if enclosed {
|
if enclosed {
|
||||||
enclosed = false
|
enclosed = false
|
||||||
delimiter = p.delimiter
|
delimiters = p.delimiters
|
||||||
// Omit next delimiter
|
// Omit next delimiter
|
||||||
escape = true
|
escape = true
|
||||||
}
|
}
|
||||||
|
@ -83,7 +107,7 @@ func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
||||||
if r == runes[0] {
|
if r == runes[0] {
|
||||||
// opening encloser
|
// opening encloser
|
||||||
enclosed = true
|
enclosed = true
|
||||||
delimiters = runes[1]
|
delimiters = string(runes[1])
|
||||||
valueStart++
|
valueStart++
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,5 +62,14 @@ func TestCorrectLines(t *testing.T) {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
||||||
}
|
}
|
||||||
|
csvParser.Initialize(" \t", []string{"\"\"", "[]"}, "ignore ignore ignore proxy ignore domain clientip ignore ignore apache-date request status bytes duration referer user-agent")
|
||||||
fmt.Println("event is", event)
|
fmt.Println("event is", event)
|
||||||
|
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" \"15169\""
|
||||||
|
fmt.Println("parsing:", line)
|
||||||
|
event, err = csvParser.Parse(line)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
||||||
|
}
|
||||||
|
fmt.Println("event is", event)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue