Compare commits
5 Commits
Author | SHA1 | Date |
---|---|---|
|
8c241fff65 | |
|
62296e1da6 | |
|
7bf103427b | |
|
9301f353ba | |
|
c363b760d9 |
14
csvparser.go
14
csvparser.go
|
@ -14,7 +14,9 @@ type CsvParser struct {
|
||||||
ignore string
|
ignore string
|
||||||
maxFieldIndex int
|
maxFieldIndex int
|
||||||
}
|
}
|
||||||
|
func (p *csvParser) FieldCount() int {
|
||||||
|
return maxFieldIndex + 1
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* delimiters: string with all delimiter chars
|
* delimiters: string with all delimiter chars
|
||||||
* mergeDelimiters: if true, all successive delimiters are considered as one
|
* mergeDelimiters: if true, all successive delimiters are considered as one
|
||||||
|
@ -66,7 +68,6 @@ type ParserState struct {
|
||||||
func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
||||||
currentFieldIndex := 0
|
currentFieldIndex := 0
|
||||||
valueStart := -1
|
valueStart := -1
|
||||||
//valueEnd := -1
|
|
||||||
|
|
||||||
state := ParserState{inField: false, delimiter: true, escape: false, enclosed: false, enclosedMode: false, encloserStart: '?', encloserEnd: '?'}
|
state := ParserState{inField: false, delimiter: true, escape: false, enclosed: false, enclosedMode: false, encloserStart: '?', encloserEnd: '?'}
|
||||||
|
|
||||||
|
@ -108,11 +109,12 @@ func (p *CsvParser) Parse(line string) (map[string]string, error) {
|
||||||
|
|
||||||
// current rune is a delimiter, a value is present and the next char is the next value
|
// current rune is a delimiter, a value is present and the next char is the next value
|
||||||
if isDelimiter || index == maxIndex {
|
if isDelimiter || index == maxIndex {
|
||||||
if index == maxIndex && valueStart == -1 {
|
if index == maxIndex && valueStart == -1 {
|
||||||
valueStart = 0
|
valueStart = 0
|
||||||
}
|
}
|
||||||
state.delimiter = true
|
state.delimiter = true
|
||||||
field := line[valueStart:index]
|
field := line[valueStart : index+1]
|
||||||
|
field = strings.TrimRight(field, p.delimiters)
|
||||||
if state.enclosed {
|
if state.enclosed {
|
||||||
field = strings.TrimRight(strings.TrimLeft(field, string(state.encloserStart)), string(state.encloserEnd))
|
field = strings.TrimRight(strings.TrimLeft(field, string(state.encloserStart)), string(state.encloserEnd))
|
||||||
}
|
}
|
||||||
|
|
|
@ -64,7 +64,7 @@ func TestCorrectLines(t *testing.T) {
|
||||||
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
||||||
}
|
}
|
||||||
fmt.Println("event is", event)
|
fmt.Println("event is", event)
|
||||||
*/
|
*/
|
||||||
csvParser.Initialize(" \t", true, []string{"\"\"", "[]"}, "ignore ignore ignore proxy ignore domain clientip ignore ignore apache-date request status bytes duration referer user-agent", "ignore")
|
csvParser.Initialize(" \t", true, []string{"\"\"", "[]"}, "ignore ignore ignore proxy ignore domain clientip ignore ignore apache-date request status bytes duration referer user-agent", "ignore")
|
||||||
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" \"15169\""
|
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" \"15169\""
|
||||||
fmt.Println("parsing:", line)
|
fmt.Println("parsing:", line)
|
||||||
|
@ -73,7 +73,8 @@ func TestCorrectLines(t *testing.T) {
|
||||||
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
||||||
}
|
}
|
||||||
fmt.Println("event is", event)
|
fmt.Println("event is", event)
|
||||||
|
/*
|
||||||
|
csvParser.Initialize(" \t", true, []string{"\"\"", "[]"}, "ignore ignore ignore proxy ignore domain clientip ignore ignore apache-date request status bytes duration referer user-agent", "ignore")
|
||||||
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" 15169"
|
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" 15169"
|
||||||
fmt.Println("parsing:", line)
|
fmt.Println("parsing:", line)
|
||||||
event, err = csvParser.Parse(line)
|
event, err = csvParser.Parse(line)
|
||||||
|
@ -82,6 +83,7 @@ func TestCorrectLines(t *testing.T) {
|
||||||
}
|
}
|
||||||
fmt.Println("event is", event)
|
fmt.Println("event is", event)
|
||||||
|
|
||||||
|
csvParser.Initialize(" \t", true, []string{"\"\"", "[]"}, "ignore ignore ignore proxy ignore domain clientip ignore ignore apache-date request status bytes duration referer user-agent", "ignore")
|
||||||
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" \"15169"
|
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" \"15169"
|
||||||
fmt.Println("parsing:", line)
|
fmt.Println("parsing:", line)
|
||||||
event, err = csvParser.Parse(line)
|
event, err = csvParser.Parse(line)
|
||||||
|
@ -90,4 +92,32 @@ func TestCorrectLines(t *testing.T) {
|
||||||
}
|
}
|
||||||
fmt.Println("event is", event)
|
fmt.Println("event is", event)
|
||||||
|
|
||||||
|
csvParser.Initialize(" ", true, []string{"\"\""}, "verb uri http-version", "ignore")
|
||||||
|
line = "GET /Actions-de-l-Etat/Vos-aides/Particuliers/Pass-culture?_escaped_fragment_=/particuliers/page/R65575 HTTP/1.1"
|
||||||
|
fmt.Println("parsing:", line)
|
||||||
|
event, err = csvParser.Parse(line)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
||||||
|
}
|
||||||
|
fmt.Println("event is", event)
|
||||||
|
*/
|
||||||
|
csvParser.Initialize(" ", true, []string{"\"\""}, "verb uri http-version", "ignore")
|
||||||
|
line = "GET /Actions-de-l-Etat/Vos-aides/Particuliers/Pass-culture?_escaped_fragment_=/particuliers/page/R65575 HTTP/1.1"
|
||||||
|
fmt.Println("parsing:", line)
|
||||||
|
event, err = csvParser.Parse(line)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
||||||
|
}
|
||||||
|
fmt.Println("event is", event)
|
||||||
|
|
||||||
|
csvParser.Initialize(" ", true, []string{"\"\""}, "verb uri http-version", "ignore")
|
||||||
|
line = "G"
|
||||||
|
fmt.Println("parsing:", line)
|
||||||
|
event, err = csvParser.Parse(line)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Parsing of empty line failed %v %v", err, event)
|
||||||
|
}
|
||||||
|
fmt.Println("event is", event)
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue