Compare commits

...

5 Commits

Author SHA1 Message Date
Laurentu 8c241fff65 zz 2025-05-14 15:56:45 +02:00
Laurent Ulrich 62296e1da6 simple tabulation 2024-11-15 15:35:13 +01:00
laurentu 7bf103427b suppression log 2024-08-08 16:04:56 +02:00
laurentu 9301f353ba semble ok 2024-08-08 16:03:06 +02:00
laurentu c363b760d9 bug sur les fin de lignes non enclosed (manque le dernier char) 2024-08-08 15:23:56 +02:00
2 changed files with 40 additions and 8 deletions

View File

@ -14,7 +14,9 @@ type CsvParser struct {
ignore string ignore string
maxFieldIndex int maxFieldIndex int
} }
func (p *csvParser) FieldCount() int {
return maxFieldIndex + 1
}
/* /*
* delimiters: string with all delimiter chars * delimiters: string with all delimiter chars
* mergeDelimiters: if true, all successive delimiters are considered as one * mergeDelimiters: if true, all successive delimiters are considered as one
@ -66,7 +68,6 @@ type ParserState struct {
func (p *CsvParser) Parse(line string) (map[string]string, error) { func (p *CsvParser) Parse(line string) (map[string]string, error) {
currentFieldIndex := 0 currentFieldIndex := 0
valueStart := -1 valueStart := -1
//valueEnd := -1
state := ParserState{inField: false, delimiter: true, escape: false, enclosed: false, enclosedMode: false, encloserStart: '?', encloserEnd: '?'} state := ParserState{inField: false, delimiter: true, escape: false, enclosed: false, enclosedMode: false, encloserStart: '?', encloserEnd: '?'}
@ -108,11 +109,12 @@ func (p *CsvParser) Parse(line string) (map[string]string, error) {
// current rune is a delimiter, a value is present and the next char is the next value // current rune is a delimiter, a value is present and the next char is the next value
if isDelimiter || index == maxIndex { if isDelimiter || index == maxIndex {
if index == maxIndex && valueStart == -1 { if index == maxIndex && valueStart == -1 {
valueStart = 0 valueStart = 0
} }
state.delimiter = true state.delimiter = true
field := line[valueStart:index] field := line[valueStart : index+1]
field = strings.TrimRight(field, p.delimiters)
if state.enclosed { if state.enclosed {
field = strings.TrimRight(strings.TrimLeft(field, string(state.encloserStart)), string(state.encloserEnd)) field = strings.TrimRight(strings.TrimLeft(field, string(state.encloserStart)), string(state.encloserEnd))
} }

View File

@ -64,7 +64,7 @@ func TestCorrectLines(t *testing.T) {
t.Fatalf("Parsing of empty line failed %v %v", err, event) t.Fatalf("Parsing of empty line failed %v %v", err, event)
} }
fmt.Println("event is", event) fmt.Println("event is", event)
*/ */
csvParser.Initialize(" \t", true, []string{"\"\"", "[]"}, "ignore ignore ignore proxy ignore domain clientip ignore ignore apache-date request status bytes duration referer user-agent", "ignore") csvParser.Initialize(" \t", true, []string{"\"\"", "[]"}, "ignore ignore ignore proxy ignore domain clientip ignore ignore apache-date request status bytes duration referer user-agent", "ignore")
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" \"15169\"" line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" \"15169\""
fmt.Println("parsing:", line) fmt.Println("parsing:", line)
@ -73,7 +73,8 @@ func TestCorrectLines(t *testing.T) {
t.Fatalf("Parsing of empty line failed %v %v", err, event) t.Fatalf("Parsing of empty line failed %v %v", err, event)
} }
fmt.Println("event is", event) fmt.Println("event is", event)
/*
csvParser.Initialize(" \t", true, []string{"\"\"", "[]"}, "ignore ignore ignore proxy ignore domain clientip ignore ignore apache-date request status bytes duration referer user-agent", "ignore")
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" 15169" line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" 15169"
fmt.Println("parsing:", line) fmt.Println("parsing:", line)
event, err = csvParser.Parse(line) event, err = csvParser.Parse(line)
@ -82,6 +83,7 @@ func TestCorrectLines(t *testing.T) {
} }
fmt.Println("event is", event) fmt.Println("event is", event)
csvParser.Initialize(" \t", true, []string{"\"\"", "[]"}, "ignore ignore ignore proxy ignore domain clientip ignore ignore apache-date request status bytes duration referer user-agent", "ignore")
line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" \"15169" line = "Aug 7 00:00:00 proxy-4 haproxy[17429]: www.yvelines.gouv.fr 66.249.64.10 - - [06/Aug/2024:23:59:59 +0200] \"GET /content/download/19274/117923/file/SE_EAU_20190325_LesJardines_78201900027_LetNotifCompletude+recepisse.pdf HTTP/1.1\" 301 1414 240 \"\" \"Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.6478.182 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)\" \"-\" \"GB\" \"15169"
fmt.Println("parsing:", line) fmt.Println("parsing:", line)
event, err = csvParser.Parse(line) event, err = csvParser.Parse(line)
@ -90,4 +92,32 @@ func TestCorrectLines(t *testing.T) {
} }
fmt.Println("event is", event) fmt.Println("event is", event)
csvParser.Initialize(" ", true, []string{"\"\""}, "verb uri http-version", "ignore")
line = "GET /Actions-de-l-Etat/Vos-aides/Particuliers/Pass-culture?_escaped_fragment_=/particuliers/page/R65575 HTTP/1.1"
fmt.Println("parsing:", line)
event, err = csvParser.Parse(line)
if err != nil {
t.Fatalf("Parsing of empty line failed %v %v", err, event)
}
fmt.Println("event is", event)
*/
csvParser.Initialize(" ", true, []string{"\"\""}, "verb uri http-version", "ignore")
line = "GET /Actions-de-l-Etat/Vos-aides/Particuliers/Pass-culture?_escaped_fragment_=/particuliers/page/R65575 HTTP/1.1"
fmt.Println("parsing:", line)
event, err = csvParser.Parse(line)
if err != nil {
t.Fatalf("Parsing of empty line failed %v %v", err, event)
}
fmt.Println("event is", event)
csvParser.Initialize(" ", true, []string{"\"\""}, "verb uri http-version", "ignore")
line = "G"
fmt.Println("parsing:", line)
event, err = csvParser.Parse(line)
if err != nil {
t.Fatalf("Parsing of empty line failed %v %v", err, event)
}
fmt.Println("event is", event)
} }