From 5b1a9448e62ab0f940bef6ab57d76ec5f79a6ca3 Mon Sep 17 00:00:00 2001 From: josc146 Date: Sun, 9 Jul 2023 11:31:07 +0800 Subject: [PATCH] fix jsonl data when using directory as training data --- backend-golang/rwkv.go | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/backend-golang/rwkv.go b/backend-golang/rwkv.go index fa980b7..0bc48ae 100644 --- a/backend-golang/rwkv.go +++ b/backend-golang/rwkv.go @@ -1,6 +1,7 @@ package backend_golang import ( + "encoding/json" "errors" "os" "os/exec" @@ -59,30 +60,17 @@ func (a *App) ConvertData(python string, input string, outputPrefix string, voca if file.IsDir() || !strings.HasSuffix(file.Name(), ".txt") { continue } - txtFile, err := os.Open(input + "/" + file.Name()) + textContent, err := os.ReadFile(input + "/" + file.Name()) if err != nil { return "", err } - defer txtFile.Close() - jsonlFile.WriteString("{\"text\": \"") - buf := make([]byte, 1024) - for { - n, err := txtFile.Read(buf) - if err != nil { - break - } - // regex replace \r\n \n \r with \\n - jsonlFile.WriteString( - strings.ReplaceAll( - strings.ReplaceAll( - strings.ReplaceAll( - strings.ReplaceAll(string(buf[:n]), - "\r\n", "\\n"), - "\n", "\\n"), - "\r", "\\n"), - "\n\n", "\\n")) + textJson, err := json.Marshal(map[string]string{"text": string(textContent)}) + if err != nil { + return "", err + } + if _, err := jsonlFile.WriteString(string(textJson) + "\n"); err != nil { + return "", err } - jsonlFile.WriteString("\"}\n") } input = outputPrefix + ".jsonl" } else if err != nil {