Fix JSONL output when a directory is used as training data
This commit is contained in:
parent
07d89e3eeb
commit
5b1a9448e6
@ -1,6 +1,7 @@
|
|||||||
package backend_golang
|
package backend_golang
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
@ -59,30 +60,17 @@ func (a *App) ConvertData(python string, input string, outputPrefix string, voca
|
|||||||
if file.IsDir() || !strings.HasSuffix(file.Name(), ".txt") {
|
if file.IsDir() || !strings.HasSuffix(file.Name(), ".txt") {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
txtFile, err := os.Open(input + "/" + file.Name())
|
textContent, err := os.ReadFile(input + "/" + file.Name())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
defer txtFile.Close()
|
textJson, err := json.Marshal(map[string]string{"text": string(textContent)})
|
||||||
jsonlFile.WriteString("{\"text\": \"")
|
if err != nil {
|
||||||
buf := make([]byte, 1024)
|
return "", err
|
||||||
for {
|
}
|
||||||
n, err := txtFile.Read(buf)
|
if _, err := jsonlFile.WriteString(string(textJson) + "\n"); err != nil {
|
||||||
if err != nil {
|
return "", err
|
||||||
break
|
|
||||||
}
|
|
||||||
// regex replace \r\n \n \r with \\n
|
|
||||||
jsonlFile.WriteString(
|
|
||||||
strings.ReplaceAll(
|
|
||||||
strings.ReplaceAll(
|
|
||||||
strings.ReplaceAll(
|
|
||||||
strings.ReplaceAll(string(buf[:n]),
|
|
||||||
"\r\n", "\\n"),
|
|
||||||
"\n", "\\n"),
|
|
||||||
"\r", "\\n"),
|
|
||||||
"\n\n", "\\n"))
|
|
||||||
}
|
}
|
||||||
jsonlFile.WriteString("\"}\n")
|
|
||||||
}
|
}
|
||||||
input = outputPrefix + ".jsonl"
|
input = outputPrefix + ".jsonl"
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user