1 回答
TA贡献1864条经验 获得超6个赞
我自己解决了。方法很简单：不要直接打印 outputString，而是打印 json.dumps() 的结果。完整代码如下：
main.go文件
package main
import (
"bytes"
"encoding/json"
"fmt"
"log"
"os"
"os/exec"
)
// ParseText is the JSON payload decoded from the parser script's stdout:
// a single object whose "text" key holds the extracted text.
type ParseText struct {
	Text string `json:"text"`
}
// main runs parsePdf against a fixed sample PDF. The returned text is
// discarded here. On failure the error is written to stderr and the process
// exits with status 1 so that shells and scripts can detect it.
func main() {
	fmt.Println("Running...")

	const pdfPath = "./Y2V7 Full With SS-2.pdf"
	if _, err := parsePdf(pdfPath); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
// parsePdf runs pdf_parser.py under the "python" interpreter, passing path as
// its only argument, and returns the text the script reports.
//
// The script's stdout is decoded as a JSON object of the form {"text": "..."}.
// On success the text is also written to go-pdf.txt via writeToFile.
//
// If the interpreter exits with an error, its stderr is logged and the error
// is returned wrapped. If stdout cannot be decoded as JSON, a wrapped decoding
// error is returned, the returned string is empty, and go-pdf.txt is left
// untouched.
func parsePdf(path string) (string, error) {
	cmd := exec.Command("python", "pdf_parser.py", path)

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		log.Printf("Error when executing python: %s\n", stderr.Bytes())
		return "", fmt.Errorf("Error executing python: %w", err)
	}

	var res ParseText
	if err := json.Unmarshal(stdout.Bytes(), &res); err != nil {
		// Stop before writeToFile: writing here would create or truncate
		// go-pdf.txt with empty or partial text when the output is malformed.
		return "", fmt.Errorf("decoding pdf_parser.py output: %w", err)
	}

	writeToFile("go-pdf.txt", res.Text)
	return res.Text, nil
}
// writeToFile creates fileName (truncating it if it already exists) and
// writes text to it. Any failure to create, write, or close the file is fatal:
// it is logged and the process exits via log.Fatal.
func writeToFile(fileName, text string) {
	f, err := os.Create(fileName)
	if err != nil {
		log.Fatal(err)
	}

	// Close is called explicitly rather than deferred: log.Fatal exits through
	// os.Exit, which does not run deferred functions.
	if _, err := f.WriteString(text); err != nil {
		// Best-effort close; the write error is the one worth reporting.
		_ = f.Close()
		log.Fatal(err)
	}

	// A failed Close can mean the data never reached disk, so it is treated
	// like a failed write.
	if err := f.Close(); err != nil {
		log.Fatal(err)
	}
}
pdf_parser.py文件
import fitz
import sys
import json
# Extract the text of every page of the PDF named on the command line and
# print it to stdout as a single JSON object: {"text": "..."}.
if len(sys.argv) < 2:
    # sys.exit with a string prints it to stderr and exits with status 1.
    sys.exit("usage: pdf_parser.py <path-to-pdf>")

pdf_path = sys.argv[1]

# The context manager closes the document once its pages have been read.
with fitz.open(pdf_path) as doc:
    # One extracted string per page, in document order.
    pages = [page.get_text("text") for page in doc]

# Pages are separated by a single space in the combined string.
output_string = ' '.join(pages)
print(json.dumps({"text": output_string}))
- 1 回答
- 0 关注
- 83 浏览
添加回答
举报