PDFIndexer/main.go

117 lines
1.9 KiB
Go
Raw Permalink Normal View History

2023-09-24 16:11:25 +02:00
package main
import (
"encoding/json"
"flag"
"fmt"
"io/fs"
"log"
"os"
"path/filepath"
"strings"
"code.sajari.com/docconv"
)
// fileFormats lists the lower-case file name suffixes that are indexed.
// Every entry includes the leading dot so that only real extensions match,
// not arbitrary names that happen to end in the same letters.
var fileFormats = []string{".pdf", ".jpg", ".png", ".txt", ".docx", ".doc", ".tif", ".tiff"}
2023-09-24 16:11:25 +02:00
func buildIndex(dirs []string, old map[string]string) (map[string]string, error) {
2023-09-24 16:11:25 +02:00
var index = old
if index == nil {
index = make(map[string]string)
}
for _, dir := range dirs {
err := filepath.Walk(dir, func(path string, info fs.FileInfo, err error) error {
if err != nil {
return err
}
2023-09-24 16:11:25 +02:00
if info.IsDir() {
return nil
}
2023-09-24 16:11:25 +02:00
supportedType := false
for _, suffix := range fileFormats {
if strings.HasSuffix(strings.ToLower(path), suffix) {
supportedType = true
break
}
2023-09-24 16:11:25 +02:00
}
if !supportedType {
return nil
}
2023-09-24 16:11:25 +02:00
_, exists := index[path]
if exists {
return nil
}
2023-09-24 16:11:25 +02:00
log.Println(path)
2023-09-24 16:11:25 +02:00
str, err := docconv.ConvertPath(path)
if err != nil {
log.Println("Fail:", err)
return nil
}
2023-09-24 16:11:25 +02:00
index[path] = str.Body
return nil
})
2023-09-24 16:11:25 +02:00
if err != nil {
return nil, err
}
2023-09-24 16:11:25 +02:00
}
return index, nil
}
func main() {
force := flag.Bool("force", false, "Force generation of a full new index")
flag.Parse()
if len(flag.Args()) < 2 {
fmt.Println("Usage:", os.Args[0], "<destination file> <directory> [directories...]")
2023-09-24 16:11:25 +02:00
os.Exit(1)
}
indexFile := flag.Arg(0)
directories := flag.Args()[1:]
2023-09-24 16:11:25 +02:00
var oldIndex = make(map[string]string)
if !*force {
oldBytes, err := os.ReadFile(indexFile)
if err != nil {
if !os.IsNotExist(err) {
log.Panicln(err)
}
}
if len(oldBytes) > 0 {
err = json.Unmarshal(oldBytes, &oldIndex)
if err != nil {
log.Panicln(err)
}
}
}
index, err := buildIndex(directories, oldIndex)
2023-09-24 16:11:25 +02:00
if err != nil {
log.Panicln(err)
}
json, err := json.MarshalIndent(index, "", "\t")
if err != nil {
log.Panicln(json)
}
err = os.WriteFile(indexFile, json, 0o664)
if err != nil {
log.Panicln(err)
}
}