searchhut/cmd/sh-index/main.go

package main

import (
	"database/sql"
	"log"
	"net/url"
	"os"

	"git.sr.ht/~sircmpwn/getopt"
	_ "github.com/lib/pq"

	"git.sr.ht/~sircmpwn/searchhut/config"
	"git.sr.ht/~sircmpwn/searchhut/crawler"
)
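
// sh-index crawls a single domain for SearchHut, recording the results in
// its PostgreSQL database. Usage:
//
//	sh-index [-u <url>]... <domain>
//
// Each -u flag manually schedules an extra URL ahead of the normal crawl.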
func main() {
	var urls []*url.URL

	opts, optind, err := getopt.Getopts(os.Args, "u:")
	if err != nil {
		panic(err)
	}
	for _, opt := range opts {
		switch opt.Option {
		case 'u':
			// Collect URLs to schedule manually before the crawl begins
			url, err := url.Parse(opt.Value)
			if err != nil {
				log.Fatal(err)
			}
			urls = append(urls, url)
		}
	}

	args := os.Args[optind:]
	if len(args) == 0 {
		log.Fatalf("Usage: %s [-u <url>]... <domain>", os.Args[0])
	}
	domain := args[0]

	conf := config.Load()
	connstr, ok := conf.Get("searchhut", "connection-string")
	if !ok {
		log.Fatal("Configuration missing connection string")
	}

	db, err := sql.Open("postgres", connstr)
	if err != nil {
		log.Fatal(err)
	}

	ua, ok := conf.Get("searchhut", "user-agent")
	if !ok {
		log.Fatal("Configuration missing user agent")
	}

	// Schedule any manually provided URLs, then crawl the domain.
	crawler := crawler.NewCrawler(ua, db, domain)
	for _, url := range urls {
		log.Printf("Manually scheduling %s", url.String())
		crawler.Schedule(url)
	}
	crawler.Crawl()
}