searchhut/cmd/sh-index/main.go
Drew DeVault 19a9a3a3b5 sh-index: add -u flag to add URLs to schedule
This is useful for indexing parts of sites which are not reachable from
the index page.
2022-07-11 20:57:59 +02:00

59 lines
1 KiB
Go

package main
import (
"database/sql"
"log"
"net/url"
"os"
"git.sr.ht/~sircmpwn/getopt"
_ "github.com/lib/pq"
"git.sr.ht/~sircmpwn/searchhut/config"
"git.sr.ht/~sircmpwn/searchhut/crawler"
)
// main indexes a single domain with the searchhut crawler. Each -u flag
// manually schedules an additional URL, which is useful for indexing
// parts of a site that are not reachable from the index page.
//
// Usage: sh-index [-u url]... <domain>
func main() {
	var urls []*url.URL
	opts, optind, err := getopt.Getopts(os.Args, "u:")
	if err != nil {
		// Consistent with the other fatal paths below; a plain usage
		// error does not warrant a panic stack trace.
		log.Fatal(err)
	}
	for _, opt := range opts {
		switch opt.Option {
		case 'u':
			// Named "u" rather than "url" to avoid shadowing the
			// net/url package.
			u, err := url.Parse(opt.Value)
			if err != nil {
				log.Fatal(err)
			}
			urls = append(urls, u)
		}
	}
	args := os.Args[optind:]
	// Guard the positional argument: without this, a bare "sh-index"
	// invocation panics with an index-out-of-range error.
	if len(args) < 1 {
		log.Fatalf("Usage: %s [-u url]... <domain>", os.Args[0])
	}
	domain := args[0]
	conf := config.Load()
	connstr, ok := conf.Get("searchhut", "connection-string")
	if !ok {
		log.Fatal("Configuration missing connection string")
	}
	// Note: sql.Open validates the DSN but does not connect; connection
	// errors surface on first use by the crawler.
	db, err := sql.Open("postgres", connstr)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	ua, ok := conf.Get("searchhut", "user-agent")
	if !ok {
		log.Fatal("Configuration missing user agent")
	}
	crawler := crawler.NewCrawler(ua, db, domain)
	for _, u := range urls {
		log.Printf("Manually scheduling %s", u.String())
		crawler.Schedule(u)
	}
	crawler.Crawl()
}