package main

import (
	"database/sql"
	"log"
	"net/url"
	"os"

	"git.sr.ht/~sircmpwn/getopt"
	_ "github.com/lib/pq"

	"git.sr.ht/~sircmpwn/searchhut/config"
	"git.sr.ht/~sircmpwn/searchhut/crawler"
)

// Usage: crawler [-u <url>...] <domain>
//
// Crawls the given domain, optionally seeding the crawl queue with
// URLs passed via -u.
func main() {
	// Collect any URLs passed with -u so they can be scheduled
	// ahead of the normal crawl.
	var urls []*url.URL
	opts, optind, err := getopt.Getopts(os.Args, "u:")
	if err != nil {
		panic(err)
	}
	for _, opt := range opts {
		switch opt.Option {
		case 'u':
			url, err := url.Parse(opt.Value)
			if err != nil {
				log.Fatal(err)
			}
			urls = append(urls, url)
		}
	}
	args := os.Args[optind:]
	if len(args) != 1 {
		log.Fatal("Usage: crawler [-u <url>...] <domain>")
	}
	domain := args[0]

	// Load the database connection string and crawler user agent
	// from the searchhut configuration.
	conf := config.Load()
	connstr, ok := conf.Get("searchhut", "connection-string")
	if !ok {
		log.Fatal("Configuration missing connection string")
	}
	db, err := sql.Open("postgres", connstr)
	if err != nil {
		log.Fatal(err)
	}
	ua, ok := conf.Get("searchhut", "user-agent")
	if !ok {
		log.Fatal("Configuration missing user agent")
	}

	// Schedule any manually specified URLs, then crawl the domain.
	crawler := crawler.NewCrawler(ua, db, domain)
	for _, url := range urls {
		log.Printf("Manually scheduling %s", url.String())
		crawler.Schedule(url)
	}
	crawler.Crawl()
}