diff --git a/crawler/index.go b/crawler/index.go index 8520481..2ca8a0e 100644 --- a/crawler/index.go +++ b/crawler/index.go @@ -14,6 +14,7 @@ import ( "time" "github.com/go-shiori/go-readability" + "github.com/mattn/go-runewidth" "golang.org/x/net/html" "git.sr.ht/~sircmpwn/searchhut/database" @@ -115,6 +116,7 @@ func (c *Crawler) Index(ctx context.Context, url *url.URL) error { weight += 1 } + excerpt := runewidth.Truncate(article.Excerpt, 512, "…") return database.WithTx(ctx, nil, func(tx *sql.Tx) error { _, err := tx.ExecContext(ctx, ` INSERT INTO page ( @@ -161,7 +163,7 @@ func (c *Crawler) Index(ctx context.Context, url *url.URL) error { setweight(to_tsvector(coalesce($11, '')), $13) || setweight(to_tsvector(coalesce($12, '')), 'D');`, c.DomainID, url.String(), counter.Length, hash.Sum([]byte{}), - meta.Title, meta.Author, meta.Description, article.Excerpt, + meta.Title, meta.Author, meta.Description, excerpt, meta.JavaScript, url.Host, c.labels, article.TextContent, weights[weight]) return err diff --git a/go.mod b/go.mod index 44d47d0..5641dae 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/go-chi/chi v1.5.4 github.com/go-shiori/go-readability v0.0.0-20220215145315-dd6828d2f09b github.com/lib/pq v1.10.6 + github.com/mattn/go-runewidth v0.0.13 github.com/temoto/robotstxt v1.1.2 github.com/vaughan0/go-ini v0.0.0-20130923145212-a98ad7ee00ec github.com/vektah/gqlparser/v2 v2.4.6 @@ -27,6 +28,7 @@ require ( github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/matryer/moq v0.2.7 // indirect github.com/mitchellh/mapstructure v1.3.1 // indirect + github.com/rivo/uniseg v0.2.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/sirupsen/logrus v1.8.1 // indirect github.com/urfave/cli/v2 v2.8.1 // indirect diff --git a/go.sum b/go.sum index 5ee914b..c8330f1 100644 --- a/go.sum +++ b/go.sum @@ -98,6 +98,8 @@ github.com/matryer/moq v0.2.7 h1:RtpiPUM8L7ZSCbSwK+QcZH/E9tgqAkFjKQxsRs25b4w= github.com/matryer/moq v0.2.7/go.mod h1:kITsx543GOENm48TUAQyJ9+SAvFSr7iGQXPoth/VUBk= github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= +github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= @@ -118,6 +120,8 @@ github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8 github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= +github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=