2022-07-08 19:46:11 +02:00
|
|
|
-- Table "domain": one identity-keyed row per entry, carrying a hostname, an
-- "authoritative" flag, two text arrays and two nullable bookkeeping columns.
-- NOTE(review): the business meaning of "authoritative" and "tags" is not
-- visible in this file -- confirm against the code that writes these rows.
CREATE TABLE domain (
    -- Surrogate key; BY DEFAULT lets a writer supply an explicit id.
    id                 integer     GENERATED BY DEFAULT AS IDENTITY,
    hostname           text        NOT NULL,
    authoritative      boolean     NOT NULL,
    -- Required, no default: every insert must provide the array.
    tags               text[]      NOT NULL,
    -- Required, but falls back to an empty array when omitted.
    exclusion_patterns text[]      NOT NULL DEFAULT '{}',
    -- Both columns below are nullable.
    -- Presumably unset until the domain has been crawled -- TODO confirm.
    last_index_date    timestamptz,
    crawl_duration     interval,

    PRIMARY KEY (id)
);
|
|
|
|
|
|
|
|
-- Table "page": one identity-keyed row per entry, each tied to a row in
-- "domain". Both "url" and "checksum" are unique across the whole table.
-- "fts_vector" is the column covered by the GIN index idx_page_content.
CREATE TABLE page (
    -- Surrogate key; BY DEFAULT lets a writer supply an explicit id.
    id              integer     GENERATED BY DEFAULT AS IDENTITY,
    domain_id       integer     NOT NULL,
    -- NOTE(review): allowed values of "source" are not constrained here --
    -- confirm against the code that writes this column.
    source          varchar     NOT NULL,
    url             text        NOT NULL,
    -- Presumably a digest of the page content -- TODO confirm what is hashed.
    checksum        bytea       NOT NULL,
    last_index_date timestamptz NOT NULL,
    fts_vector      tsvector    NOT NULL,
    javascript      boolean     NOT NULL,
    -- The remaining columns are nullable.
    title           text,
    language        text,
    description     text,
    author          text,
    excerpt         text,

    PRIMARY KEY (id),
    -- No ON DELETE / ON UPDATE action is given, so the default (NO ACTION)
    -- applies to the referenced domain row.
    FOREIGN KEY (domain_id) REFERENCES domain (id),
    UNIQUE (url),
    UNIQUE (checksum)
);
|
|
|
|
|
|
|
|
-- GIN index over page.fts_vector, the tsvector column of "page".
CREATE INDEX idx_page_content
    ON page
    USING gin (fts_vector);
|