
ref: remove scraper app

master
Inhji Y. committed 10 months ago
commit d1eb24c659
13 changed files with 1984 deletions:

  1. apps/scraper/.formatter.exs (4 deletions)
  2. apps/scraper/.gitignore (24 deletions)
  3. apps/scraper/README.md (21 deletions)
  4. apps/scraper/lib/scraper.ex (5 deletions)
  5. apps/scraper/lib/scraper/application.ex (13 deletions)
  6. apps/scraper/lib/scraper/html.ex (30 deletions)
  7. apps/scraper/lib/scraper/weather.ex (51 deletions)
  8. apps/scraper/lib/scraper/workers/weather.ex (14 deletions)
  9. apps/scraper/mix.exs (37 deletions)
  10. apps/scraper/test/data/example.com.html (47 deletions)
  11. apps/scraper/test/data/github.com.html (1716 deletions)
  12. apps/scraper/test/scraper_test.exs (21 deletions)
  13. apps/scraper/test/test_helper.exs (1 deletion)

apps/scraper/.formatter.exs (4 deletions)

@@ -1,4 +0,0 @@
# Used by "mix format"
[
  inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"]
]

apps/scraper/.gitignore (24 deletions)

@@ -1,24 +0,0 @@
# The directory Mix will write compiled artifacts to.
/_build/

# If you run "mix test --cover", coverage assets end up here.
/cover/

# The directory Mix downloads your dependencies sources to.
/deps/

# Where third-party dependencies like ExDoc output generated docs.
/doc/

# Ignore .fetch files in case you like to edit your project deps locally.
/.fetch

# If the VM crashes, it generates a dump, let's ignore it too.
erl_crash.dump

# Also ignore archive artifacts (built via "mix archive.build").
*.ez

# Ignore package tarball (built via "mix hex.build").
scraper-*.tar

apps/scraper/README.md (21 deletions)

@@ -1,21 +0,0 @@
# Scraper

**TODO: Add description**

## Installation

If [available in Hex](https://hex.pm/docs/publish), the package can be installed
by adding `scraper` to your list of dependencies in `mix.exs`:

```elixir
def deps do
  [
    {:scraper, "~> 0.1.0"}
  ]
end
```

Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc)
and published on [HexDocs](https://hexdocs.pm). Once published, the docs can
be found at [https://hexdocs.pm/scraper](https://hexdocs.pm/scraper).

apps/scraper/lib/scraper.ex (5 deletions)

@@ -1,5 +0,0 @@
defmodule Scraper do
  @moduledoc """
  Documentation for `Scraper`.
  """
end

apps/scraper/lib/scraper/application.ex (13 deletions)

@@ -1,13 +0,0 @@
defmodule Scraper.Application do
  use Application

  def start(_type, _args) do
    import Supervisor.Spec, warn: false

    children = [
      worker(Cachex, [:weather, []], id: :weather)
    ]

    Supervisor.start_link(children, strategy: :one_for_one, name: Scraper.Supervisor)
  end
end
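
As a side note, the same supervision tree could have been expressed without the deprecated `Supervisor.Spec.worker/3` helper. A minimal sketch with unchanged behaviour, using a plain child-spec map for the same `Cachex.start_link(:weather, [])` call:

```elixir
# Sketch only: identical start MFA, written as a map child spec
# instead of Supervisor.Spec.worker/3.
children = [
  %{id: :weather, start: {Cachex, :start_link, [:weather, []]}}
]

Supervisor.start_link(children, strategy: :one_for_one, name: Scraper.Supervisor)
```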

apps/scraper/lib/scraper/html.ex (30 deletions)

@@ -1,30 +0,0 @@
defmodule Scraper.Html do
  def get_html(url) do
    case Http.get(url, [], follow_redirect: true) do
      {:ok, %{body: body, status_code: 200}} ->
        {:ok, body}

      {:ok, %{status_code: status_code}} ->
        {:error, status_code}

      {:error, error} ->
        {:error, error}
    end
  end

  def get_title!(html) do
    {:ok, document} = Floki.parse_document(html)

    document
    |> Floki.find("head title")
    |> Floki.text()
  end

  def parse(html) do
    title = get_title!(html)
    og = OpenGraphExtended.parse(html)
    result = Map.merge(%{title: title}, og, fn _k, v1, v2 -> v2 || v1 end)

    {:ok, result}
  end
end
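
For reference, a minimal usage sketch of this module as it stood before removal. The URL is illustrative, and `Http.get/3` is assumed to come from the umbrella's own `http` app:

```elixir
# Sketch only: fetch a page, then pull out its title and OpenGraph data.
with {:ok, html} <- Scraper.Html.get_html("https://example.com"),
     {:ok, meta} <- Scraper.Html.parse(html) do
  meta.title
  # => "Example Domain"
end
```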

apps/scraper/lib/scraper/weather.ex (51 deletions)

@@ -1,51 +0,0 @@
defmodule Scraper.Weather do
  @base_url "http://api.openweathermap.org/data/2.5/onecall"
  @opts "units=metric&exclude=minutely,daily"

  def get_weather() do
    [api_key: api_key, latitude: lat, longitude: lon, city_id: _] =
      Application.fetch_env!(:scraper, :weather)

    url = "#{@base_url}?lat=#{lat}&lon=#{lon}&appid=#{api_key}&#{@opts}"

    with %{body: body, status_code: 200} <- Http.get!(url),
         {:ok, json} <- Jason.decode(body, keys: :atoms) do
      json
    else
      error -> error
    end
  end

  def save_weather(%{
        current: current,
        daily: daily,
        hourly: _hourly,
        lat: _lat,
        lon: _lon,
        timezone: _timezone,
        timezone_offset: _timezone_offset
      }) do
    daily_data = Enum.map(daily, &map_daily/1)

    Cachex.put(:weather, :daily, daily_data)

    Cachex.put(:weather, :now, %{
      pretty: List.first(current.weather),
      temp: current.temp,
      clouds: current.clouds,
      timestamp: current.dt,
      humidity: current.humidity
    })
  end

  defp map_daily(day) do
    date =
      day.dt
      |> DateTime.from_unix!()
      |> DateTime.to_date()

    day
    |> Map.put(:date, date)
    |> Map.put(:pretty, List.first(day.weather))
  end
end
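
For context, a hedged sketch of the fetch-and-cache round trip this module provided. It assumes the `:scraper, :weather` config keys matched in `get_weather/0` are set:

```elixir
# Sketch only: refresh the cache, then read back what save_weather/1 stored.
Scraper.Weather.get_weather()
|> Scraper.Weather.save_weather()

{:ok, now} = Cachex.get(:weather, :now)
{:ok, daily} = Cachex.get(:weather, :daily)
```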

apps/scraper/lib/scraper/workers/weather.ex (14 deletions)

@@ -1,14 +0,0 @@
defmodule Scraper.Workers.Weather do
  use Oban.Worker,
    queue: :scraper,
    max_attempts: 1,
    unique: [period: 60, fields: [:worker]]

  require Logger

  @impl Oban.Worker
  def perform(_args, _job) do
    weather_data = Scraper.Weather.get_weather()
    Scraper.Weather.save_weather(weather_data)
  end
end
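
A sketch of how this worker would have been enqueued under the Oban 1.x API; the empty args map is an assumption, since the actual scheduling code is not part of this diff:

```elixir
# Sketch only: put a weather refresh job on the :scraper queue.
%{}
|> Scraper.Workers.Weather.new()
|> Oban.insert()
```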

apps/scraper/mix.exs (37 deletions)

@@ -1,37 +0,0 @@
defmodule Scraper.MixProject do
  use Mix.Project

  def project do
    [
      app: :scraper,
      version: "0.1.0",
      build_path: "../../_build",
      config_path: "../../config/config.exs",
      deps_path: "../../deps",
      lockfile: "../../mix.lock",
      elixir: "~> 1.10",
      start_permanent: Mix.env() == :prod,
      deps: deps()
    ]
  end

  # Run "mix help compile.app" to learn about applications.
  def application do
    [
      mod: {Scraper.Application, []},
      extra_applications: [:logger]
    ]
  end

  # Run "mix help deps" to learn about dependencies.
  defp deps do
    [
      {:cachex, "~> 3.2"},
      {:floki, "~> 0.26.0"},
      {:http, in_umbrella: true},
      {:httpoison, "~> 1.6"},
      {:oban, "~> 1.2"},
      {:open_graph_extended, git: "https://git.inhji.de/inhji/open_graph.git"}
    ]
  end
end

apps/scraper/test/data/example.com.html (47 deletions)

@@ -1,47 +0,0 @@
<!doctype html>
<html>
<head>
    <title>Example Domain</title>

    <meta charset="utf-8" />
    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
</head>

<body>
<div>
    <h1>Example Domain</h1>
    <p>This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.</p>
    <p><a href="https://www.iana.org/domains/example">More information...</a></p>
</div>
</body>
</html>

apps/scraper/test/data/github.com.html (1716 deletions)

File diff suppressed because it is too large.

apps/scraper/test/scraper_test.exs (21 deletions)

@@ -1,21 +0,0 @@
defmodule ScraperTest do
  use ExUnit.Case
  doctest Scraper

  test "get_title/1 extracts the website title from the given html" do
    html = File.read!("./test/data/example.com.html")

    assert Scraper.Html.get_title!(html) == "Example Domain"
  end

  test "parse/1 extracts the title and og properties when og properties are present" do
    html = File.read!("./test/data/github.com.html")
    {:ok, result} = Scraper.Html.parse(html)

    assert result.title == "sheharyarn/que"
  end

  test "parse/1 extracts the title and og properties when og properties are not present" do
    html = File.read!("./test/data/example.com.html")
    {:ok, result} = Scraper.Html.parse(html)

    assert result.title == "Example Domain"
  end
end

apps/scraper/test/test_helper.exs (1 deletion)

@@ -1 +0,0 @@
ExUnit.start()