Browse Source

the great refactoring[tm]

Much more idiomatic Elixir code
pull/6/head
Christian Kruse 1 year ago
parent
commit
7dad01944a
6 changed files with 200 additions and 231 deletions
  1. +1
    -0
      .gitignore
  2. +93
    -0
      lib/helpers.ex
  3. +42
    -80
      lib/implied.ex
  4. +47
    -63
      lib/items.ex
  5. +0
    -62
      lib/microformats2.ex
  6. +17
    -26
      lib/rels.ex

+ 1
- 0
.gitignore View File

@@ -4,3 +4,4 @@
erl_crash.dump
*.ez
/.elixir_ls
/doc

+ 93
- 0
lib/helpers.ex View File

@@ -0,0 +1,93 @@
defmodule Microformats2.Helpers do
  @moduledoc """
  Helper functions shared by the Microformats2 parser modules: attribute
  token lists, blank/present checks, microformats class-prefix predicates,
  URL resolution and property-key normalization.
  """

  @doc """
  Returns the whitespace-separated tokens of the first `attr` attribute found
  on `node` (defaults to `"class"`).

  Returns `[]` when the attribute is missing or empty. Tokens are split on any
  whitespace (spaces, tabs, newlines), not only on a literal space character.
  """
  @spec attr_list(String.t() | [any()] | tuple(), String.t()) :: [String.t()]
  def attr_list(node, attr \\ "class") do
    node
    |> Floki.attribute(attr)
    |> List.first()
    # `nil` (attribute missing) becomes "" and therefore an empty token list.
    |> to_string()
    |> String.split()
  end

  @doc """
  Returns `true` for `nil`, the empty string and the empty list; `false` for
  every other value.
  """
  @spec blank?(any()) :: boolean()
  def blank?(nil), do: true
  def blank?(""), do: true
  def blank?([]), do: true
  def blank?(_), do: false

  @doc """
  The negation of `blank?/1`.
  """
  @spec present?(any()) :: boolean()
  def present?(v), do: not blank?(v)

  @doc """
  Trims leading and trailing whitespace from `val`; `nil` is passed through.
  """
  @spec stripped_or_nil(nil | String.t()) :: nil | String.t()
  def stripped_or_nil(nil), do: nil
  def stripped_or_nil(val), do: String.trim(val)

  @doc """
  Tells whether something is a microformats root.

  Given a node tuple, checks whether any of its classes starts with `h-`;
  given a class name, checks that class name directly.
  """
  @spec is_rootlevel?(bitstring() | tuple()) :: boolean()
  def is_rootlevel?(node) when is_tuple(node) do
    node
    |> attr_list("class")
    |> Enum.any?(&is_a?(&1, "h"))
  end

  def is_rootlevel?(class_name) when is_bitstring(class_name) do
    is_a?(class_name, "h")
  end

  @doc """
  Checks whether `class_name` carries the microformats prefix `wanted`.

  `wanted` is the bare prefix without the dash (`"h"`, `"p"`, `"e"`, `"u"` or
  `"dt"`). Any class name without one of these prefixes yields `false`.
  """
  @spec is_a?(any(), any()) :: boolean()
  def is_a?("h-" <> _, wanted), do: wanted == "h"
  def is_a?("p-" <> _, wanted), do: wanted == "p"
  def is_a?("e-" <> _, wanted), do: wanted == "e"
  def is_a?("u-" <> _, wanted), do: wanted == "u"
  def is_a?("dt-" <> _, wanted), do: wanted == "dt"
  def is_a?(_, _), do: false

  @doc """
  Returns `true` when at least one class of `node` carries the microformats
  prefix `wanted`.

  `wanted` may be given with or without the trailing dash (`"h"` or `"h-"`).
  """
  @spec has_a?(String.t() | [any()] | tuple(), any()) :: boolean()
  def has_a?(node, wanted) do
    prefix = normalize_prefix(wanted)

    node
    |> attr_list()
    |> Enum.any?(&is_a?(&1, prefix))
  end

  # `is_a?/2` compares against the bare prefix, so "h-" has to become "h".
  defp normalize_prefix(wanted) when is_binary(wanted), do: String.trim_trailing(wanted, "-")
  defp normalize_prefix(wanted), do: wanted

  @doc """
  Resolves `url` to an absolute URL.

    * A URL that already has a scheme is returned unchanged.
    * A protocol-relative URL (`//host/path`) receives the scheme of `base_url`.
    * Anything else is resolved against the `href` of the first `<base>`
      element found in `doc` (itself resolved against `base_url`), or against
      `base_url` when `doc` has no usable `<base>` element. Scheme, host and
      port are taken from that base.
  """
  @spec abs_uri(String.t(), String.t(), any()) :: String.t()
  def abs_uri(url, base_url, doc) do
    parsed = URI.parse(url)

    cond do
      # absolute URI
      present?(parsed.scheme) ->
        url

      # protocol relative URI (the scheme is known to be blank at this point)
      present?(parsed.host) ->
        URI.to_string(%{parsed | scheme: URI.parse(base_url).scheme})

      true ->
        new_base =
          case base_href(doc) do
            nil -> base_url
            # An empty document is passed on so the <base> lookup is not repeated.
            href -> abs_uri(href, base_url, [])
          end

        parsed_new_base = URI.parse(new_base)
        new_path = Path.expand(parsed.path || "/", Path.dirname(parsed_new_base.path || "/"))

        # The port is copied as well; otherwise a base such as
        # "http://localhost:4000/" would resolve to the default port.
        URI.to_string(%{
          parsed
          | scheme: parsed_new_base.scheme,
            host: parsed_new_base.host,
            port: parsed_new_base.port,
            path: new_path
        })
    end
  end

  # Returns the href of the first <base> element in `doc`, or nil if there is
  # no <base> element or it has no href attribute.
  defp base_href([]), do: nil

  defp base_href(doc) do
    base_element = Floki.find(doc, "base")

    if blank?(base_element),
      do: nil,
      else: base_element |> Floki.attribute("href") |> List.first()
  end

  @doc """
  Turns a property name into a key name by replacing every dash with an
  underscore.
  """
  @spec to_key(String.t()) :: String.t()
  def to_key(str) do
    String.replace(str, "-", "_")
  end

  @doc """
  Converts `key` to an atom unless the `:atomize_keys` setting of the
  `:microformats2` application is set to a falsy value, in which case the
  string is returned unchanged. Atomizing is the default.
  """
  @spec normalized_key(String.t()) :: String.t() | atom()
  def normalized_key(key) do
    # NOTE(review): when keys are derived from class names of parsed documents,
    # this creates atoms from external input, and atoms are never garbage
    # collected. Set `atomize_keys: false` when parsing untrusted documents.
    if Application.get_env(:microformats2, :atomize_keys, true),
      do: String.to_atom(key),
      else: key
  end
end

+ 42
- 80
lib/implied.ex View File

@@ -1,58 +1,62 @@
defmodule Microformats2.Items.ImpliedProperties do
import Microformats2.Helpers

alias Microformats2.Items

def parse(entry, root, url, doc) do
implied_name_property(entry, root)
entry
|> implied_name_property(root)
|> implied_photo_property(root)
|> implied_url_property(root, url, doc)
end

defp implied_url_property(entry, root, doc_url, doc) do
url_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :url, else: "url"
url_key = normalized_key("url")

if entry[:properties][url_key] == nil do
val = implied_url_attrval(root)

url =
if Microformats2.blank?(val) do
if blank?(val) do
implied_url_deep(root)
else
val
end
|> Microformats2.stripped_or_nil()
|> stripped_or_nil()

if Microformats2.blank?(url) do
entry
else
Map.put(entry, :properties, Map.put(entry[:properties], url_key, [Microformats2.abs_uri(url, doc_url, doc)]))
end
if blank?(url),
do: entry,
else: put_in(entry, [:properties, url_key], [abs_uri(url, doc_url, doc)])
else
entry
end
end

defp implied_photo_property(entry, root) do
photo_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :photo, else: "photo"
photo_key = normalized_key("photo")

if entry[:properties][photo_key] == nil do
val = implied_photo_attrval(root)

url =
if Microformats2.blank?(val) do
if blank?(val) do
implied_photo_deep(root)
else
val
end
|> Microformats2.stripped_or_nil()
|> stripped_or_nil()

if Microformats2.blank?(url) do
entry
else
Map.put(entry, :properties, Map.put(entry[:properties], photo_key, [url]))
end
if blank?(url),
do: entry,
else: put_in(entry, [:properties, photo_key], [url])
else
entry
end
end

defp implied_name_property(entry, root = {elem, _, _}) do
name_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :name, else: "name"
name_key = normalized_key("name")

if entry[:properties][name_key] == nil do
nam =
cond do
@@ -65,38 +69,28 @@ defmodule Microformats2.Items.ImpliedProperties do
true ->
val = implied_name_deep(root)

if Microformats2.blank?(val) do
Microformats2.Items.text_content(root)
else
val
end
if blank?(val),
do: Items.text_content(root),
else: val
end
|> Microformats2.stripped_or_nil()
|> stripped_or_nil()

Map.put(entry, :properties, Map.put(entry[:properties], name_key, [nam]))
put_in(entry, [:properties, name_key], [nam])
else
entry
end
end

defp implied_name_deep({_, _, children}) do
only_nodes =
Enum.filter(children, fn
el when is_bitstring(el) -> false
_ -> true
end)
only_nodes = Enum.reject(children, &is_bitstring/1)

if Enum.count(only_nodes) == 1 do
sec_node = List.first(only_nodes)
{_, _, sec_node_children} = sec_node
attrval = implied_name_attrval(sec_node)

if Microformats2.blank?(attrval) do
sec_only_nodes =
Enum.filter(sec_node_children, fn
el when is_bitstring(el) -> false
_ -> true
end)
if blank?(attrval) do
sec_only_nodes = Enum.reject(sec_node_children, &is_bitstring/1)

if Enum.count(sec_only_nodes) == 1 do
third_node = sec_only_nodes |> List.first()
@@ -108,21 +102,10 @@ defmodule Microformats2.Items.ImpliedProperties do
end
end

defp implied_name_attrval(node = {"img", _, _}) do
Floki.attribute(node, "alt") |> List.first()
end

defp implied_name_attrval(node = {"area", _, _}) do
Floki.attribute(node, "alt") |> List.first()
end

defp implied_name_attrval(node = {"abbr", _, _}) do
Floki.attribute(node, "title") |> List.first()
end

defp implied_name_attrval(_) do
nil
end
defp implied_name_attrval(node = {"img", _, _}), do: Floki.attribute(node, "alt") |> List.first()
defp implied_name_attrval(node = {"area", _, _}), do: Floki.attribute(node, "alt") |> List.first()
defp implied_name_attrval(node = {"abbr", _, _}), do: Floki.attribute(node, "title") |> List.first()
defp implied_name_attrval(_), do: nil

defp implied_photo_deep(root) do
imgs = direct_not_h_children_with_attr(root, "img", "src")
@@ -137,12 +120,7 @@ defmodule Microformats2.Items.ImpliedProperties do

true ->
{_, _, children} = root

only_nodes =
Enum.filter(children, fn
el when is_bitstring(el) -> false
_ -> true
end)
only_nodes = Enum.reject(children, &is_bitstring/1)

if Enum.count(only_nodes) == 1 do
child = List.first(children)
@@ -181,36 +159,20 @@ defmodule Microformats2.Items.ImpliedProperties do
end
end

defp implied_photo_attrval(node = {"img", _, _}) do
Floki.attribute(node, "src") |> List.first()
end

defp implied_photo_attrval(node = {"object", _, _}) do
Floki.attribute(node, "data") |> List.first()
end

defp implied_photo_attrval(_) do
nil
end
defp implied_photo_attrval(node = {"img", _, _}), do: Floki.attribute(node, "src") |> List.first()
defp implied_photo_attrval(node = {"object", _, _}), do: Floki.attribute(node, "data") |> List.first()
defp implied_photo_attrval(_), do: nil

defp direct_not_h_children_with_attr({_, _, children}, name, attr) do
Enum.filter(children, fn
{el, _, _} -> el == name
v when is_bitstring(v) -> false
end)
|> Enum.filter(fn el -> not Microformats2.is_rootlevel?(el) end)
|> Enum.filter(fn el -> not is_rootlevel?(el) end)
|> Enum.filter(fn el -> Enum.count(Floki.attribute(el, attr)) > 0 end)
end

defp implied_url_attrval(node = {"a", _, _}) do
Floki.attribute(node, "href") |> List.first()
end

defp implied_url_attrval(node = {"area", _, _}) do
Floki.attribute(node, "href") |> List.first()
end

defp implied_url_attrval(_) do
nil
end
defp implied_url_attrval(node = {"a", _, _}), do: Floki.attribute(node, "href") |> List.first()
defp implied_url_attrval(node = {"area", _, _}), do: Floki.attribute(node, "href") |> List.first()
defp implied_url_attrval(_), do: nil
end

+ 47
- 63
lib/items.ex View File

@@ -1,4 +1,8 @@
defmodule Microformats2.Items do
import Microformats2.Helpers

alias Microformats2.Items.ImpliedProperties

def parse(nodes, doc, url, items \\ [])
def parse([head | tail], doc, url, items) when is_bitstring(head), do: parse(tail, doc, url, items)
def parse([head | tail], doc, url, items), do: parse(tail, doc, url, parse(head, doc, url, items))
@@ -6,8 +10,8 @@ defmodule Microformats2.Items do

def parse(root, doc, url, items) do
root_classes =
Microformats2.attr_list(root)
|> Enum.filter(fn class_name -> Microformats2.is_rootlevel?(class_name) end)
attr_list(root)
|> Enum.filter(&is_rootlevel?/1)
|> Enum.sort()

{_, _, children} = root
@@ -15,7 +19,7 @@ defmodule Microformats2.Items do
if not Enum.empty?(root_classes) do
entry =
parse_sub(children, doc, url, %{type: root_classes, properties: %{}})
|> Microformats2.Items.ImpliedProperties.parse(root, url, doc)
|> ImpliedProperties.parse(root, url, doc)

items ++ [entry]
else
@@ -28,14 +32,14 @@ defmodule Microformats2.Items do

defp parse_sub([child = {_, _, child_children} | children], doc, url, item) do
p =
if Microformats2.has_a?(child, "h-") do
if has_a?(child, "h-") do
parse(child, doc, url, []) |> List.first()
else
[]
end

classes =
Microformats2.attr_list(child)
attr_list(child)
|> Enum.filter(fn
"p-" <> _ -> true
"u-" <> _ -> true
@@ -45,7 +49,11 @@ defmodule Microformats2.Items do
end)

props = gen_prop(child, classes, item, p, doc, url)
n_item = if Microformats2.is_rootlevel?(child), do: props, else: parse_sub(child_children, doc, url, props)

n_item =
if is_rootlevel?(child),
do: props,
else: parse_sub(child_children, doc, url, props)

parse_sub(children, doc, url, n_item)
end
@@ -57,10 +65,10 @@ defmodule Microformats2.Items do
alt = Floki.attribute(child, "alt") |> List.first()

cond do
elem == "abbr" and not Microformats2.blank?(title) ->
elem == "abbr" and present?(title) ->
title

elem == "img" and not Microformats2.blank?(alt) ->
elem == "img" and present?(alt) ->
alt

true ->
@@ -77,29 +85,29 @@ defmodule Microformats2.Items do
value = Floki.attribute(child, "value") |> List.first()

cond do
Enum.member?(["a", "area"], elem) and not Microformats2.blank?(href) ->
Enum.member?(["a", "area"], elem) and present?(href) ->
href

Enum.member?(["img", "audio", "video", "source"], elem) and not Microformats2.blank?(src) ->
Enum.member?(["img", "audio", "video", "source"], elem) and present?(src) ->
src

elem == "object" and not Microformats2.blank?(data) ->
elem == "object" and present?(data) ->
data

elem == "video" and not Microformats2.blank?(poster) ->
elem == "video" and present?(poster) ->
poster

# TODO value-class-pattern at this position
elem == "abbr" and not Microformats2.blank?(title) ->
elem == "abbr" and present?(title) ->
title

Enum.member?(["data", "input"], elem) and not Microformats2.blank?(value) ->
Enum.member?(["data", "input"], elem) and present?(value) ->
value

true ->
text_content(child) |> String.trim()
end
|> Microformats2.abs_uri(url, doc)
|> abs_uri(url, doc)
end

defp parse_prop("dt-" <> _, child = {elem, _, _}, _, _) do
@@ -108,13 +116,13 @@ defmodule Microformats2.Items do
value = Floki.attribute(child, "value")

cond do
Enum.member?(["time", "ins", "del"], elem) and not Microformats2.blank?(dt) ->
Enum.member?(["time", "ins", "del"], elem) and present?(dt) ->
dt |> List.first()

elem == "abbr" and not Microformats2.blank?(title) ->
elem == "abbr" and present?(title) ->
title |> List.first()

Enum.member?(["data", "input"], elem) and not Microformats2.blank?(value) ->
Enum.member?(["data", "input"], elem) and present?(value) ->
value |> List.first()

true ->
@@ -124,25 +132,26 @@ defmodule Microformats2.Items do

defp parse_prop("e-" <> _, child = {_, _, children}, _, _) do
%{
html: Microformats2.stripped_or_nil(Floki.raw_html(children)),
text: Microformats2.stripped_or_nil(Floki.text(child))
html: stripped_or_nil(Floki.raw_html(children)),
text: stripped_or_nil(Floki.text(child))
}
end

defp parse_prop(_, _, _, _), do: nil

defp get_value(class, p) do
name_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :name, else: "name"
url_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :url, else: "url"
name_key = normalized_key("name")
url_key = normalized_key("url")

cond do
Microformats2.is_a?(class, "p") and p[:properties][name_key] != nil ->
is_a?(class, "p") and p[:properties][name_key] != nil ->
List.first(p[:properties][name_key])

Microformats2.is_a?(class, "u") and p[:properties][url_key] != nil ->
is_a?(class, "u") and p[:properties][url_key] != nil ->
List.first(p[:properties][url_key])

# and p[:properties][url_key] != nil ->
Microformats2.is_a?(class, "e") ->
is_a?(class, "e") ->
# TODO handle
nil

@@ -156,44 +165,27 @@ defmodule Microformats2.Items do
props =
Enum.reduce(classes, item[:properties], fn class, acc ->
prop =
if Microformats2.is_rootlevel?(child) do
if is_rootlevel?(child) do
Map.put(p, :value, get_value(class, p))
else
parse_prop(class, child, doc, url)
end

key = strip_prefix(class) |> to_key
key = if Application.get_env(:microformats2, :atomize_keys, true), do: String.to_atom(key), else: key
key = strip_prefix(class) |> to_key |> normalized_key()
val = if acc[key] != nil, do: acc[key], else: []
Map.put(acc, key, val ++ [prop])
end)

if Microformats2.blank?(classes) and not Microformats2.blank?(p) and Microformats2.is_rootlevel?(child) do
Map.put(item, :children, (item[:children] || []) ++ [p])
else
Map.put(item, :properties, props)
end
end

defp strip_prefix("p-" <> rest) do
rest
end

defp strip_prefix("u-" <> rest) do
rest
if blank?(classes) and present?(p) and is_rootlevel?(child),
do: Map.update(item, :children, [p], &(&1 ++ [p])),
else: Map.put(item, :properties, props)
end

defp strip_prefix("dt-" <> rest) do
rest
end

defp strip_prefix("e-" <> rest) do
rest
end

defp strip_prefix(rest) do
rest
end
defp strip_prefix("p-" <> rest), do: rest
defp strip_prefix("u-" <> rest), do: rest
defp strip_prefix("dt-" <> rest), do: rest
defp strip_prefix("e-" <> rest), do: rest
defp strip_prefix(rest), do: rest

def text_content(child, text \\ "")

@@ -202,7 +194,7 @@ defmodule Microformats2.Items do
if elem == "img" do
alt = Floki.attribute(child, "alt")

if !Microformats2.blank?(alt) do
if !blank?(alt) do
alt
else
Floki.attribute(child, "src")
@@ -212,16 +204,8 @@ defmodule Microformats2.Items do
""
end

Enum.reduce(children, text <> txt, fn child, acc ->
text_content(child, acc)
end)
end

def text_content(child, text) when is_bitstring(child) do
text <> child
Enum.reduce(children, text <> txt, &text_content/2)
end

defp to_key(str) do
String.replace(str, ~r/[-]/, "_")
end
def text_content(child, text) when is_bitstring(child), do: text <> child
end

+ 0
- 62
lib/microformats2.ex View File

@@ -24,66 +24,4 @@ defmodule Microformats2 do

%{items: items, rels: rels[:rels], rel_urls: rels[:rel_urls]}
end

def attr_list(node, attr \\ "class") do
Floki.attribute(node, attr) |> List.first() |> to_string |> String.split(" ", trim: true)
end

def blank?(nil), do: true
def blank?(""), do: true
def blank?([]), do: true
def blank?(_), do: false

def stripped_or_nil(nil), do: nil
def stripped_or_nil(val), do: String.trim(val)

def is_rootlevel?(node) when is_tuple(node) do
attr_list(node, "class")
|> Enum.any?(fn cls -> is_a?(cls, "h") end)
end

def is_rootlevel?(class_name) when is_bitstring(class_name) do
is_a?(class_name, "h")
end

def is_a?("h-" <> _, wanted), do: wanted == "h"
def is_a?("p-" <> _, wanted), do: wanted == "p"
def is_a?("e-" <> _, wanted), do: wanted == "e"
def is_a?("u-" <> _, wanted), do: wanted == "u"
def is_a?("dt-" <> _, wanted), do: wanted == "dt"
def is_a?(_, _), do: false

def has_a?(node, wanted) do
attr_list(node) |> Enum.filter(fn class -> is_a?(class, wanted) end) |> blank?
end

def abs_uri(url, base_url, doc) do
parsed = URI.parse(url)
parsed_base = URI.parse(base_url)

cond do
# absolute URI
not blank?(parsed.scheme) ->
url

# protocol relative URI
blank?(parsed.scheme) and not blank?(parsed.host) ->
URI.to_string(%{parsed | scheme: parsed_base.scheme})

true ->
base_element = Floki.find(doc, "base")

new_base =
if blank?(base_element) or blank?(Floki.attribute(base_element, "href")) do
base_url
else
abs_uri(Floki.attribute(base_element, "href") |> List.first(), base_url, [])
end

parsed_new_base = URI.parse(new_base)
new_path = Path.expand(parsed.path || "/", Path.dirname(parsed_new_base.path || "/"))

URI.to_string(%{parsed | scheme: parsed_new_base.scheme, host: parsed_new_base.host, path: new_path})
end
end
end

+ 17
- 26
lib/rels.ex View File

@@ -1,4 +1,6 @@
defmodule Microformats2.Rels do
import Microformats2.Helpers

def parse(doc, base_url) do
link_rels =
Floki.find(doc, "[rel][href]")
@@ -9,8 +11,8 @@ defmodule Microformats2.Rels do
String.trim(to_string(rel)) != "" and String.trim(to_string(href)) != ""
end)
|> Enum.reduce(%{rels: %{}, rel_urls: %{}}, fn element, acc ->
rel = Microformats2.attr_list(element, "rel")
url = Floki.attribute(element, "href") |> List.first() |> Microformats2.abs_uri(base_url, doc)
rel = attr_list(element, "rel")
url = Floki.attribute(element, "href") |> List.first() |> abs_uri(base_url, doc)

acc
|> save_urls_by_rels(rel, url)
@@ -23,45 +25,34 @@ defmodule Microformats2.Rels do

defp save_urls_by_rels(map, rel, url) do
Enum.reduce(rel, map, fn rel, nmap ->
if nmap[:rels][rel] == nil do
Map.put(nmap, :rels, Map.put(nmap[:rels], rel, [url]))
else
Map.put(nmap, :rels, Map.put(nmap[:rels], rel, Enum.uniq(nmap[:rels][rel] ++ [url])))
end
if nmap[:rels][rel] == nil,
do: put_in(nmap, [:rels, rel], [url]),
else: put_in(nmap, [:rels, rel], Enum.uniq(nmap[:rels][rel] ++ [url]))
end)
end

defp save_rels_by_urls(map, rel, url) do
if map[:rel_urls][url] == nil do
Map.put(map, :rel_urls, Map.put(map[:rel_urls], url, %{rels: rel}))
else
Map.put(
map,
:rel_urls,
Map.put(map[:rel_urls], url, Map.put(map[:rel_urls][url], :rels, Enum.uniq(map[:rel_urls][url][:rels] ++ rel)))
)
end
if map[:rel_urls][url] == nil,
do: put_in(map, [:rel_urls, url], %{rels: rel}),
else: put_in(map, [:rel_urls, url, :rels], Enum.uniq(map[:rel_urls][url][:rels] ++ rel))
end

defp save_text(map, element, url) do
text = Floki.text(element)

if String.trim(to_string(text)) == "" or map[:rel_urls][url][:text] != nil do
map
else
Map.put(map, :rel_urls, Map.put(map[:rel_urls], url, Map.put(map[:rel_urls][url], :text, text)))
end
if String.trim(to_string(text)) == "" or map[:rel_urls][url][:text] != nil,
do: map,
else: put_in(map, [:rel_urls, url, :text], text)
end

defp save_attributes(map, element, url) do
Enum.reduce(["hreflang", "media", "title", "type"], save_text(map, element, url), fn att, nmap ->
val = Floki.attribute(element, att) |> List.first()
key = String.to_atom(att)

if String.trim(to_string(val)) == "" or nmap[:rel_urls][url][String.to_atom(att)] != nil do
nmap
else
Map.put(nmap, :rel_urls, Map.put(nmap[:rel_urls], url, Map.put(nmap[:rel_urls][url], String.to_atom(att), val)))
end
if String.trim(to_string(val)) == "" or nmap[:rel_urls][url][key] != nil,
do: nmap,
else: put_in(nmap, [:rel_urls, url, key], val)
end)
end
end

Loading…
Cancel
Save