From 7dad01944a466fca3d5d0977acaa6846ad21494d Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Sun, 31 Mar 2019 09:28:16 +0200 Subject: [PATCH] the great refactoring[tm] Much more idiomatic Elixir code --- .gitignore | 1 + lib/helpers.ex | 93 +++++++++++++++++++++++++++++++++ lib/implied.ex | 122 +++++++++++++++---------------------------- lib/items.ex | 110 +++++++++++++++++--------------------- lib/microformats2.ex | 62 ---------------------- lib/rels.ex | 43 ++++++--------- 6 files changed, 200 insertions(+), 231 deletions(-) create mode 100644 lib/helpers.ex diff --git a/.gitignore b/.gitignore index c1091ee..1ac6659 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ erl_crash.dump *.ez /.elixir_ls +/doc \ No newline at end of file diff --git a/lib/helpers.ex b/lib/helpers.ex new file mode 100644 index 0000000..34b5e2a --- /dev/null +++ b/lib/helpers.ex @@ -0,0 +1,93 @@ +defmodule Microformats2.Helpers do + @spec attr_list(String.t() | [any()] | tuple(), String.t()) :: [String.t()] + def attr_list(node, attr \\ "class") do + node + |> Floki.attribute(attr) + |> List.first() + |> to_string + |> String.split(" ", trim: true) + end + + @spec blank?(any()) :: boolean() + def blank?(nil), do: true + def blank?(""), do: true + def blank?([]), do: true + def blank?(_), do: false + + @spec present?(any()) :: boolean() + def present?(v), do: not blank?(v) + + @spec stripped_or_nil(nil | String.t()) :: nil | String.t() + def stripped_or_nil(nil), do: nil + def stripped_or_nil(val), do: String.trim(val) + + @spec is_rootlevel?(bitstring() | tuple()) :: boolean() + def is_rootlevel?(node) when is_tuple(node) do + node + |> attr_list("class") + |> Enum.any?(&is_a?(&1, "h")) + end + + def is_rootlevel?(class_name) when is_bitstring(class_name) do + is_a?(class_name, "h") + end + + @spec is_a?(any(), any()) :: boolean() + def is_a?("h-" <> _, wanted), do: wanted == "h" + def is_a?("p-" <> _, wanted), do: wanted == "p" + def is_a?("e-" <> _, wanted), do: wanted == "e" + def is_a?("u-" <> _, wanted), do: wanted == "u" + def is_a?("dt-" <> _, wanted), do: wanted == "dt" + def is_a?(_, _), do: false + + @spec has_a?(String.t() | [any()] | tuple(), any()) :: boolean() + def has_a?(node, wanted) do + node + |> attr_list() + |> Enum.filter(&is_a?(&1, wanted)) + |> blank? + end + + @spec abs_uri(String.t(), String.t(), any()) :: String.t() + def abs_uri(url, base_url, doc) do + parsed = URI.parse(url) + parsed_base = URI.parse(base_url) + + cond do + # absolute URI + present?(parsed.scheme) -> + url + + # protocol relative URI + blank?(parsed.scheme) and present?(parsed.host) -> + URI.to_string(%{parsed | scheme: parsed_base.scheme}) + + true -> + base_element = Floki.find(doc, "base") + + new_base = + if blank?(base_element) or blank?(Floki.attribute(base_element, "href")) do + base_url + else + abs_uri(Floki.attribute(base_element, "href") |> List.first(), base_url, []) + end + + parsed_new_base = URI.parse(new_base) + new_path = Path.expand(parsed.path || "/", Path.dirname(parsed_new_base.path || "/")) + + URI.to_string(%{parsed | scheme: parsed_new_base.scheme, host: parsed_new_base.host, path: new_path}) + end + end + + @spec to_key(String.t()) :: String.t() + def to_key(str) do + String.replace(str, ~r/[-]/, "_") + end + + @spec normalized_key(String.t()) :: String.t() | atom() + def normalized_key(key) do + if Application.get_env(:microformats2, :atomize_keys, true), + do: String.to_atom(key), + else: key + end +end diff --git a/lib/implied.ex b/lib/implied.ex index e2b265f..5b3adcd 100644 --- a/lib/implied.ex +++ b/lib/implied.ex @@ -1,58 +1,62 @@ defmodule Microformats2.Items.ImpliedProperties do + import Microformats2.Helpers + + alias Microformats2.Items + def parse(entry, root, url, doc) do - implied_name_property(entry, root) + entry + |> implied_name_property(root) |> implied_photo_property(root) |> implied_url_property(root, url, doc) end defp implied_url_property(entry, root, doc_url, doc) do - url_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :url, else: "url" + url_key = normalized_key("url") + if entry[:properties][url_key] == nil do val = implied_url_attrval(root) url = - if Microformats2.blank?(val) do + if blank?(val) do implied_url_deep(root) else val end - |> Microformats2.stripped_or_nil() + |> stripped_or_nil() - if Microformats2.blank?(url) do - entry - else - Map.put(entry, :properties, Map.put(entry[:properties], url_key, [Microformats2.abs_uri(url, doc_url, doc)])) - end + if blank?(url), + do: entry, + else: put_in(entry, [:properties, url_key], [abs_uri(url, doc_url, doc)]) else entry end end defp implied_photo_property(entry, root) do - photo_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :photo, else: "photo" + photo_key = normalized_key("photo") + if entry[:properties][photo_key] == nil do val = implied_photo_attrval(root) url = - if Microformats2.blank?(val) do + if blank?(val) do implied_photo_deep(root) else val end - |> Microformats2.stripped_or_nil() + |> stripped_or_nil() - if Microformats2.blank?(url) do - entry - else - Map.put(entry, :properties, Map.put(entry[:properties], photo_key, [url])) - end + if blank?(url), + do: entry, + else: put_in(entry, [:properties, photo_key], [url]) else entry end end defp implied_name_property(entry, root = {elem, _, _}) do - name_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :name, else: "name" + name_key = normalized_key("name") + if entry[:properties][name_key] == nil do nam = cond do @@ -65,38 +69,28 @@ defmodule Microformats2.Items.ImpliedProperties do true -> val = implied_name_deep(root) - if Microformats2.blank?(val) do - Microformats2.Items.text_content(root) - else - val - end + if blank?(val), + do: Items.text_content(root), + else: val end - |> Microformats2.stripped_or_nil() + |> stripped_or_nil() - Map.put(entry, :properties, Map.put(entry[:properties], name_key, [nam])) + put_in(entry, [:properties, name_key], [nam]) else entry end end defp implied_name_deep({_, _, children}) do - only_nodes = - Enum.filter(children, fn - el when is_bitstring(el) -> false - _ -> true - end) + only_nodes = Enum.reject(children, &is_bitstring/1) if Enum.count(only_nodes) == 1 do sec_node = List.first(only_nodes) {_, _, sec_node_children} = sec_node attrval = implied_name_attrval(sec_node) - if Microformats2.blank?(attrval) do - sec_only_nodes = - Enum.filter(sec_node_children, fn - el when is_bitstring(el) -> false - _ -> true - end) + if blank?(attrval) do + sec_only_nodes = Enum.reject(sec_node_children, &is_bitstring/1) if Enum.count(sec_only_nodes) == 1 do third_node = sec_only_nodes |> List.first() @@ -108,21 +102,10 @@ defmodule Microformats2.Items.ImpliedProperties do end end - defp implied_name_attrval(node = {"img", _, _}) do - Floki.attribute(node, "alt") |> List.first() - end - - defp implied_name_attrval(node = {"area", _, _}) do - Floki.attribute(node, "alt") |> List.first() - end - - defp implied_name_attrval(node = {"abbr", _, _}) do - Floki.attribute(node, "title") |> List.first() - end - - defp implied_name_attrval(_) do - nil - end + defp implied_name_attrval(node = {"img", _, _}), do: Floki.attribute(node, "alt") |> List.first() + defp implied_name_attrval(node = {"area", _, _}), do: Floki.attribute(node, "alt") |> List.first() + defp implied_name_attrval(node = {"abbr", _, _}), do: Floki.attribute(node, "title") |> List.first() + defp implied_name_attrval(_), do: nil defp implied_photo_deep(root) do imgs = direct_not_h_children_with_attr(root, "img", "src") @@ -137,12 +120,7 @@ defmodule Microformats2.Items.ImpliedProperties do true -> {_, _, children} = root - - only_nodes = - Enum.filter(children, fn - el when is_bitstring(el) -> false - _ -> true - end) + only_nodes = Enum.reject(children, &is_bitstring/1) if Enum.count(only_nodes) == 1 do child = List.first(children) @@ -181,36 +159,20 @@ defmodule Microformats2.Items.ImpliedProperties do end end - defp implied_photo_attrval(node = {"img", _, _}) do - Floki.attribute(node, "src") |> List.first() - end - - defp implied_photo_attrval(node = {"object", _, _}) do - Floki.attribute(node, "data") |> List.first() - end - - defp implied_photo_attrval(_) do - nil - end + defp implied_photo_attrval(node = {"img", _, _}), do: Floki.attribute(node, "src") |> List.first() + defp implied_photo_attrval(node = {"object", _, _}), do: Floki.attribute(node, "data") |> List.first() + defp implied_photo_attrval(_), do: nil defp direct_not_h_children_with_attr({_, _, children}, name, attr) do Enum.filter(children, fn {el, _, _} -> el == name v when is_bitstring(v) -> false end) - |> Enum.filter(fn el -> not Microformats2.is_rootlevel?(el) end) + |> Enum.filter(fn el -> not is_rootlevel?(el) end) |> Enum.filter(fn el -> Enum.count(Floki.attribute(el, attr)) > 0 end) end - defp implied_url_attrval(node = {"a", _, _}) do - Floki.attribute(node, "href") |> List.first() - end - - defp implied_url_attrval(node = {"area", _, _}) do - Floki.attribute(node, "href") |> List.first() - end - - defp implied_url_attrval(_) do - nil - end + defp implied_url_attrval(node = {"a", _, _}), do: Floki.attribute(node, "href") |> List.first() + defp implied_url_attrval(node = {"area", _, _}), do: Floki.attribute(node, "href") |> List.first() + defp implied_url_attrval(_), do: nil end diff --git a/lib/items.ex b/lib/items.ex index 350bdda..48b7200 100644 --- a/lib/items.ex +++ b/lib/items.ex @@ -1,4 +1,8 @@ defmodule Microformats2.Items do + import Microformats2.Helpers + + alias Microformats2.Items.ImpliedProperties + def parse(nodes, doc, url, items \\ []) def parse([head | tail], doc, url, items) when is_bitstring(head), do: parse(tail, doc, url, items) def parse([head | tail], doc, url, items), do: parse(tail, doc, url, parse(head, doc, url, items)) @@ -6,8 +10,8 @@ defmodule Microformats2.Items do def parse(root, doc, url, items) do root_classes = - Microformats2.attr_list(root) - |> Enum.filter(fn class_name -> Microformats2.is_rootlevel?(class_name) end) + attr_list(root) + |> Enum.filter(&is_rootlevel?/1) |> Enum.sort() {_, _, children} = root @@ -15,7 +19,7 @@ defmodule Microformats2.Items do if not Enum.empty?(root_classes) do entry = parse_sub(children, doc, url, %{type: root_classes, properties: %{}}) - |> Microformats2.Items.ImpliedProperties.parse(root, url, doc) + |> ImpliedProperties.parse(root, url, doc) items ++ [entry] else @@ -28,14 +32,14 @@ defmodule Microformats2.Items do defp parse_sub([child = {_, _, child_children} | children], doc, url, item) do p = - if Microformats2.has_a?(child, "h-") do + if has_a?(child, "h-") do parse(child, doc, url, []) |> List.first() else [] end classes = - Microformats2.attr_list(child) + attr_list(child) |> Enum.filter(fn "p-" <> _ -> true "u-" <> _ -> true @@ -45,7 +49,11 @@ defmodule Microformats2.Items do end) props = gen_prop(child, classes, item, p, doc, url) - n_item = if Microformats2.is_rootlevel?(child), do: props, else: parse_sub(child_children, doc, url, props) + + n_item = + if is_rootlevel?(child), + do: props, + else: parse_sub(child_children, doc, url, props) parse_sub(children, doc, url, n_item) end @@ -57,10 +65,10 @@ defmodule Microformats2.Items do alt = Floki.attribute(child, "alt") |> List.first() cond do - elem == "abbr" and not Microformats2.blank?(title) -> + elem == "abbr" and present?(title) -> title - elem == "img" and not Microformats2.blank?(alt) -> + elem == "img" and present?(alt) -> alt true -> @@ -77,29 +85,29 @@ defmodule Microformats2.Items do value = Floki.attribute(child, "value") |> List.first() cond do - Enum.member?(["a", "area"], elem) and not Microformats2.blank?(href) -> + Enum.member?(["a", "area"], elem) and present?(href) -> href - Enum.member?(["img", "audio", "video", "source"], elem) and not Microformats2.blank?(src) -> + Enum.member?(["img", "audio", "video", "source"], elem) and present?(src) -> src - elem == "object" and not Microformats2.blank?(data) -> + elem == "object" and present?(data) -> data - elem == "video" and not Microformats2.blank?(poster) -> + elem == "video" and present?(poster) -> poster # TODO value-class-pattern at this position - elem == "abbr" and not Microformats2.blank?(title) -> + elem == "abbr" and present?(title) -> title - Enum.member?(["data", "input"], elem) and not Microformats2.blank?(value) -> + Enum.member?(["data", "input"], elem) and present?(value) -> value true -> text_content(child) |> String.trim() end - |> Microformats2.abs_uri(url, doc) + |> abs_uri(url, doc) end defp parse_prop("dt-" <> _, child = {elem, _, _}, _, _) do @@ -108,13 +116,13 @@ defmodule Microformats2.Items do value = Floki.attribute(child, "value") cond do - Enum.member?(["time", "ins", "del"], elem) and not Microformats2.blank?(dt) -> + Enum.member?(["time", "ins", "del"], elem) and present?(dt) -> dt |> List.first() - elem == "abbr" and not Microformats2.blank?(title) -> + elem == "abbr" and present?(title) -> title |> List.first() - Enum.member?(["data", "input"], elem) and not Microformats2.blank?(value) -> + Enum.member?(["data", "input"], elem) and present?(value) -> value |> List.first() true -> @@ -124,25 +132,26 @@ defmodule Microformats2.Items do defp parse_prop("e-" <> _, child = {_, _, children}, _, _) do %{ - html: Microformats2.stripped_or_nil(Floki.raw_html(children)), - text: Microformats2.stripped_or_nil(Floki.text(child)) + html: stripped_or_nil(Floki.raw_html(children)), + text: stripped_or_nil(Floki.text(child)) } end defp parse_prop(_, _, _, _), do: nil defp get_value(class, p) do - name_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :name, else: "name" - url_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :url, else: "url" + name_key = normalized_key("name") + url_key = normalized_key("url") + cond do - Microformats2.is_a?(class, "p") and p[:properties][name_key] != nil -> + is_a?(class, "p") and p[:properties][name_key] != nil -> List.first(p[:properties][name_key]) - Microformats2.is_a?(class, "u") and p[:properties][url_key] != nil -> + is_a?(class, "u") and p[:properties][url_key] != nil -> List.first(p[:properties][url_key]) # and p[:properties][url_key] != nil -> - Microformats2.is_a?(class, "e") -> + is_a?(class, "e") -> # TODO handle nil @@ -156,44 +165,27 @@ defmodule Microformats2.Items do props = Enum.reduce(classes, item[:properties], fn class, acc -> prop = - if Microformats2.is_rootlevel?(child) do + if is_rootlevel?(child) do Map.put(p, :value, get_value(class, p)) else parse_prop(class, child, doc, url) end - key = strip_prefix(class) |> to_key - key = if Application.get_env(:microformats2, :atomize_keys, true), do: String.to_atom(key), else: key + key = strip_prefix(class) |> to_key |> normalized_key() val = if acc[key] != nil, do: acc[key], else: [] Map.put(acc, key, val ++ [prop]) end) - if Microformats2.blank?(classes) and not Microformats2.blank?(p) and Microformats2.is_rootlevel?(child) do - Map.put(item, :children, (item[:children] || []) ++ [p]) - else - Map.put(item, :properties, props) - end - end - - defp strip_prefix("p-" <> rest) do - rest - end - - defp strip_prefix("u-" <> rest) do - rest + if blank?(classes) and present?(p) and is_rootlevel?(child), + do: Map.update(item, :children, [p], &(&1 ++ [p])), + else: Map.put(item, :properties, props) end - defp strip_prefix("dt-" <> rest) do - rest - end - - defp strip_prefix("e-" <> rest) do - rest - end - - defp strip_prefix(rest) do - rest - end + defp strip_prefix("p-" <> rest), do: rest + defp strip_prefix("u-" <> rest), do: rest + defp strip_prefix("dt-" <> rest), do: rest + defp strip_prefix("e-" <> rest), do: rest + defp strip_prefix(rest), do: rest def text_content(child, text \\ "") @@ -202,7 +194,7 @@ defmodule Microformats2.Items do if elem == "img" do alt = Floki.attribute(child, "alt") - if !Microformats2.blank?(alt) do + if !blank?(alt) do alt else Floki.attribute(child, "src") @@ -212,16 +204,8 @@ defmodule Microformats2.Items do "" end - Enum.reduce(children, text <> txt, fn child, acc -> - text_content(child, acc) - end) - end - - def text_content(child, text) when is_bitstring(child) do - text <> child + Enum.reduce(children, text <> txt, &text_content/2) end - defp to_key(str) do - String.replace(str, ~r/[-]/, "_") - end + def text_content(child, text) when is_bitstring(child), do: text <> child end diff --git a/lib/microformats2.ex b/lib/microformats2.ex index fb4bddc..ae9f23c 100644 --- a/lib/microformats2.ex +++ b/lib/microformats2.ex @@ -24,66 +24,4 @@ defmodule Microformats2 do %{items: items, rels: rels[:rels], rel_urls: rels[:rel_urls]} end - - def attr_list(node, attr \\ "class") do - Floki.attribute(node, attr) |> List.first() |> to_string |> String.split(" ", trim: true) - end - - def blank?(nil), do: true - def blank?(""), do: true - def blank?([]), do: true - def blank?(_), do: false - - def stripped_or_nil(nil), do: nil - def stripped_or_nil(val), do: String.trim(val) - - def is_rootlevel?(node) when is_tuple(node) do - attr_list(node, "class") - |> Enum.any?(fn cls -> is_a?(cls, "h") end) - end - - def is_rootlevel?(class_name) when is_bitstring(class_name) do - is_a?(class_name, "h") - end - - def is_a?("h-" <> _, wanted), do: wanted == "h" - def is_a?("p-" <> _, wanted), do: wanted == "p" - def is_a?("e-" <> _, wanted), do: wanted == "e" - def is_a?("u-" <> _, wanted), do: wanted == "u" - def is_a?("dt-" <> _, wanted), do: wanted == "dt" - def is_a?(_, _), do: false - - def has_a?(node, wanted) do - attr_list(node) |> Enum.filter(fn class -> is_a?(class, wanted) end) |> blank? - end - - def abs_uri(url, base_url, doc) do - parsed = URI.parse(url) - parsed_base = URI.parse(base_url) - - cond do - # absolute URI - not blank?(parsed.scheme) -> - url - - # protocol relative URI - blank?(parsed.scheme) and not blank?(parsed.host) -> - URI.to_string(%{parsed | scheme: parsed_base.scheme}) - - true -> - base_element = Floki.find(doc, "base") - - new_base = - if blank?(base_element) or blank?(Floki.attribute(base_element, "href")) do - base_url - else - abs_uri(Floki.attribute(base_element, "href") |> List.first(), base_url, []) - end - - parsed_new_base = URI.parse(new_base) - new_path = Path.expand(parsed.path || "/", Path.dirname(parsed_new_base.path || "/")) - - URI.to_string(%{parsed | scheme: parsed_new_base.scheme, host: parsed_new_base.host, path: new_path}) - end - end end diff --git a/lib/rels.ex b/lib/rels.ex index 2fbc13e..f540728 100644 --- a/lib/rels.ex +++ b/lib/rels.ex @@ -1,4 +1,6 @@ defmodule Microformats2.Rels do + import Microformats2.Helpers + def parse(doc, base_url) do link_rels = Floki.find(doc, "[rel][href]") @@ -9,8 +11,8 @@ defmodule Microformats2.Rels do String.trim(to_string(rel)) != "" and String.trim(to_string(href)) != "" end) |> Enum.reduce(%{rels: %{}, rel_urls: %{}}, fn element, acc -> - rel = Microformats2.attr_list(element, "rel") - url = Floki.attribute(element, "href") |> List.first() |> Microformats2.abs_uri(base_url, doc) + rel = attr_list(element, "rel") + url = Floki.attribute(element, "href") |> List.first() |> abs_uri(base_url, doc) acc |> save_urls_by_rels(rel, url) @@ -23,45 +25,34 @@ defmodule Microformats2.Rels do defp save_urls_by_rels(map, rel, url) do Enum.reduce(rel, map, fn rel, nmap -> - if nmap[:rels][rel] == nil do - Map.put(nmap, :rels, Map.put(nmap[:rels], rel, [url])) - else - Map.put(nmap, :rels, Map.put(nmap[:rels], rel, Enum.uniq(nmap[:rels][rel] ++ [url]))) - end + if nmap[:rels][rel] == nil, + do: put_in(nmap, [:rels, rel], [url]), + else: put_in(nmap, [:rels, rel], Enum.uniq(nmap[:rels][rel] ++ [url])) end) end defp save_rels_by_urls(map, rel, url) do - if map[:rel_urls][url] == nil do - Map.put(map, :rel_urls, Map.put(map[:rel_urls], url, %{rels: rel})) - else - Map.put( - map, - :rel_urls, - Map.put(map[:rel_urls], url, Map.put(map[:rel_urls][url], :rels, Enum.uniq(map[:rel_urls][url][:rels] ++ rel))) - ) - end + if map[:rel_urls][url] == nil, + do: put_in(map, [:rel_urls, url], %{rels: rel}), + else: put_in(map, [:rel_urls, url, :rels], Enum.uniq(map[:rel_urls][url][:rels] ++ rel)) end defp save_text(map, element, url) do text = Floki.text(element) - if String.trim(to_string(text)) == "" or map[:rel_urls][url][:text] != nil do - map - else - Map.put(map, :rel_urls, Map.put(map[:rel_urls], url, Map.put(map[:rel_urls][url], :text, text))) - end + if String.trim(to_string(text)) == "" or map[:rel_urls][url][:text] != nil, + do: map, + else: put_in(map, [:rel_urls, url, :text], text) end defp save_attributes(map, element, url) do Enum.reduce(["hreflang", "media", "title", "type"], save_text(map, element, url), fn att, nmap -> val = Floki.attribute(element, att) |> List.first() + key = String.to_atom(att) - if String.trim(to_string(val)) == "" or nmap[:rel_urls][url][String.to_atom(att)] != nil do - nmap - else - Map.put(nmap, :rel_urls, Map.put(nmap[:rel_urls], url, Map.put(nmap[:rel_urls][url], String.to_atom(att), val))) - end + if String.trim(to_string(val)) == "" or nmap[:rel_urls][url][key] != nil, + do: nmap, + else: put_in(nmap, [:rel_urls, url, key], val) end) end end