You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

88 lines
2.5 KiB

  defmodule Microformats2 do
    @moduledoc """
    Entry point and shared helpers for parsing microformats2 markup.

    `parse/1` fetches a URL and parses the response body, `parse/2` parses
    markup that is already in memory. The remaining public functions are
    small helpers (class-name classification, blank checks, URL resolution)
    shared with the `Microformats2.Rels` and `Microformats2.Items` parsers.
    """

    # ASCII whitespace characters that separate the tokens of an HTML
    # attribute value such as `class`.
    @html_whitespace [" ", "\t", "\n", "\f", "\r"]

    @doc """
    Fetches `url` (following redirects) and parses the response body.

    Returns the same map as `parse/2` when the response carries a 2xx
    `status_code`, and `:error` for anything else, including results that
    are not a response at all.
    """
    def parse(url) do
      # Matching on the map shape (rather than calling
      # `HTTPotion.Response.success?/1`) makes non-response results, e.g. an
      # error struct without a `status_code`, fall through to `:error`.
      # NOTE(review): assumes HTTPotion signals transport failures with a
      # struct lacking `status_code` - confirm against the HTTPotion version used.
      case HTTPotion.get(url, follow_redirects: true) do
        %{status_code: status, body: body} when status in 200..299 ->
          parse(body, url)

        _non_2xx_or_error ->
          :error
      end
    end

    @doc """
    Parses the markup in `content`, using `url` as the document's base URL.

    `<template>`, `<style>`, `<script>` elements and comments are removed
    before parsing. Returns a map with the keys `:items`, `:rels` and
    `:rel_urls`.
    """
    def parse(content, url) do
      doc =
        content
        |> Floki.parse()
        |> Floki.filter_out("template")
        |> Floki.filter_out("style")
        |> Floki.filter_out("script")
        |> Floki.filter_out(:comment)

      rels = Microformats2.Rels.parse(doc, url)
      items = Microformats2.Items.parse(doc, doc, url)

      %{items: items, rels: rels[:rels], rel_urls: rels[:rel_urls]}
    end

    @doc """
    Returns the whitespace-separated tokens of attribute `attr` on `node`.

    Only the first value returned by `Floki.attribute/2` is considered; a
    missing attribute yields `[]`. Tokens are split on any HTML ASCII
    whitespace (space, tab, LF, FF, CR), not just on spaces.
    """
    def attr_list(node, attr \\ "class") do
      node
      |> Floki.attribute(attr)
      |> List.first()
      # `to_string(nil)` is "", so a missing attribute becomes an empty list.
      |> to_string()
      |> String.split(@html_whitespace, trim: true)
    end

    @doc """
    Returns `true` for `nil`, the empty string and the empty list; `false` otherwise.
    """
    @spec blank?(term()) :: boolean()
    def blank?(nil), do: true
    def blank?(""), do: true
    def blank?([]), do: true
    def blank?(_), do: false

    @doc """
    Trims leading and trailing whitespace from `val`, passing `nil` through unchanged.
    """
    @spec stripped_or_nil(String.t() | nil) :: String.t() | nil
    def stripped_or_nil(nil), do: nil
    def stripped_or_nil(val), do: String.trim(val)

    @doc """
    Tells whether a node or a class name denotes a microformats root (`h-*`).

    Given a tuple (an HTML node), checks whether any of its classes starts
    with `h-`. Given a string, checks that single class name.
    """
    def is_rootlevel?(node) when is_tuple(node) do
      node
      |> attr_list("class")
      |> Enum.any?(&is_a?(&1, "h"))
    end

    def is_rootlevel?(class_name) when is_bitstring(class_name) do
      is_a?(class_name, "h")
    end

    @doc """
    Tells whether `class_name` carries the microformats prefix `wanted`.

    Recognised prefixes are `"h"`, `"p"`, `"e"`, `"u"` and `"dt"`; any other
    class name (or non-string value) yields `false`.
    """
    @spec is_a?(term(), term()) :: boolean()
    def is_a?("h-" <> _, wanted), do: wanted == "h"
    def is_a?("p-" <> _, wanted), do: wanted == "p"
    def is_a?("e-" <> _, wanted), do: wanted == "e"
    def is_a?("u-" <> _, wanted), do: wanted == "u"
    def is_a?("dt-" <> _, wanted), do: wanted == "dt"
    def is_a?(_, _), do: false

    @doc """
    Tells whether `node` has at least one class with the prefix `wanted`.
    """
    def has_a?(node, wanted) do
      # The previous implementation piped the matching classes into `blank?/1`
      # and therefore answered `true` exactly when NO class matched.
      node
      |> attr_list()
      |> Enum.any?(&is_a?(&1, wanted))
    end

    @doc """
    Resolves `url` to an absolute URL.

      * A URL that already has a scheme is returned unchanged.
      * A protocol-relative URL (`//host/path`) receives the scheme of `base_url`.
      * Anything else is resolved with `URI.merge/2` against the `href` of the
        first `<base>` element in `doc` that has one (itself resolved against
        `base_url`), or against `base_url` when there is no such element.

    If the effective base has no scheme or host, `url` cannot be resolved and
    is returned as given.
    """
    @spec abs_uri(String.t(), String.t(), term()) :: String.t()
    def abs_uri(url, base_url, doc) do
      parsed = URI.parse(url)

      cond do
        # absolute URI
        not blank?(parsed.scheme) ->
          url

        # protocol relative URI
        not blank?(parsed.host) ->
          URI.to_string(%{parsed | scheme: URI.parse(base_url).scheme})

        true ->
          merge_onto_base(url, effective_base(base_url, doc))
      end
    end

    # Picks the base to resolve against: the document's <base href> (made
    # absolute against `base_url`) when present, otherwise `base_url` itself.
    defp effective_base(base_url, doc) do
      case base_href(doc) do
        nil -> base_url
        # An empty doc is passed so a relative <base href> is resolved against
        # `base_url` directly instead of looking up <base> again.
        href -> abs_uri(href, base_url, [])
      end
    end

    # Returns the first `href` found on a <base> element in `doc`, or nil.
    defp base_href([]), do: nil

    defp base_href(doc) do
      doc
      |> Floki.find("base")
      |> Floki.attribute("href")
      |> List.first()
    end

    # Resolves `url` against `base`, keeping the base's port and userinfo and
    # the reference's query, fragment and trailing slash.
    defp merge_onto_base(url, base) do
      case URI.parse(base) do
        %URI{scheme: scheme, host: host} = base_uri
        when is_binary(scheme) and is_binary(host) ->
          base_uri
          |> URI.merge(url)
          |> URI.to_string()

        # `URI.merge/2` raises unless the base is absolute; without one there
        # is nothing to resolve against.
        _not_absolute ->
          url
      end
    end
  end