You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

179 lines
5.0 KiB

  1. defmodule Microformats2.Items.ImpliedProperties do
  2. import Microformats2.Helpers
  3. alias Microformats2.Items
  @doc """
  Fills in the implied `name`, `photo` and `url` properties of `entry`.

  The three steps run in that order. Each one leaves the entry untouched
  when the corresponding property is already present under
  `entry[:properties]`.

  `root` is the parsed HTML node of the microformat root, a
  `{tag, attributes, children}` tuple. `url` and `doc` are only used when an
  implied URL is stored, where they are handed to `abs_uri/3`.
  """
  def parse(entry, root, url, doc) do
    named = implied_name_property(entry, root)
    pictured = implied_photo_property(named, root)
    implied_url_property(pictured, root, url, doc)
  end
  # Adds an implied "url" property unless the entry already has one.
  #
  # The candidate comes from the root element itself (implied_url_attrval/1)
  # and, when that is blank, from its direct children (implied_url_deep/1).
  # A blank candidate leaves the entry unchanged; otherwise the value is
  # passed through abs_uri/3 and stored as a single-element list.
  defp implied_url_property(entry, root, doc_url, doc) do
    key = normalized_key("url")

    case entry[:properties][key] do
      nil ->
        own_href = implied_url_attrval(root)
        candidate = if blank?(own_href), do: implied_url_deep(root), else: own_href
        href = stripped_or_nil(candidate)

        if blank?(href) do
          entry
        else
          put_in(entry, [:properties, key], [abs_uri(href, doc_url, doc)])
        end

      _already_present ->
        entry
    end
  end
  # Adds an implied "photo" property unless the entry already has one.
  #
  # The candidate comes from the root element itself (implied_photo_attrval/1)
  # and, when that is blank, from its descendants (implied_photo_deep/1).
  # A blank candidate leaves the entry unchanged.
  #
  # NOTE(review): unlike the implied url, the photo value is stored exactly as
  # found and is not run through abs_uri/3 -- confirm that this is intended.
  defp implied_photo_property(entry, root) do
    key = normalized_key("photo")

    if is_nil(entry[:properties][key]) do
      own_src = implied_photo_attrval(root)
      candidate = if blank?(own_src), do: implied_photo_deep(root), else: own_src
      src = stripped_or_nil(candidate)

      if blank?(src) do
        entry
      else
        put_in(entry, [:properties, key], [src])
      end
    else
      entry
    end
  end
  # Adds an implied "name" property unless the entry already has one.
  #
  # Source of the name, by root tag:
  #   * img / area -> first "alt" attribute value
  #   * abbr       -> first "title" attribute value
  #   * otherwise  -> implied_name_deep/1, falling back to the root's text
  #                   content when that is blank
  #
  # The result of stripped_or_nil/1 is stored unconditionally, so the property
  # is written even when no usable name was found.
  defp implied_name_property(entry, {tag, _, _} = root) do
    key = normalized_key("name")

    if is_nil(entry[:properties][key]) do
      raw_name =
        case tag do
          alt_tag when alt_tag in ["img", "area"] ->
            root |> Floki.attribute("alt") |> List.first()

          "abbr" ->
            root |> Floki.attribute("title") |> List.first()

          _other ->
            nested = implied_name_deep(root)
            if blank?(nested), do: Items.text_content(root), else: nested
        end

      put_in(entry, [:properties, key], [stripped_or_nil(raw_name)])
    else
      entry
    end
  end
  # Looks for an implied name on the root's only child element or, failing
  # that, on that child's only child element.
  #
  # Returns the attribute value found by implied_name_attrval/1, or nil when
  # the root (or the child) does not have exactly one element child.
  #
  # Only element tuples ({tag, attrs, children} with a binary tag) count as
  # children here. Text nodes are skipped, and so are other non-element nodes
  # such as HTML comments ({:comment, text}); previously a lone comment child
  # was treated as an element and crashed the destructuring match.
  defp implied_name_deep({_, _, children}) do
    element? = fn
      {tag, _, _} when is_binary(tag) -> true
      _other -> false
    end

    case Enum.filter(children, element?) do
      [{_, _, grandchildren} = only_child] ->
        attrval = implied_name_attrval(only_child)

        if blank?(attrval) do
          case Enum.filter(grandchildren, element?) do
            [only_grandchild] -> implied_name_attrval(only_grandchild)
            _none_or_many -> nil
          end
        else
          attrval
        end

      _none_or_many ->
        nil
    end
  end
  # Returns the attribute that carries the implied name for a node:
  # "alt" for img/area, "title" for abbr, nil for anything else.
  defp implied_name_attrval({tag, _, _} = node) when tag in ["img", "area"] do
    node |> Floki.attribute("alt") |> List.first()
  end

  defp implied_name_attrval({"abbr", _, _} = node) do
    node |> Floki.attribute("title") |> List.first()
  end

  defp implied_name_attrval(_other), do: nil
  # Looks for an implied photo among the root's direct children and, when
  # none is found, among the direct children of the root's only child element.
  #
  # Returns the "src" / "data" attribute value, or nil.
  #
  # The nested lookup uses the single *element* child. The earlier code took
  # the first child of any kind, which could be a text node and made
  # direct_not_h_children_with_attr/3 raise, or picked the wrong node.
  defp implied_photo_deep({_, _, children} = root) do
    case photo_from_direct_children(root) do
      nil ->
        only_elements =
          Enum.filter(children, fn
            {tag, _, _} when is_binary(tag) -> true
            _other -> false
          end)

        case only_elements do
          [only_child] -> photo_from_direct_children(only_child)
          _none_or_many -> nil
        end

      found ->
        found
    end
  end

  # Returns the src of the single qualifying <img src> direct child, else the
  # data of the single qualifying <object data> direct child, else nil.
  # "Qualifying" is whatever direct_not_h_children_with_attr/3 lets through.
  defp photo_from_direct_children(node) do
    case direct_not_h_children_with_attr(node, "img", "src") do
      [img] ->
        img |> Floki.attribute("src") |> List.first()

      _none_or_many ->
        case direct_not_h_children_with_attr(node, "object", "data") do
          [object] -> object |> Floki.attribute("data") |> List.first()
          _none_or_many -> nil
        end
    end
  end
  # Looks for an implied url among the root's direct children: the href of
  # the single qualifying <a href>, else of the single qualifying
  # <area href>, else nil.
  defp implied_url_deep(root) do
    anchors = direct_not_h_children_with_attr(root, "a", "href")
    map_areas = direct_not_h_children_with_attr(root, "area", "href")

    case {anchors, map_areas} do
      {[anchor], _} -> anchor |> Floki.attribute("href") |> List.first()
      {_, [area]} -> area |> Floki.attribute("href") |> List.first()
      _no_single_link -> nil
    end
  end
  # Returns the attribute that carries the implied photo for a node:
  # "src" for img, "data" for object, nil for anything else.
  defp implied_photo_attrval(node) do
    case node do
      {"img", _, _} -> node |> Floki.attribute("src") |> List.first()
      {"object", _, _} -> node |> Floki.attribute("data") |> List.first()
      _other -> nil
    end
  end
  # Returns the direct children of the given node that
  #   * are elements named `name`,
  #   * are not rejected by is_rootlevel?/1, and
  #   * carry at least one `attr` attribute.
  #
  # Every other child (text, comments, other non-element nodes) is skipped.
  # The catch-all clause matters: without it a child such as
  # {:comment, text} matched no clause and raised FunctionClauseError.
  defp direct_not_h_children_with_attr({_, _, children}, name, attr) do
    Enum.filter(children, fn
      {^name, _, _} = el ->
        not is_rootlevel?(el) and Floki.attribute(el, attr) != []

      _other ->
        false
    end)
  end
  # Returns the first "href" attribute value of an <a> or <area> node,
  # nil for anything else.
  defp implied_url_attrval({tag, _, _} = node) when tag in ["a", "area"] do
    node |> Floki.attribute("href") |> List.first()
  end

  defp implied_url_attrval(_other), do: nil
  142. end