You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

214 lines
5.4 KiB

  1. defmodule Microformats2.Items.ImpliedProperties do
  2. def parse(entry, root, url, doc) do
  3. implied_name_property(entry, root)
  4. |> implied_photo_property(root)
  5. |> implied_url_property(root, url, doc)
  6. end
  7. defp implied_url_property(entry, root, doc_url, doc) do
  8. if entry[:properties][:url] == nil do
  9. val = implied_url_attrval(root)
  10. url =
  11. if Microformats2.blank?(val) do
  12. implied_url_deep(root)
  13. else
  14. val
  15. end
  16. |> Microformats2.stripped_or_nil()
  17. if Microformats2.blank?(url) do
  18. entry
  19. else
  20. Map.put(entry, :properties, Map.put(entry[:properties], :url, [Microformats2.abs_uri(url, doc_url, doc)]))
  21. end
  22. else
  23. entry
  24. end
  25. end
  26. defp implied_photo_property(entry, root) do
  27. if entry[:properties][:photo] == nil do
  28. val = implied_photo_attrval(root)
  29. url =
  30. if Microformats2.blank?(val) do
  31. implied_photo_deep(root)
  32. else
  33. val
  34. end
  35. |> Microformats2.stripped_or_nil()
  36. if Microformats2.blank?(url) do
  37. entry
  38. else
  39. Map.put(entry, :properties, Map.put(entry[:properties], :photo, [url]))
  40. end
  41. else
  42. entry
  43. end
  44. end
  45. defp implied_name_property(entry, root = {elem, _, _}) do
  46. if entry[:properties][:name] == nil do
  47. nam =
  48. cond do
  49. elem == "img" or elem == "area" ->
  50. Floki.attribute(root, "alt") |> List.first()
  51. elem == "abbr" ->
  52. Floki.attribute(root, "title") |> List.first()
  53. true ->
  54. val = implied_name_deep(root)
  55. if Microformats2.blank?(val) do
  56. Microformats2.Items.text_content(root)
  57. else
  58. val
  59. end
  60. end
  61. |> Microformats2.stripped_or_nil()
  62. Map.put(entry, :properties, Map.put(entry[:properties], :name, [nam]))
  63. else
  64. entry
  65. end
  66. end
  67. defp implied_name_deep({_, _, children}) do
  68. only_nodes =
  69. Enum.filter(children, fn
  70. el when is_bitstring(el) -> false
  71. _ -> true
  72. end)
  73. if Enum.count(only_nodes) == 1 do
  74. sec_node = List.first(only_nodes)
  75. {_, _, sec_node_children} = sec_node
  76. attrval = implied_name_attrval(sec_node)
  77. if Microformats2.blank?(attrval) do
  78. sec_only_nodes =
  79. Enum.filter(sec_node_children, fn
  80. el when is_bitstring(el) -> false
  81. _ -> true
  82. end)
  83. if Enum.count(sec_only_nodes) == 1 do
  84. third_node = sec_only_nodes |> List.first()
  85. implied_name_attrval(third_node)
  86. end
  87. else
  88. attrval
  89. end
  90. end
  91. end
  92. defp implied_name_attrval(node = {"img", _, _}) do
  93. Floki.attribute(node, "alt") |> List.first()
  94. end
  95. defp implied_name_attrval(node = {"area", _, _}) do
  96. Floki.attribute(node, "alt") |> List.first()
  97. end
  98. defp implied_name_attrval(node = {"abbr", _, _}) do
  99. Floki.attribute(node, "title") |> List.first()
  100. end
  101. defp implied_name_attrval(_) do
  102. nil
  103. end
  104. defp implied_photo_deep(root) do
  105. imgs = direct_not_h_children_with_attr(root, "img", "src")
  106. objects = direct_not_h_children_with_attr(root, "object", "data")
  107. cond do
  108. Enum.count(imgs) == 1 ->
  109. List.first(imgs) |> Floki.attribute("src") |> List.first()
  110. Enum.count(objects) == 1 ->
  111. List.first(objects) |> Floki.attribute("data") |> List.first()
  112. true ->
  113. {_, _, children} = root
  114. only_nodes =
  115. Enum.filter(children, fn
  116. el when is_bitstring(el) -> false
  117. _ -> true
  118. end)
  119. if Enum.count(only_nodes) == 1 do
  120. child = List.first(children)
  121. sec_imgs = direct_not_h_children_with_attr(child, "img", "src")
  122. sec_objs = direct_not_h_children_with_attr(child, "object", "data")
  123. cond do
  124. Enum.count(sec_imgs) == 1 ->
  125. List.first(sec_imgs) |> Floki.attribute("src") |> List.first()
  126. Enum.count(sec_objs) == 1 ->
  127. List.first(sec_objs) |> Floki.attribute("data") |> List.first()
  128. true ->
  129. nil
  130. end
  131. else
  132. nil
  133. end
  134. end
  135. end
  136. defp implied_url_deep(root) do
  137. as = direct_not_h_children_with_attr(root, "a", "href")
  138. areas = direct_not_h_children_with_attr(root, "area", "href")
  139. cond do
  140. Enum.count(as) == 1 ->
  141. List.first(as) |> Floki.attribute("href") |> List.first()
  142. Enum.count(areas) == 1 ->
  143. List.first(areas) |> Floki.attribute("href") |> List.first()
  144. true ->
  145. nil
  146. end
  147. end
  148. defp implied_photo_attrval(node = {"img", _, _}) do
  149. Floki.attribute(node, "src") |> List.first()
  150. end
  151. defp implied_photo_attrval(node = {"object", _, _}) do
  152. Floki.attribute(node, "data") |> List.first()
  153. end
  154. defp implied_photo_attrval(_) do
  155. nil
  156. end
  157. defp direct_not_h_children_with_attr({_, _, children}, name, attr) do
  158. Enum.filter(children, fn
  159. {el, _, _} -> el == name
  160. v when is_bitstring(v) -> false
  161. end)
  162. |> Enum.filter(fn el -> not Microformats2.is_rootlevel?(el) end)
  163. |> Enum.filter(fn el -> Enum.count(Floki.attribute(el, attr)) > 0 end)
  164. end
  165. defp implied_url_attrval(node = {"a", _, _}) do
  166. Floki.attribute(node, "href") |> List.first()
  167. end
  168. defp implied_url_attrval(node = {"area", _, _}) do
  169. Floki.attribute(node, "href") |> List.first()
  170. end
  171. defp implied_url_attrval(_) do
  172. nil
  173. end
  174. end