You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

210 lines
5.5 KiB

  1. defmodule Microformats2.Items do
  2. import Microformats2.Helpers
  3. alias Microformats2.Items.ImpliedProperties
  @doc """
  Collects the microformats2 items found in a Floki node or list of nodes.

  `nodes` is either a single node or a list of nodes. `doc` and `url` are
  handed through unchanged to `parse_sub/4` and `ImpliedProperties.parse/4`.
  `items` is the accumulator (defaults to `[]`); every item found is appended
  to it, so the result keeps document order.

  An element carrying at least one root-level class becomes one item whose
  `:type` is the sorted list of those classes. Elements without a root-level
  class are searched recursively. Text nodes and Floki's non-element nodes
  (comments, doctype, processing instructions) never produce items.
  """
  def parse(nodes, doc, url, items \\ [])

  # Text nodes cannot carry classes, so they are skipped.
  def parse([head | tail], doc, url, items) when is_binary(head),
    do: parse(tail, doc, url, items)

  def parse([head | tail], doc, url, items),
    do: parse(tail, doc, url, parse(head, doc, url, items))

  def parse([], _, _, items), do: items

  # Floki represents comments, doctypes and processing instructions as tuples
  # that are not `{tag, attrs, children}` elements. They carry no microformats,
  # and treating them as elements would crash, so they are skipped.
  def parse({:comment, _}, _, _, items), do: items
  def parse({:doctype, _, _, _}, _, _, items), do: items
  def parse({:pi, _, _}, _, _, items), do: items

  def parse({_, _, children} = root, doc, url, items) do
    root_classes =
      root
      |> attr_list()
      |> Enum.filter(&is_rootlevel?/1)
      |> Enum.sort()

    if Enum.empty?(root_classes) do
      # Not a microformat root itself: keep looking further down the tree.
      parse(children, doc, url, items)
    else
      entry =
        children
        |> parse_sub(doc, url, %{type: root_classes, properties: %{}})
        |> ImpliedProperties.parse(root, url, doc)

      items ++ [entry]
    end
  end
  # Walks the child nodes of a microformat root and folds every property (or
  # nested item) they declare into `item`, returning the updated item.
  defp parse_sub([], _, _, item), do: item

  # Text nodes declare nothing.
  defp parse_sub([child | children], doc, url, item) when is_binary(child),
    do: parse_sub(children, doc, url, item)

  # Floki's non-element nodes (comments, doctype, processing instructions) are
  # not `{tag, attrs, children}` elements; without these clauses an HTML
  # comment inside a root raised a FunctionClauseError.
  defp parse_sub([{:comment, _} | children], doc, url, item),
    do: parse_sub(children, doc, url, item)

  defp parse_sub([{:doctype, _, _, _} | children], doc, url, item),
    do: parse_sub(children, doc, url, item)

  defp parse_sub([{:pi, _, _} | children], doc, url, item),
    do: parse_sub(children, doc, url, item)

  defp parse_sub([{_, _, child_children} = child | children], doc, url, item) do
    # A child with an "h-" class is parsed as an item of its own; otherwise
    # the placeholder `[]` is handed to gen_prop/6.
    nested =
      if has_a?(child, "h-") do
        child |> parse(doc, url, []) |> List.first()
      else
        []
      end

    classes =
      child
      |> attr_list()
      |> Enum.filter(&non_h_type?/1)

    updated = gen_prop(child, classes, item, nested, doc, url)

    # The children of a nested root were already consumed by parse/4 above,
    # so only descend into plain elements.
    next_item =
      if is_rootlevel?(child) do
        updated
      else
        parse_sub(child_children, doc, url, updated)
      end

    parse_sub(children, doc, url, next_item)
  end
  # Parses the property named by the class `type` from `child`, but only when
  # `valid_mf2_name?/1` accepts the class name; otherwise yields nil.
  defp maybe_parse_prop(type, child, doc, url) do
    cond do
      valid_mf2_name?(type) -> parse_prop(type, child, doc, url)
      true -> nil
    end
  end
  # Parses a `p-*` (plain text) property from `child`.
  #
  # Following the microformats2 parsing rules for `p-` properties, the value is
  # taken from, in order:
  #   * `title` of <abbr> / <link>
  #   * `value` of <data> / <input>
  #   * `alt` of <img> / <area>
  #   * otherwise the trimmed text content of the element
  # An attribute is only used when `present?/1` accepts its value.
  defp parse_prop("p-" <> _, {elem, _, _} = child, _, _) do
    # TODO value pattern parsing
    attr = fn name -> child |> Floki.attribute(name) |> List.first() end

    title = attr.("title")
    value = attr.("value")
    alt = attr.("alt")

    cond do
      elem in ["abbr", "link"] and present?(title) ->
        title

      elem in ["data", "input"] and present?(value) ->
        value

      elem in ["img", "area"] and present?(alt) ->
        alt

      true ->
        child |> text_content() |> String.trim()
    end
  end
  # Parses a `u-*` (URL) property from `node`.
  #
  # The first applicable source wins: `href` of <a>/<area>, `src` of
  # <img>/<audio>/<video>/<source>, `data` of <object>, `poster` of <video>,
  # `title` of <abbr>, `value` of <data>/<input>, and finally the trimmed text
  # content. Whatever was picked is then passed through `abs_uri/3` together
  # with `url` and `doc`.
  defp parse_prop("u-" <> _, {tag, _, _} = node, doc, url) do
    [href, src, data, poster, title, value] =
      Enum.map(["href", "src", "data", "poster", "title", "value"], fn name ->
        node |> Floki.attribute(name) |> List.first()
      end)

    raw =
      cond do
        tag in ["a", "area"] and present?(href) ->
          href

        tag in ["img", "audio", "video", "source"] and present?(src) ->
          src

        tag == "object" and present?(data) ->
          data

        tag == "video" and present?(poster) ->
          poster

        # TODO value-class-pattern at this position
        tag == "abbr" and present?(title) ->
          title

        tag in ["data", "input"] and present?(value) ->
          value

        true ->
          node |> text_content() |> String.trim()
      end

    abs_uri(raw, url, doc)
  end
  # Parses a `dt-*` (datetime) property from `node`.
  #
  # Each element kind has exactly one attribute that may hold the value:
  # `datetime` for <time>/<ins>/<del>, `title` for <abbr>, `value` for
  # <data>/<input>. When that attribute list passes `present?/1` its first
  # entry is returned; in every other case the trimmed text content is used.
  # Note that `present?/1` is applied to the attribute *list* here, not to its
  # first entry.
  defp parse_prop("dt-" <> _, {tag, _, _} = node, _, _) do
    source_attribute =
      cond do
        tag in ["time", "ins", "del"] -> "datetime"
        tag == "abbr" -> "title"
        tag in ["data", "input"] -> "value"
        true -> nil
      end

    candidates =
      if source_attribute == nil do
        []
      else
        Floki.attribute(node, source_attribute)
      end

    if source_attribute != nil and present?(candidates) do
      List.first(candidates)
    else
      node |> text_content() |> String.trim()
    end
  end
  # Parses an `e-*` (embedded markup) property: a map holding the raw HTML of
  # the element's children under `:html` and the element's text under `:text`,
  # each passed through `stripped_or_nil/1`.
  defp parse_prop("e-" <> _, {_, _, inner_nodes} = node, _, _) do
    markup = inner_nodes |> Floki.raw_html() |> stripped_or_nil()
    plain = node |> Floki.text() |> stripped_or_nil()

    %{html: markup, text: plain}
  end

  # Any class without a recognised property prefix yields no value.
  defp parse_prop(_, _, _, _), do: nil
  # Picks the `:value` for a nested item `p` that is attached as property
  # `class`: the first "name" property for a `p` class, the first "url"
  # property for a `u` class, and nil in all other cases.
  defp get_value(class, p) do
    properties = p[:properties]
    names = properties[normalized_key("name")]
    urls = properties[normalized_key("url")]

    cond do
      is_a?(class, "p") and names != nil ->
        List.first(names)

      is_a?(class, "u") and urls != nil ->
        List.first(urls)

      is_a?(class, "e") ->
        # TODO handle
        nil

      true ->
        # TODO handle
        nil
    end
  end
  # Merges what `child` contributes into `item`.
  #
  # For every property class of the child a value is appended to the list
  # stored under the normalized property key: the nested item `p` (extended
  # with a `:value`) when the child is itself a root, otherwise the result of
  # `maybe_parse_prop/4`. A root child without any property class is appended
  # to the item's `:children` instead of its `:properties`.
  defp gen_prop(child, classes, item, p, doc, url) do
    add_class = fn class, acc ->
      value =
        if is_rootlevel?(child) do
          Map.put(p, :value, get_value(class, p))
        else
          maybe_parse_prop(class, child, doc, url)
        end

      key = class |> strip_prefix() |> to_key() |> normalized_key()

      Map.update(acc, key, [value], fn existing -> existing ++ [value] end)
    end

    properties = Enum.reduce(classes, item[:properties], add_class)

    if blank?(classes) and present?(p) and is_rootlevel?(child) do
      Map.update(item, :children, [p], fn siblings -> siblings ++ [p] end)
    else
      Map.put(item, :properties, properties)
    end
  end
  # Removes a leading "p-", "u-", "dt-" or "e-" from a class name; anything
  # else is returned untouched.
  defp strip_prefix(class) do
    case class do
      "p-" <> name -> name
      "u-" <> name -> name
      "dt-" <> name -> name
      "e-" <> name -> name
      other -> other
    end
  end
  @doc """
  Appends the text found in a Floki node and its descendants to `text`.

  Text nodes are appended verbatim. An `<img>` element contributes the first
  value of its `alt` attribute, or of its `src` attribute when `alt` is blank;
  an image with neither contributes nothing. Comments, doctypes and processing
  instructions contribute nothing. All other elements contribute the text of
  their children.

  `text` defaults to `""`, which makes the function usable as an
  `Enum.reduce/3` callback.
  """
  def text_content(child, text \\ "")

  def text_content(child, text) when is_binary(child), do: text <> child

  # Floki's non-element nodes carry no visible text. They must be matched
  # before the generic element clause because `{:pi, _, _}` is a 3-tuple too.
  def text_content({:comment, _}, text), do: text
  def text_content({:doctype, _, _, _}, text), do: text
  def text_content({:pi, _, _}, text), do: text

  def text_content({"img", _, children} = img, text) do
    alt = Floki.attribute(img, "alt")
    candidates = if blank?(alt), do: Floki.attribute(img, "src"), else: alt

    # List.first/1 is nil when the image has neither attribute; `<>` raises on
    # nil, so fall back to the empty string.
    replacement = List.first(candidates) || ""

    Enum.reduce(children, text <> replacement, &text_content/2)
  end

  def text_content({_, _, children}, text),
    do: Enum.reduce(children, text, &text_content/2)
  163. end