From e758cf6c3127a92e6ac9a149d023198bd6fd3656 Mon Sep 17 00:00:00 2001 From: Christian Kruse Date: Sat, 27 Apr 2019 12:18:32 +0200 Subject: [PATCH] fix: ensure that the found classes are valid mf2 class names Fixes #3 --- lib/helpers.ex | 20 +++-- lib/items.ex | 28 +++--- test/documents/invalid-attrs.html | 141 ++++++++++++++++++++++++++++++ test/items_test.exs | 134 ++++++++++++++++++---------- 4 files changed, 259 insertions(+), 64 deletions(-) create mode 100644 test/documents/invalid-attrs.html diff --git a/lib/helpers.ex b/lib/helpers.ex index 34b5e2a..f9a1607 100644 --- a/lib/helpers.ex +++ b/lib/helpers.ex @@ -33,11 +33,11 @@ defmodule Microformats2.Helpers do end @spec is_a?(any(), any()) :: boolean() - def is_a?("h-" <> _, wanted), do: wanted == "h" - def is_a?("p-" <> _, wanted), do: wanted == "p" - def is_a?("e-" <> _, wanted), do: wanted == "e" - def is_a?("u-" <> _, wanted), do: wanted == "u" - def is_a?("dt-" <> _, wanted), do: wanted == "dt" + def is_a?("h-" <> _ = type, wanted), do: wanted == "h" && valid_mf2_name?(type) + def is_a?("p-" <> _ = type, wanted), do: wanted == "p" && valid_mf2_name?(type) + def is_a?("e-" <> _ = type, wanted), do: wanted == "e" && valid_mf2_name?(type) + def is_a?("u-" <> _ = type, wanted), do: wanted == "u" && valid_mf2_name?(type) + def is_a?("dt-" <> _ = type, wanted), do: wanted == "dt" && valid_mf2_name?(type) def is_a?(_, _), do: false @spec has_a?(String.t() | [any()] | tuple(), any()) :: boolean() @@ -90,4 +90,14 @@ defmodule Microformats2.Helpers do do: String.to_atom(key), else: key end + + @spec valid_mf2_name?(String.t()) :: boolean() + def valid_mf2_name?(name), do: name =~ ~r/^(?:h|p|e|u|dt)(?:-[a-z0-9]+)?(?:-[a-z]+)+$/ + + @spec non_h_type?(String.t()) :: boolean() + def non_h_type?("p-" <> _ = type), do: valid_mf2_name?(type) + def non_h_type?("u-" <> _ = type), do: valid_mf2_name?(type) + def non_h_type?("dt-" <> _ = type), do: valid_mf2_name?(type) + def non_h_type?("e-" <> _ = type), do: valid_mf2_name?(type) + def non_h_type?(_), do: false end diff --git a/lib/items.ex b/lib/items.ex index 48b7200..c5c3b01 100644 --- a/lib/items.ex +++ b/lib/items.ex @@ -39,14 +39,9 @@ defmodule Microformats2.Items do end classes = - attr_list(child) - |> Enum.filter(fn - "p-" <> _ -> true - "u-" <> _ -> true - "dt-" <> _ -> true - "e-" <> _ -> true - _ -> false - end) + child + |> attr_list() + |> Enum.filter(&non_h_type?/1) props = gen_prop(child, classes, item, p, doc, url) @@ -58,6 +53,12 @@ defmodule Microformats2.Items do parse_sub(children, doc, url, n_item) end + defp maybe_parse_prop(type, child, doc, url) do + if valid_mf2_name?(type), + do: parse_prop(type, child, doc, url), + else: nil + end + defp parse_prop("p-" <> _, child, _, _) do # TODO value pattern parsing {elem, _, _} = child @@ -165,15 +166,12 @@ defmodule Microformats2.Items do props = Enum.reduce(classes, item[:properties], fn class, acc -> prop = - if is_rootlevel?(child) do - Map.put(p, :value, get_value(class, p)) - else - parse_prop(class, child, doc, url) - end + if is_rootlevel?(child), + do: Map.put(p, :value, get_value(class, p)), + else: maybe_parse_prop(class, child, doc, url) key = strip_prefix(class) |> to_key |> normalized_key() - val = if acc[key] != nil, do: acc[key], else: [] - Map.put(acc, key, val ++ [prop]) + Map.update(acc, key, [prop], &(&1 ++ [prop])) end) if blank?(classes) and present?(p) and is_rootlevel?(child), diff --git a/test/documents/invalid-attrs.html b/test/documents/invalid-attrs.html new file mode 100644 index 0000000..51d397e --- /dev/null +++ b/test/documents/invalid-attrs.html @@ -0,0 +1,141 @@ +
+ +
diff --git a/test/items_test.exs b/test/items_test.exs index 07d03ef..b4e4591 100644 --- a/test/items_test.exs +++ b/test/items_test.exs @@ -405,52 +405,52 @@ defmodule Microformats2ItemsTest do ], like: [ %{ - properties: %{ - author: [ - %{ - properties: %{ - name: ["Eddie Hinkle"], - photo: [ - "https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpg" - ], - url: ["https://eddiehinkle.com/"] - }, - type: ["h-card"], - value: "Eddie Hinkle" - } - ], - name: [ - "https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpgEddie Hinkle" - ], - url: ["https://eddiehinkle.com/2018/12/18/1/like/"] - }, - type: ["h-cite"], - value: - "https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpgEddie Hinkle" + # properties: %{ + # author: [ + # %{ + # properties: %{ + # name: ["Eddie Hinkle"], + # photo: [ + # "https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpg" + # ], + # url: ["https://eddiehinkle.com/"] + # }, + # type: ["h-card"], + # value: "Eddie Hinkle" + # } + # ], + # name: [ + # "https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpgEddie Hinkle" + # ], + # url: ["https://eddiehinkle.com/2018/12/18/1/like/"] + # }, + # type: ["h-cite"], + # value: + # "https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpgEddie Hinkle" }, %{ - properties: %{ - author: [ - %{ - properties: %{ - name: ["Vika"], - photo: [ - "https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.png" - ], - url: ["https://fireburn.ru/"] - }, - type: ["h-card"], - value: "Vika" - } - ], - name: [ - "https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.pngVika" - ], - url: ["https://fireburn.ru/like/1545115461"] - }, - type: ["h-cite"], - value: - "https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.pngVika" + # properties: %{ + # author: [ + # %{ + # properties: %{ + # name: ["Vika"], + # photo: [ + # "https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.png" + # ], + # url: ["https://fireburn.ru/"] + # }, + # type: ["h-card"], + # value: "Vika" + # } + # ], + # name: [ + # "https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.pngVika" + # ], + # url: ["https://fireburn.ru/like/1545115461"] + # }, + # type: ["h-cite"], + # value: + # "https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.pngVika" } ], location: [ @@ -639,4 +639,50 @@ defmodule Microformats2ItemsTest do } } = Microformats2.parse(str, "http://localhost") end + + test "invalid attrs" do + str = File.read!("./test/documents/invalid-attrs.html") + + assert %{ + items: [ + %{ + properties: %{ + author: [ + %{ + properties: %{ + name: [ + "http://localhost:9000/koype-dev/photos/floating/original_self-portrait.jpgJacky Alciné" + ], + photo: ["http://localhost:9000/koype-dev/photos/floating/original_self-portrait.jpg"], + url: ["http://localhost/"] + }, + type: ["h-card"], + value: "http://localhost/" + } + ], + like_of: [ + %{ + properties: %{ + name: ["67efebc0.ngrok.io"], + url: ["http://67efebc0.ngrok.io/post/b5a600c5-98a0-4112-b2d5-9df7c600f5e2"] + }, + type: ["h-cite"], + value: "http://67efebc0.ngrok.io/post/b5a600c5-98a0-4112-b2d5-9df7c600f5e2" + } + ], + name: ["LikedLiked\n 67efebc0.ngrok.io"], + published: ["2018-12-19T00:19:04.410503Z"], + summary: ["LikedLiked\n 67efebc0.ngrok.io"], + uid: ["http://localhost/post/a4ab5c98-c476-4800-9f09-2f7f5a337f32"], + updated: ["2018-12-19 00:19:04.406330"], + url: ["http://localhost/post/a4ab5c98-c476-4800-9f09-2f7f5a337f32"] + }, + type: ["h-entry"] + } + ], + rel_urls: %{"http://localhost/" => %{rels: ["me"], text: "Jacky Alciné"}}, + rels: %{"me" => ["http://localhost/"]} + } = + Microformats2.parse(str, "http://localhost") + end end