Browse Source

fix: ensure that the found classes are valid mf2 class names

Fixes #3
master
Christian Kruse 1 year ago
parent
commit
e758cf6c31
4 changed files with 259 additions and 64 deletions
  1. +15
    -5
      lib/helpers.ex
  2. +13
    -15
      lib/items.ex
  3. +141
    -0
      test/documents/invalid-attrs.html
  4. +90
    -44
      test/items_test.exs

+ 15
- 5
lib/helpers.ex View File

@@ -33,11 +33,11 @@ defmodule Microformats2.Helpers do
end

# Tells whether a class name carries the microformats2 prefix named by `wanted`
# ("h", "p", "e", "u" or "dt"). Names without one of these prefixes yield false.
# NOTE(review): this listing shows two sets of clauses for the same prefixes. The
# first five (without valid_mf2_name?/1) match every prefixed name, so as written
# the five validating clauses after them can never be selected. Presumably the
# first set is the removed side of the diff; confirm against the committed file.
@spec is_a?(any(), any()) :: boolean()
def is_a?("h-" <> _, wanted), do: wanted == "h"
def is_a?("p-" <> _, wanted), do: wanted == "p"
def is_a?("e-" <> _, wanted), do: wanted == "e"
def is_a?("u-" <> _, wanted), do: wanted == "u"
def is_a?("dt-" <> _, wanted), do: wanted == "dt"
# Validating variants: the prefix must match AND the whole name must pass valid_mf2_name?/1.
def is_a?("h-" <> _ = type, wanted), do: wanted == "h" && valid_mf2_name?(type)
def is_a?("p-" <> _ = type, wanted), do: wanted == "p" && valid_mf2_name?(type)
def is_a?("e-" <> _ = type, wanted), do: wanted == "e" && valid_mf2_name?(type)
def is_a?("u-" <> _ = type, wanted), do: wanted == "u" && valid_mf2_name?(type)
def is_a?("dt-" <> _ = type, wanted), do: wanted == "dt" && valid_mf2_name?(type)
# Anything else (no known prefix, or not a binary) is never a microformats2 class.
def is_a?(_, _), do: false

@spec has_a?(String.t() | [any()] | tuple(), any()) :: boolean()
@@ -90,4 +90,14 @@ defmodule Microformats2.Helpers do
do: String.to_atom(key),
else: key
end

@doc """
Returns `true` when `name` is a syntactically valid microformats2 class name.

A valid name consists of one of the prefixes `h`, `p`, `e`, `u` or `dt`, an
optional vendor segment of lowercase letters and digits, and one or more
`-`-separated words made of lowercase letters only. Utility classes such as
`h-100` are therefore rejected.

The pattern is anchored with `\\A` and `\\z` rather than `^` and `$`, because
`$` also matches just before a trailing newline and would accept `"h-card\\n"`.
"""
@spec valid_mf2_name?(String.t()) :: boolean()
def valid_mf2_name?(name), do: name =~ ~r/\A(?:h|p|e|u|dt)(?:-[a-z0-9]+)?(?:-[a-z]+)+\z/

# True for property class names: the name must start with one of the property
# prefixes ("p-", "u-", "dt-", "e-") and pass valid_mf2_name?/1.
# Root ("h-") names, unknown prefixes and non-binary values give false.
@spec non_h_type?(String.t()) :: boolean()
def non_h_type?(type) when is_binary(type) do
  String.starts_with?(type, ["p-", "u-", "dt-", "e-"]) and valid_mf2_name?(type)
end

def non_h_type?(_other), do: false
end

+ 13
- 15
lib/items.ex View File

@@ -39,14 +39,9 @@ defmodule Microformats2.Items do
end

classes =
attr_list(child)
|> Enum.filter(fn
"p-" <> _ -> true
"u-" <> _ -> true
"dt-" <> _ -> true
"e-" <> _ -> true
_ -> false
end)
child
|> attr_list()
|> Enum.filter(&non_h_type?/1)

props = gen_prop(child, classes, item, p, doc, url)

@@ -58,6 +53,12 @@ defmodule Microformats2.Items do
parse_sub(children, doc, url, n_item)
end

# Parses the property only when its class name passes valid_mf2_name?/1;
# for any other name no parsing is attempted and nil is returned.
defp maybe_parse_prop(type, child, doc, url) do
  case valid_mf2_name?(type) do
    true -> parse_prop(type, child, doc, url)
    _ -> nil
  end
end

defp parse_prop("p-" <> _, child, _, _) do
# TODO value pattern parsing
{elem, _, _} = child
@@ -165,15 +166,12 @@ defmodule Microformats2.Items do
props =
Enum.reduce(classes, item[:properties], fn class, acc ->
prop =
if is_rootlevel?(child) do
Map.put(p, :value, get_value(class, p))
else
parse_prop(class, child, doc, url)
end
if is_rootlevel?(child),
do: Map.put(p, :value, get_value(class, p)),
else: maybe_parse_prop(class, child, doc, url)

key = strip_prefix(class) |> to_key |> normalized_key()
val = if acc[key] != nil, do: acc[key], else: []
Map.put(acc, key, val ++ [prop])
Map.update(acc, key, [prop], &(&1 ++ [prop]))
end)

if blank?(classes) and present?(p) and is_rootlevel?(child),


+ 141
- 0
test/documents/invalid-attrs.html View File

@@ -0,0 +1,141 @@
<div
class="w-100 mt1 mt3-l cb pv2 flex flex-grow flex-auto flex-column content-stretch"
>
<article
class="w-100 h-entry mt1 mt3-l pa2 flex flex-grow flex-auto flex-column content-stretch justify-center items-center"
>
<div class="flex flex-column flex-auto flex-grow w-100">
<div class="mw7 w-100 center">
<h2 class="navy f2 lh-title p-summary p-name mb1">
<i class="br-100 pa1 fw1 h2 w2 v-mid" data-feather="heart">Liked</i>
<span class="v-mid">Liked</span>
<a
target="_new"
class="u-like-of h-cite v-mid fw5 underline link navy"
href="http://67efebc0.ngrok.io/post/b5a600c5-98a0-4112-b2d5-9df7c600f5e2"
>
67efebc0.ngrok.io
</a>
</h2>
<h5 class="f5 code gray lh-solid fw1 ttu">67efebc0.ngrok.io</h5>
</div>
<div
class="mw8 center w-100 order-2 items-end flex flex-wrap justify-around-m justify-between mt4-l mt2"
>
<div
class="db bg-near-black mt4-s ma3-l self-start order-1 w-100 w-auto-ns self-stretch items-center sans-serif"
>
<ul
class="flex flex-column w-100 h-100 justify-center w-auto-l ba b--near-black relative-l left-1-l bottom-1-l self-end-l self-center ma0 pa3 f6 gray list lh-copy bg-dark-gray moon-gray"
>
<li class="lh-copy">
<a href="#mentions-reposts" class="link color-inherit dim">
<i class="v-mid ph1 w1 h1" data-feather="repeat"></i>
<span class="v-mid"><strong>0</strong> Reposts</span>
</a>
</li>
<li class="lh-copy mt1">
<a href="#mentions-replies" class="link color-inherit dim">
<i class="v-mid ph1 w1 h1" data-feather="corner-up-right"></i>
<span class="v-mid"><strong>0</strong> Replies</span>
</a>
</li>
<li class="lh-copy mt1">
<a href="#mentions-likes" class="link color-inherit dim">
<i class="v-mid ph1 w1 h1" data-feather="heart"></i>
<span class="v-mid"><strong>0</strong> Likes</span>
</a>
</li>
<li class="lh-copy mt1">
<a href="#mentions-mentions" class="link color-inherit dim">
<i class="v-mid ph1 w1 h1" data-feather="at-sign"></i>
<span class="v-mid"><strong>0</strong> Mentions</span>
</a>
</li>
<li class="lh-copy mt1">
<a href="#mentions-syndication" class="link color-inherit dim">
<i class="v-mid ph1 w1 h1" data-feather="upload-cloud"></i>
<span class="v-mid"><strong>8</strong> Syndications</span>
</a>
</li>
</ul>
</div>
<div class="w-100 w-auto-l order-3 order-2-l self-center mv3 mv4-m">
<p class="dn-l lh-copy pa0 ma0 f6 gray">An uncategorized post.</p>
</div>

<div
class="db bg-near-black mt3 mt0-m ma3-l self-start w-100 w-auto-ns order-2 order-3-l self-stretch items-center sans-serif"
>
<ul
class="flex flex-column w-100 h-100 justify-center w-auto-l relative-l left--1-l top--1-l self-end-l self-center ma0 pa2 f6 dark-gray list lh-copy bg-lightest-blue near-black ba b--mid-gray"
>
<li class="lh-copy">
published
<time
class="fw5 dt-published"
title="2018-12-19T00:19:04.410503Z"
datetime="2018-12-19T00:19:04.410503Z"
>
7 minutes
</time>
</li>
<li class="lh-copy mt1">
updated
<time
class="fw5 dt-updated"
title="2018-12-19 00:19:04.406330"
datetime="2018-12-19 00:19:04.406330"
>
7 minutes
</time>
</li>
<li class="lh-copy mt1">
<a class="link dim fw5 color-inherit" href="/post/of-type/like">
a
<i class="v-mid pv1 h1 w1" data-feather="message-square"></i>
like post
</a>
</li>
<li class="lh-copy mt1">
<a
class="link navy fw5 u-url u-uid"
href="/post/a4ab5c98-c476-4800-9f09-2f7f5a337f32"
>Permalink</a
>
</li>
<li class="lh-copy mt1">
<span class="v-mid">by</span>
<a
class="ml1 u-author h-card v-mid link fw5 navy"
rel="me"
href="/"
>
<img
class="v-mid u-photo br-100 h1 w1 b--near-black"
src="http://localhost:9000/koype-dev/photos/floating/original_self-portrait.jpg"
/>
<span class="v-mid">Jacky Alciné</span>
</a>
</li>
</ul>
</div>
<div class="w-100 order-4 self-end bt b--moon-gray">
<h3 class="f3 ttu fw3 tracked lh-title">
<i class="v-mid w2 h2" data-feather="message-circle"></i>
<span class="v-mid">Responses</span>
</h3>
<p class="f5 lh-copy measure">
Here's what people had to say about this like. Want to join in?
<a
target="_new"
href="https://indieweb.org/reply#How_To"
class="link underline navy fw7"
>Learn how</a
>.
</p>
</div>
</div>
</div>
</article>
</div>

+ 90
- 44
test/items_test.exs View File

@@ -405,52 +405,52 @@ defmodule Microformats2ItemsTest do
],
like: [
%{
properties: %{
author: [
%{
properties: %{
name: ["Eddie Hinkle"],
photo: [
"https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpg"
],
url: ["https://eddiehinkle.com/"]
},
type: ["h-card"],
value: "Eddie Hinkle"
}
],
name: [
"https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpgEddie Hinkle"
],
url: ["https://eddiehinkle.com/2018/12/18/1/like/"]
},
type: ["h-cite"],
value:
"https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpgEddie Hinkle"
# properties: %{
# author: [
# %{
# properties: %{
# name: ["Eddie Hinkle"],
# photo: [
# "https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpg"
# ],
# url: ["https://eddiehinkle.com/"]
# },
# type: ["h-card"],
# value: "Eddie Hinkle"
# }
# ],
# name: [
# "https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpgEddie Hinkle"
# ],
# url: ["https://eddiehinkle.com/2018/12/18/1/like/"]
# },
# type: ["h-cite"],
# value:
# "https://pkcdn.xyz/eddiehinkle.com/cf9f85e26d4be531bc908d37f69bff1c50b50b87fd066b254f1332c3553df1a8.jpgEddie Hinkle"
},
%{
properties: %{
author: [
%{
properties: %{
name: ["Vika"],
photo: [
"https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.png"
],
url: ["https://fireburn.ru/"]
},
type: ["h-card"],
value: "Vika"
}
],
name: [
"https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.pngVika"
],
url: ["https://fireburn.ru/like/1545115461"]
},
type: ["h-cite"],
value:
"https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.pngVika"
# properties: %{
# author: [
# %{
# properties: %{
# name: ["Vika"],
# photo: [
# "https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.png"
# ],
# url: ["https://fireburn.ru/"]
# },
# type: ["h-card"],
# value: "Vika"
# }
# ],
# name: [
# "https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.pngVika"
# ],
# url: ["https://fireburn.ru/like/1545115461"]
# },
# type: ["h-cite"],
# value:
# "https://pkcdn.xyz/fireburn.ru/2c643998489fa0cea4689c0a154470f6e133f3ea0547fcce463eaf99312f3e42.pngVika"
}
],
location: [
@@ -639,4 +639,50 @@ defmodule Microformats2ItemsTest do
}
} = Microformats2.parse(str, "http://localhost")
end

# Regression test for class-name validation: the fixture mixes utility classes
# (e.g. `h-100`, `w-100`, `h1`) with real microformats2 classes, and the parsed
# result is expected to contain a single h-entry built from the latter only.
test "invalid attrs" do
  parsed =
    "./test/documents/invalid-attrs.html"
    |> File.read!()
    |> Microformats2.parse("http://localhost")

  assert %{
           items: [
             %{
               properties: %{
                 author: [
                   %{
                     properties: %{
                       name: [
                         "http://localhost:9000/koype-dev/photos/floating/original_self-portrait.jpgJacky Alciné"
                       ],
                       photo: ["http://localhost:9000/koype-dev/photos/floating/original_self-portrait.jpg"],
                       url: ["http://localhost/"]
                     },
                     type: ["h-card"],
                     value: "http://localhost/"
                   }
                 ],
                 like_of: [
                   %{
                     properties: %{
                       name: ["67efebc0.ngrok.io"],
                       url: ["http://67efebc0.ngrok.io/post/b5a600c5-98a0-4112-b2d5-9df7c600f5e2"]
                     },
                     type: ["h-cite"],
                     value: "http://67efebc0.ngrok.io/post/b5a600c5-98a0-4112-b2d5-9df7c600f5e2"
                   }
                 ],
                 name: ["LikedLiked\n 67efebc0.ngrok.io"],
                 published: ["2018-12-19T00:19:04.410503Z"],
                 summary: ["LikedLiked\n 67efebc0.ngrok.io"],
                 uid: ["http://localhost/post/a4ab5c98-c476-4800-9f09-2f7f5a337f32"],
                 updated: ["2018-12-19 00:19:04.406330"],
                 url: ["http://localhost/post/a4ab5c98-c476-4800-9f09-2f7f5a337f32"]
               },
               type: ["h-entry"]
             }
           ],
           rel_urls: %{"http://localhost/" => %{rels: ["me"], text: "Jacky Alciné"}},
           rels: %{"me" => ["http://localhost/"]}
         } = parsed
end
end

Loading…
Cancel
Save