Browse Source

loads of bugfixes

tags/microformats2-0.0.2
Christian Kruse 5 years ago
parent
commit
2eec0d5d9f
6 changed files with 220 additions and 49 deletions
  1. +45
    -46
      lib/items.ex
  2. +6
    -1
      lib/microformats2.ex
  3. +1
    -1
      mix.exs
  4. +1
    -1
      mix.lock
  5. +139
    -0
      test/documents/real_world_note.html
  6. +28
    -0
      test/items_test.exs

+ 45
- 46
lib/items.ex View File

@@ -1,10 +1,8 @@
defmodule Microformats2.Items do
def parse(nodes, doc, url, items \\ [])
def parse([head | tail], doc, url, items) when is_bitstring(head), do: parse(tail, doc, url, items)
#def parse([{:comment, _} | tail], doc, url, items), do: parse(tail, doc, url, items)
def parse([head | tail], doc, url, items), do: parse(tail, doc, url, parse(head, doc, url, items))
def parse([], _, _, items), do: items
def parse({:comment, _}, _, _, items), do: items

def parse(root, doc, url, items) do
root_classes = Microformats2.attr_list(root) |>
@@ -18,62 +16,30 @@ defmodule Microformats2.Items do
%{type: root_classes,
properties: %{}}) |> Microformats2.Items.ImpliedProperties.parse(root, url, doc)

children_entries = parse(children, doc, url, [])

if not Microformats2.blank?(children_entries) do
items ++ [Map.put(entry, :children, children_entries)]
else
items ++ [entry]
end

items ++ [entry]
else
parse(children, doc, url, items)
end
end

defp parse_sub([], _, _, item), do: item
defp parse_sub([{:comment, _} | children], doc, url, item), do: parse_sub(children, doc, url, item)
defp parse_sub([child | children], doc, url, item) when is_bitstring(child), do: parse_sub(children, doc, url, item)
defp parse_sub([child = {_, _, child_children} | children], doc, url, item) do
props = Microformats2.attr_list(child) |>
p = if Microformats2.has_a?(child, "h-") do
parse(child, doc, url, []) |> List.first
else
[]
end

classes = Microformats2.attr_list(child) |>
Enum.filter(fn("p-" <> _) -> true
("u-" <> _) -> true
("dt-" <> _) -> true
("e-" <> _) -> true
(_) -> false end) |>
Enum.reduce(item[:properties], fn(class, acc) ->
prop = if Microformats2.is_rootlevel?(child) do
p = parse(child, doc, url, []) |> List.first

val = cond do
Microformats2.is_a?(class, "p") and p[:properties][:name] != nil ->
List.first(p[:properties][:name])
Microformats2.is_a?(class, "u") and p[:properties][:url] != nil ->
List.first(p[:properties][:url])
Microformats2.is_a?(class, "e") -> #and p[:properties][:url] != nil ->
# TODO handle
nil
true ->
# TODO handle
nil
end

Map.put(p, :value, val)
else
parse_prop(class, child, doc, url)
end

key = strip_prefix(class) |> to_key |> String.to_atom
val = if acc[key] != nil, do: acc[key], else: []
Map.put(acc, key, val ++ [prop])
end)

propped_item = Map.put(item, :properties, props)
n_item = if Microformats2.is_rootlevel?(child) do
propped_item
else
parse_sub(child_children, doc, url, propped_item)
end
(_) -> false end)

props = gen_prop(child, classes, item, p, doc, url)
n_item = if Microformats2.is_rootlevel?(child), do: props, else: parse_sub(child_children, doc, url, props)

parse_sub(children, doc, url, n_item)
end
@@ -147,7 +113,40 @@ defmodule Microformats2.Items do
defp parse_prop(_, _, _, _), do: nil


# Picks the `:value` for a nested item `p` that is embedded as a property
# under the class name `class`.
#
#   * when `Microformats2.is_a?(class, "p")` holds and the nested item has a
#     `:name` property, the first name is returned;
#   * when `Microformats2.is_a?(class, "u")` holds and the nested item has a
#     `:url` property, the first URL is returned;
#   * in every other case (including "e" classes, not handled yet) the
#     result is `nil`.
#
# `p` is read via `p[:properties][...]`, so a `p` without `:properties`
# simply falls through to `nil`.
defp get_value(class, p) do
  # The `cond` is the return expression; the former `val =` binding was
  # never read and only triggered an unused-variable warning.
  cond do
    Microformats2.is_a?(class, "p") and p[:properties][:name] != nil ->
      List.first(p[:properties][:name])

    Microformats2.is_a?(class, "u") and p[:properties][:url] != nil ->
      List.first(p[:properties][:url])

    Microformats2.is_a?(class, "e") ->
      # TODO handle
      nil

    true ->
      # TODO handle
      nil
  end
end

# Folds the property classes found on `child` into `item`.
#
# For every entry of `classes` a property value is produced: when `child` is
# itself a root-level microformat, the already parsed nested item `p` is used
# and given a `:value` via `get_value/2`; otherwise `parse_prop/4` is used.
# The value is appended to the list stored under the key derived from the
# class name (`strip_prefix/1` |> `to_key/1` |> atom).
#
# If `classes` is blank, `p` is not blank and `child` is root-level, the
# nested item is appended to `item[:children]` instead and the properties are
# left as they were. In all other cases the folded properties are stored
# under `:properties`.
#
# NOTE(review): keys are created with `String.to_atom/1` from class names;
# presumably those come from the parsed document — confirm whether that
# input can be untrusted, since atoms are never garbage collected.
defp gen_prop(child, classes, item, p, doc, url) do
  collected =
    Enum.reduce(classes, item[:properties], fn class, acc ->
      value =
        if Microformats2.is_rootlevel?(child),
          do: Map.put(p, :value, get_value(class, p)),
          else: parse_prop(class, child, doc, url)

      key = class |> strip_prefix() |> to_key() |> String.to_atom()

      existing =
        case acc[key] do
          nil -> []
          found -> found
        end

      Map.put(acc, key, existing ++ [value])
    end)

  nested_child_only? =
    Microformats2.blank?(classes) and not Microformats2.blank?(p) and
      Microformats2.is_rootlevel?(child)

  if nested_child_only? do
    Map.put(item, :children, (item[:children] || []) ++ [p])
  else
    Map.put(item, :properties, collected)
  end
end


defp strip_prefix("p-" <> rest) do


+ 6
- 1
lib/microformats2.ex View File

@@ -14,7 +14,8 @@ defmodule Microformats2 do
doc = Floki.parse(content) |>
Floki.filter_out("template") |>
Floki.filter_out("style") |>
Floki.filter_out("script")
Floki.filter_out("script") |>
Floki.filter_out(:comment)

rels = Microformats2.Rels.parse(doc, url)
items = Microformats2.Items.parse(doc, doc, url)
@@ -50,6 +51,10 @@ defmodule Microformats2 do
def is_a?("dt-" <> _, wanted), do: wanted == "dt"
def is_a?(_, _), do: false

# Collects the entries of `attr_list(node)` for which `is_a?(class, wanted)`
# is true and returns `blank?/1` of that collection.
#
# NOTE(review): the result is whatever `blank?/1` says about the *matching*
# classes — presumably `true` when nothing matches, which reads as the
# opposite of the function name. Callers may depend on the current
# behaviour; confirm before inverting it.
def has_a?(node, wanted) do
  matching = Enum.filter(attr_list(node), &is_a?(&1, wanted))
  blank?(matching)
end

def abs_uri(url, base_url, doc) do
parsed = URI.parse(url)
parsed_base = URI.parse(base_url)


+ 1
- 1
mix.exs View File

@@ -42,7 +42,7 @@ defmodule Microformats2.Mixfile do
#
# Type "mix help deps" for more examples and options
defp deps do
[{:floki, "~> 0.7"},
[{:floki, github: "philss/floki"},
{:httpotion, "~> 2.2.0"}]
end
end

+ 1
- 1
mix.lock View File

@@ -1,4 +1,4 @@
%{"floki": {:hex, :floki, "0.7.1"},
%{"floki": {:git, "https://github.com/philss/floki.git", "eb63415ca70bafcfe6e2c602029d7dc41bcf308b", []},
"httpotion": {:hex, :httpotion, "2.2.0"},
"ibrowse": {:hex, :ibrowse, "4.2.2"},
"mochiweb": {:hex, :mochiweb, "2.12.2"}}

+ 139
- 0
test/documents/real_world_note.html View File

@@ -0,0 +1,139 @@

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Note #587 - jeena.net</title>
<link rel="alternate" type="application/atom+xml" title="Blog" href="https://jeena.net/posts.atom" />
<link rel="alternate" type="application/atom+xml" title="Notes" href="https://jeena.net/notes.atom" />
<link rel="alternate" type="application/atom+xml" title="Photos" href="https://jeena.net/photos.atom" />
<!--[if lte IE 8]><script src="/javascripts/html5.js"></script><![endif]-->
<meta name="viewport" content="width=device-width,initial-scale=1,user-scalable=no">
<link rel="apple-touch-icon" href="/avatar.jpg">

<link rel="openid.server" href="http://www.clavid.com/provider/openid">
<link rel="openid2.provider" href="http://www.clavid.com/provider/openid">
<link rel="openid.delegate" href="http://jeena.clavid.com">
<link rel="openid2.local_id" href="http://jeena.clavid.com">
<link rel="pingback" href="http://webmention.io/webmention?forward=https://jeena.net/webmentions" />
<link rel="authorization_endpoint" href="https://indieauth.com/auth">
<link rel="token_endpoint" href="https://tokens.indieauth.com/token">
<link rel="micropub" href="https://jeena.net/pub">

<link type="text/plain" rel="author" href="/humans.txt">

<link rel="stylesheet" media="all" href="/assets/application-ef2f50b9ff64c7ec74f15bda4b4a4fad0a175442f3675a18c7f89138e7ec1064.css" />


<link rel="prev" href="/notes/584"/>
<script src="/javascripts/previous-next.js"></script>
</head>
<body>
<header>
<a href="/">home</a> /
<a rel="feed" href="/posts">blog</a>
/
<a rel="feed" href="/notes">notes</a>
/
<a rel="feed" href="/photos">photos</a>
/
<a rel="me" href="/about">about</a> /
<a href="/more">more</a>
</header>


<article class="h-entry h-as-note note">

<header>
<h1 class="hidden p-name">Note #587</h1>
<span class="h-card p-author">
<img src="/avatar.jpg" alt="" class="u-photo noborder">
<a rel="author" class="p-name u-url" href="/">Jeena</a></span>,
<a class="u-url date" href="/comments/587">
<time title="2016-02-18T19:33:25Z" class="dt-published" datetime="2016-02-18T19:33:25Z">5 days ago</time>
<time class="dt-updated hidden" datetime="2016-02-18T19:33:25Z">2016-02-18 19:33:25</time>
</a>
</header>

<p class="in-reply-to"><strong>In reply to:</strong>
<a class="u-in-reply-to" href="https://wwwtech.de/pictures/51">https://wwwtech.de/pictures/51</a><br>
</p>


<div class="e-content">

<p>He's right, you know?</p>


</div>



<section id="comments">



<h2 class="replies-headline">1 Reply</h2>
<article class="p-comment h-cite" id="w3832">
<header>
<span class="h-card p-author">
<a class="avatar" rel="author" href="https://wwwtech.de/notes/132"><img alt="" class="u-photo noborder" width="48" onerror="setDefaultAvatar(this)" src="/cache?size=40x40&gt;&amp;url=https%3A%2F%2Fwwwtech.de%2Fimages%2Fchristian-kruse-242470c34a3671da4cab3e3b0d941729.jpg%3Fvsn%3Dd" /></a>
<a class="p-name u-url" rel="author" href="https://wwwtech.de/notes/132">Christian Kruse</a></span>,
<a class="u-url date" href="https://wwwtech.de/notes/132"><time datetime="2016-02-19T10:50:17Z" class="dt-published">4 days ago</time></a>
</header>


<p class="e-content">
Of course he is!
</p>

</article>






</section>


<form action="/webmentions" method="POST" class="webmention-form">
<p>Have you written a response? Let me know the URL:</p>
<p>
<input name="source" placeholder="http://example.com/something.html">
<button>Send</button>
<input type="hidden" name="target" value="https://jeena.net/comments/587">
<input type="hidden" name="browser" value="true">
</p>
<p><sub>There's also <a href="http://webmention.org">indie comments (webmentions)</a> support.</sub></p>
</form>


</article>




<script>
function parentP(o) {
while(o.parentNode) {
o = o.parentNode;
if(o.nodeName == "P") return o;
}
}

var query = ".e-content > p img, .e-content > p iframe, .e-content > p canvas";
var imgs = document.querySelectorAll(query);
for (var i = 0; i < imgs.length; i++) {
parentP(imgs[i]).className += " big-img";
}
</script>

</body>
</html>

+ 28
- 0
test/items_test.exs View File

@@ -141,4 +141,32 @@ defmodule Microformats2ItemsTest do
Microformats2.parse("<div class=\"h-card\"><a href=\"/foo\">Ben Ward</a></div>",
"http://benward.me")
end

# Regression test against a real-world page (test/documents/real_world_note.html).
# The document is parsed with "http://localhost" as base URL and the result is
# pattern-matched as a whole, so any missing key or differing value fails the
# match. `rels` and `rel_urls` only need to be present; their content is ignored.
test "jeena entry" do
# The match on {:ok, str} makes the test fail right away if the fixture is missing.
{:ok, str} = File.read "./test/documents/real_world_note.html"

# Expected: exactly one top-level item whose `author` and `comment`
# properties are nested items carrying their own `:value`; the relative
# URLs of the fixture ("/avatar.jpg", "/", "/comments/587") appear
# resolved against the base URL.
assert %{rels: _, rel_urls: _,
items: [%{properties:
%{author: [%{properties: %{name: ["Jeena"],
photo: ["http://localhost/avatar.jpg"],
url: ["http://localhost/"]},
type: ["h-card"], value: "Jeena"}],
comment: [%{properties: %{
author: [%{properties: %{name: ["Christian Kruse"],
photo: ["http://localhost/cache?size=40x40>&url=https%3A%2F%2Fwwwtech.de%2Fimages%2Fchristian-kruse-242470c34a3671da4cab3e3b0d941729.jpg%3Fvsn%3Dd"],
url: ["https://wwwtech.de/notes/132"]},
type: ["h-card"],
value: "Christian Kruse"}],
content: [%{html: "<p class=\"e-content\">\n\t\t Of course he is!\n\t </p>",
text: "\n\t\t Of course he is!\n\t "}],
name: ["Christian Kruse,\n\t\t 4 days ago\n\t\t Of course he is!"],
published: ["2016-02-19T10:50:17Z"],
url: ["https://wwwtech.de/notes/132"]}, type: ["h-cite"],
value: "Christian Kruse,\n\t\t 4 days ago\n\t\t Of course he is!"}],
content: [%{html: "<div class=\"e-content\"><p>He's right, you know?</p></div>",
text: "He's right, you know?"}],
in_reply_to: ["https://wwwtech.de/pictures/51"], name: ["Note #587"],
published: ["2016-02-18T19:33:25Z"], updated: ["2016-02-18T19:33:25Z"],
url: ["http://localhost/comments/587"]}, type: ["h-as-note", "h-entry"]}]} = Microformats2.parse(str, "http://localhost")
end
end

Loading…
Cancel
Save