Browse Source

Merge pull request #5 from myfreeweb/master

Allow processing already parsed html trees, add option to keep string keys
pull/6/head
Christian Kruse 1 year ago
committed by GitHub
parent
commit
b2ad5c62d1
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 33 additions and 21 deletions
  1. +5
    -1
      README.md
  2. +9
    -6
      lib/implied.ex
  3. +9
    -6
      lib/items.ex
  4. +3
    -1
      lib/microformats2.ex
  5. +7
    -7
      mix.lock

+ 5
- 1
README.md View File

@@ -47,10 +47,14 @@ It will parse the object to a structure like that:
category: ["Strategy",
"Leadership"]}}]}

You can also provide HTML trees already parsed with Floki:

Microformats2.parse(Floki.parse(~s(<div class="h-card">...</div>)))

## Dependencies

We need [Floki](https://github.com/philss/floki) for HTML parsing and
[HTTPotion](https://github.com/myfreeweb/httpotion) for parsing URLs.
[HTTPotion](https://github.com/myfreeweb/httpotion) for fetching URLs.

## Features



+ 9
- 6
lib/implied.ex View File

@@ -6,7 +6,8 @@ defmodule Microformats2.Items.ImpliedProperties do
end

defp implied_url_property(entry, root, doc_url, doc) do
if entry[:properties][:url] == nil do
url_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :url, else: "url"
if entry[:properties][url_key] == nil do
val = implied_url_attrval(root)

url =
@@ -20,7 +21,7 @@ defmodule Microformats2.Items.ImpliedProperties do
if Microformats2.blank?(url) do
entry
else
Map.put(entry, :properties, Map.put(entry[:properties], :url, [Microformats2.abs_uri(url, doc_url, doc)]))
Map.put(entry, :properties, Map.put(entry[:properties], url_key, [Microformats2.abs_uri(url, doc_url, doc)]))
end
else
entry
@@ -28,7 +29,8 @@ defmodule Microformats2.Items.ImpliedProperties do
end

defp implied_photo_property(entry, root) do
if entry[:properties][:photo] == nil do
photo_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :photo, else: "photo"
if entry[:properties][photo_key] == nil do
val = implied_photo_attrval(root)

url =
@@ -42,7 +44,7 @@ defmodule Microformats2.Items.ImpliedProperties do
if Microformats2.blank?(url) do
entry
else
Map.put(entry, :properties, Map.put(entry[:properties], :photo, [url]))
Map.put(entry, :properties, Map.put(entry[:properties], photo_key, [url]))
end
else
entry
@@ -50,7 +52,8 @@ defmodule Microformats2.Items.ImpliedProperties do
end

defp implied_name_property(entry, root = {elem, _, _}) do
if entry[:properties][:name] == nil do
name_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :name, else: "name"
if entry[:properties][name_key] == nil do
nam =
cond do
elem == "img" or elem == "area" ->
@@ -70,7 +73,7 @@ defmodule Microformats2.Items.ImpliedProperties do
end
|> Microformats2.stripped_or_nil()

Map.put(entry, :properties, Map.put(entry[:properties], :name, [nam]))
Map.put(entry, :properties, Map.put(entry[:properties], name_key, [nam]))
else
entry
end


+ 9
- 6
lib/items.ex View File

@@ -132,14 +132,16 @@ defmodule Microformats2.Items do
defp parse_prop(_, _, _, _), do: nil

defp get_value(class, p) do
name_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :name, else: "name"
url_key = if Application.get_env(:microformats2, :atomize_keys, true), do: :url, else: "url"
cond do
Microformats2.is_a?(class, "p") and p[:properties][:name] != nil ->
List.first(p[:properties][:name])
Microformats2.is_a?(class, "p") and p[:properties][name_key] != nil ->
List.first(p[:properties][name_key])

Microformats2.is_a?(class, "u") and p[:properties][:url] != nil ->
List.first(p[:properties][:url])
Microformats2.is_a?(class, "u") and p[:properties][url_key] != nil ->
List.first(p[:properties][url_key])

# and p[:properties][:url] != nil ->
# and p[:properties][url_key] != nil ->
Microformats2.is_a?(class, "e") ->
# TODO handle
nil
@@ -160,7 +162,8 @@ defmodule Microformats2.Items do
parse_prop(class, child, doc, url)
end

key = strip_prefix(class) |> to_key |> String.to_atom()
key = strip_prefix(class) |> to_key
key = if Application.get_env(:microformats2, :atomize_keys, true), do: String.to_atom(key), else: key
val = if acc[key] != nil, do: acc[key], else: []
Map.put(acc, key, val ++ [prop])
end)


+ 3
- 1
lib/microformats2.ex View File

@@ -9,9 +9,11 @@ defmodule Microformats2 do
end
end

def parse(content, url) when is_binary(content), do: parse(Floki.parse(content), url)

def parse(content, url) do
doc =
Floki.parse(content)
content
|> Floki.filter_out("template")
|> Floki.filter_out("style")
|> Floki.filter_out("script")


+ 7
- 7
mix.lock View File

@@ -1,12 +1,12 @@
%{
"earmark": {:hex, :earmark, "1.3.0", "17f0c38eaafb4800f746b457313af4b2442a8c2405b49c645768680f900be603", [:mix], [], "hexpm"},
"ex_doc": {:hex, :ex_doc, "0.19.1", "519bb9c19526ca51d326c060cb1778d4a9056b190086a8c6c115828eaccea6cf", [:mix], [{:earmark, "~> 1.1", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.7", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
"earmark": {:hex, :earmark, "1.3.2", "b840562ea3d67795ffbb5bd88940b1bed0ed9fa32834915125ea7d02e35888a5", [:mix], [], "hexpm"},
"ex_doc": {:hex, :ex_doc, "0.19.3", "3c7b0f02851f5fc13b040e8e925051452e41248f685e40250d7e40b07b9f8c10", [:mix], [{:earmark, "~> 1.2", [hex: :earmark, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.10", [hex: :makeup_elixir, repo: "hexpm", optional: false]}], "hexpm"},
"floki": {:hex, :floki, "0.20.4", "be42ac911fece24b4c72f3b5846774b6e61b83fe685c2fc9d62093277fb3bc86", [:mix], [{:html_entities, "~> 0.4.0", [hex: :html_entities, repo: "hexpm", optional: false]}, {:mochiweb, "~> 2.15", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm"},
"html_entities": {:hex, :html_entities, "0.4.0", "f2fee876858cf6aaa9db608820a3209e45a087c5177332799592142b50e89a6b", [:mix], [], "hexpm"},
"httpotion": {:hex, :httpotion, "3.1.0", "14d20d9b0ce4e86e253eb91e4af79e469ad949f57a5d23c0a51b2f86559f6589", [:mix], [{:ibrowse, "~> 4.4", [hex: :ibrowse, repo: "hexpm", optional: false]}], "hexpm"},
"ibrowse": {:hex, :ibrowse, "4.4.1", "2b7d0637b0f8b9b4182de4bd0f2e826a4da2c9b04898b6e15659ba921a8d6ec2", [:rebar3], [], "hexpm"},
"makeup": {:hex, :makeup, "0.5.6", "da47b331b1fe0a5f0380cc3a6967200eac5e1daaa9c6bff4b0310b3fcc12b98f", [:mix], [{:nimble_parsec, "~> 0.4.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
"makeup_elixir": {:hex, :makeup_elixir, "0.10.0", "0f09c2ddf352887a956d84f8f7e702111122ca32fbbc84c2f0569b8b65cbf7fa", [:mix], [{:makeup, "~> 0.5.5", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"},
"httpotion": {:hex, :httpotion, "3.1.1", "b8ad199dea2c56a70c89e7f9e4d09898c7e85871783b7417c04cb4f1d4d8e919", [:mix], [{:ibrowse, "== 4.4.0", [hex: :ibrowse, repo: "hexpm", optional: false]}], "hexpm"},
"ibrowse": {:hex, :ibrowse, "4.4.0", "2d923325efe0d2cb09b9c6a047b2835a5eda69d8a47ed6ff8bc03628b764e991", [:rebar3], [], "hexpm"},
"makeup": {:hex, :makeup, "0.8.0", "9cf32aea71c7fe0a4b2e9246c2c4978f9070257e5c9ce6d4a28ec450a839b55f", [:mix], [{:nimble_parsec, "~> 0.5.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm"},
"makeup_elixir": {:hex, :makeup_elixir, "0.13.0", "be7a477997dcac2e48a9d695ec730b2d22418292675c75aa2d34ba0909dcdeda", [:mix], [{:makeup, "~> 0.8", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm"},
"mochiweb": {:hex, :mochiweb, "2.18.0", "eb55f1db3e6e960fac4e6db4e2db9ec3602cc9f30b86cd1481d56545c3145d2e", [:rebar3], [], "hexpm"},
"nimble_parsec": {:hex, :nimble_parsec, "0.4.0", "ee261bb53214943679422be70f1658fff573c5d0b0a1ecd0f18738944f818efe", [:mix], [], "hexpm"},
"nimble_parsec": {:hex, :nimble_parsec, "0.5.0", "90e2eca3d0266e5c53f8fbe0079694740b9c91b6747f2b7e3c5d21966bba8300", [:mix], [], "hexpm"},
}

Loading…
Cancel
Save