diff --git a/README.md b/README.md index 3ecfd3a..c55cda2 100644 --- a/README.md +++ b/README.md @@ -40,3 +40,52 @@ You can also convert the tree into HTML. ] iex> MfmParser.Parser.parse("$[twitch.speed=5s 🍮]") |> MfmParser.to_html() "🍮" + +## Reading +### The Parser + +A [parser](https://en.wikipedia.org/wiki/Parsing#Parser) takes in structured text and outputs a so called "tree". A tree is a data structure which can be more easily worked with. + +A parser typically consists of three parts +* a Reader +* a Lexer (aka Tokeniser) +* the Parser + +A Reader typically has a `next` function which takes the next character out of the input and returns it. +A `peek` function allows it to peek at the next character without changing the input. +There's also some way of detecting if the eof (End Of File) is reached. +Depending on the needs of the parser, it may be implemented to allow asking for the nth character instead of just the next. + +A Lexer uses the Reader. It also has a `peek` and `next` function, but instead of returning the next (or nth) character, it returns the next (or nth) token. +E.g. if you have the MFM `$[spin some text]`, then `$[spin`, `some text`, and `]` can be considered three different tokens. + +The parser takes in the tokens and forms the tree. This is typically a data structure the programming language understands and can more easily work with. + +### The Encoder + +Once we have a good data structure, we can process this and do things with it. +E.g. an Encoder encodes the tree into a different format. + +### The code + +The code can be found in the *lib* folder. It contains, among other things, the Reader, Lexer, Parse, and Encoder modules. + +The *test* folder contains the unit tests. + +## License + + A parser/encoder for Misskey Flavoured Markdown. + Copyright (C) 2022 Ilja + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . diff --git a/lib/encoder.ex b/lib/encoder.ex index 7625629..e4cc387 100644 --- a/lib/encoder.ex +++ b/lib/encoder.ex @@ -2,6 +2,30 @@ defmodule MfmParser.Encoder do alias MfmParser.Parser alias MfmParser.Node + @moduledoc """ + An encoder who can turn a tree into HTML. + + It only works for the MFM specific tags of the form $[name.opts content]. + + Other parts of MFM (html, Markdown and [KaTeX](https://katex.org/)) are out of scope for this project. + + It can directly take input from function `MfmParser.Parser.parse`. + + ## Examples + + iex> [ + ...> %MfmParser.Node.MFM.Twitch{ + ...> children: [%MfmParser.Node.Text{props: %{text: "🍮"}}], + ...> props: %{speed: "5s"} + ...> } + ...> ] + ...> |> MfmParser.Encoder.to_html() + "🍮" + + iex> MfmParser.Parser.parse("$[twitch.speed=5s 🍮]") |> MfmParser.Encoder.to_html() + "🍮" + """ + def to_html(tree) when is_list(tree) do {html, styles} = to_html_styles(tree) diff --git a/lib/mfm_parser.ex b/lib/mfm_parser.ex deleted file mode 100644 index 3d21a11..0000000 --- a/lib/mfm_parser.ex +++ /dev/null @@ -1,23 +0,0 @@ -defmodule MfmParser do - @moduledoc """ - `MfmParser` is a parser for [Misskey Flavoured Markdown](https://mk.nixnet.social/mfm-cheat-sheet). - - It can parse MFM and return a tree. It can also turn a tree into HTML. - - It only works for the MFM specific tags of the form $[name.opts content]. - - Other parts of MFM (html, Markdown and [KaTeX](https://katex.org/)) are out of scope here. - - ## Examples - - iex> MfmParser.Parser.parse("$[twitch.speed=5s 🍮]") - [ - %MfmParser.Node.MFM.Twitch{ - children: [%MfmParser.Node.Text{props: %{text: "🍮"}}], - props: %{speed: "5s"} - } - ] - iex> MfmParser.Parser.parse("$[twitch.speed=5s 🍮]") |> MfmParser.Encoder.to_html() - "🍮" - """ -end diff --git a/lib/node.ex b/lib/node.ex new file mode 100644 index 0000000..d160362 --- /dev/null +++ b/lib/node.ex @@ -0,0 +1,67 @@ +defmodule MfmParser.Node.Text do + defstruct props: %{text: ""} +end + +defmodule MfmParser.Node.Newline do + defstruct props: %{text: "\n"} +end + +defmodule MfmParser.Node.MFM.Blur do + defstruct props: %{}, children: [] +end + +defmodule MfmParser.Node.MFM.Bounce do + defstruct props: %{speed: "0.75s"}, children: [] +end + +defmodule MfmParser.Node.MFM.Flip do + defstruct props: %{v: false, h: false}, children: [] +end + +defmodule MfmParser.Node.MFM.Font do + defstruct props: %{font: nil}, children: [] +end + +defmodule MfmParser.Node.MFM.Jelly do + defstruct props: %{speed: "1s"}, children: [] +end + +defmodule MfmParser.Node.MFM.Jump do + defstruct props: %{speed: "0.75s"}, children: [] +end + +defmodule MfmParser.Node.MFM.Rainbow do + defstruct props: %{speed: "1s"}, children: [] +end + +defmodule MfmParser.Node.MFM.Rotate do + defstruct props: %{}, children: [] +end + +defmodule MfmParser.Node.MFM.Shake do + defstruct props: %{speed: "0.5s"}, children: [] +end + +defmodule MfmParser.Node.MFM.Sparkle do + defstruct props: %{}, children: [] +end + +defmodule MfmParser.Node.MFM.Spin do + defstruct props: %{axis: "z", direction: "normal", speed: "1.5s"}, children: [] +end + +defmodule MfmParser.Node.MFM.Tada do + defstruct props: %{speed: "1s"}, children: [] +end + +defmodule MfmParser.Node.MFM.Twitch do + defstruct props: %{speed: "0.5s"}, children: [] +end + +defmodule MfmParser.Node.MFM.Undefined do + defstruct props: %{}, children: [] +end + +defmodule MfmParser.Node.MFM.X do + defstruct props: %{size: nil}, children: [] +end diff --git a/lib/node/mfm/blur.ex b/lib/node/mfm/blur.ex deleted file mode 100644 index ed41ec7..0000000 --- a/lib/node/mfm/blur.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Blur do - defstruct props: %{}, children: [] -end diff --git a/lib/node/mfm/bounce.ex b/lib/node/mfm/bounce.ex deleted file mode 100644 index b439a2a..0000000 --- a/lib/node/mfm/bounce.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Bounce do - defstruct props: %{speed: "0.75s"}, children: [] -end diff --git a/lib/node/mfm/flip.ex b/lib/node/mfm/flip.ex deleted file mode 100644 index 917bd57..0000000 --- a/lib/node/mfm/flip.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Flip do - defstruct props: %{v: false, h: false}, children: [] -end diff --git a/lib/node/mfm/font.ex b/lib/node/mfm/font.ex deleted file mode 100644 index 2ff6ecb..0000000 --- a/lib/node/mfm/font.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Font do - defstruct props: %{font: nil}, children: [] -end diff --git a/lib/node/mfm/jelly.ex b/lib/node/mfm/jelly.ex deleted file mode 100644 index 764f414..0000000 --- a/lib/node/mfm/jelly.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Jelly do - defstruct props: %{speed: "1s"}, children: [] -end diff --git a/lib/node/mfm/jump.ex b/lib/node/mfm/jump.ex deleted file mode 100644 index 87665e6..0000000 --- a/lib/node/mfm/jump.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Jump do - defstruct props: %{speed: "0.75s"}, children: [] -end diff --git a/lib/node/mfm/rainbow.ex b/lib/node/mfm/rainbow.ex deleted file mode 100644 index fb33626..0000000 --- a/lib/node/mfm/rainbow.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Rainbow do - defstruct props: %{speed: "1s"}, children: [] -end diff --git a/lib/node/mfm/rotate.ex b/lib/node/mfm/rotate.ex deleted file mode 100644 index 03c14d5..0000000 --- a/lib/node/mfm/rotate.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Rotate do - defstruct props: %{}, children: [] -end diff --git a/lib/node/mfm/shake.ex b/lib/node/mfm/shake.ex deleted file mode 100644 index 4ab0a8a..0000000 --- a/lib/node/mfm/shake.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Shake do - defstruct props: %{speed: "0.5s"}, children: [] -end diff --git a/lib/node/mfm/sparkle.ex b/lib/node/mfm/sparkle.ex deleted file mode 100644 index 9eff541..0000000 --- a/lib/node/mfm/sparkle.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Sparkle do - defstruct props: %{}, children: [] -end diff --git a/lib/node/mfm/spin.ex b/lib/node/mfm/spin.ex deleted file mode 100644 index 006a52a..0000000 --- a/lib/node/mfm/spin.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Spin do - defstruct props: %{axis: "z", direction: "normal", speed: "1.5s"}, children: [] -end diff --git a/lib/node/mfm/tada.ex b/lib/node/mfm/tada.ex deleted file mode 100644 index d47d52d..0000000 --- a/lib/node/mfm/tada.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Tada do - defstruct props: %{speed: "1s"}, children: [] -end diff --git a/lib/node/mfm/twitch.ex b/lib/node/mfm/twitch.ex deleted file mode 100644 index 92b7b61..0000000 --- a/lib/node/mfm/twitch.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Twitch do - defstruct props: %{speed: "0.5s"}, children: [] -end diff --git a/lib/node/mfm/undefined.ex b/lib/node/mfm/undefined.ex deleted file mode 100644 index 36ba3bf..0000000 --- a/lib/node/mfm/undefined.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.Undefined do - defstruct props: %{}, children: [] -end diff --git a/lib/node/mfm/x.ex b/lib/node/mfm/x.ex deleted file mode 100644 index 7de5024..0000000 --- a/lib/node/mfm/x.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.MFM.X do - defstruct props: %{size: nil}, children: [] -end diff --git a/lib/node/newline.ex b/lib/node/newline.ex deleted file mode 100644 index 8fe4b54..0000000 --- a/lib/node/newline.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.Newline do - defstruct props: %{text: "\n"} -end diff --git a/lib/node/text.ex b/lib/node/text.ex deleted file mode 100644 index d644d28..0000000 --- a/lib/node/text.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Node.Text do - defstruct props: %{text: ""} -end diff --git a/lib/parser.ex b/lib/parser.ex index 91493e3..6fe3254 100644 --- a/lib/parser.ex +++ b/lib/parser.ex @@ -3,6 +3,26 @@ defmodule MfmParser.Parser do alias MfmParser.Node alias MfmParser.Lexer + @moduledoc """ + `MfmParser` is a parser for [Misskey Flavoured Markdown](https://mk.nixnet.social/mfm-cheat-sheet). + + It can parse MFM and return a tree. It also has an encoder who can turn a tree into HTML. + + It only works for the MFM specific tags of the form $[name.opts content]. + + Other parts of MFM (html, Markdown and [KaTeX](https://katex.org/)) are out of scope for this project. + + ## Examples + + iex> MfmParser.Parser.parse("$[twitch.speed=5s 🍮]") + [ + %MfmParser.Node.MFM.Twitch{ + children: [%MfmParser.Node.Text{props: %{text: "🍮"}}], + props: %{speed: "5s"} + } + ] + """ + def parse(input, tree \\ [], is_end_token \\ fn _ -> false end) do case Lexer.next(input) do :eof -> @@ -83,8 +103,65 @@ defmodule MfmParser.Parser do end defp fill_props(node = %{props: props}, %{content: content}) do - new_props = props |> Map.merge(Token.MFM.to_props(content)) + new_props = props |> Map.merge(to_props(content)) node |> Map.merge(%{props: new_props}) end + + def to_props(opts_string) when is_binary(opts_string) do + cond do + opts_string =~ "." -> + Regex.replace(~r/^.*?\./u, opts_string, "") + |> String.trim() + |> String.split(",") + |> Enum.reduce(%{}, fn opt, acc -> + acc + |> Map.merge( + cond do + opt =~ "speed" -> + %{speed: String.replace(opt, "speed=", "")} + + opt =~ "v" -> + %{v: true} + + opt =~ "h" -> + %{h: true} + + opt =~ "x" -> + %{axis: "x"} + + opt =~ "y" -> + %{axis: "y"} + + opt =~ "left" -> + %{direction: "left"} + + opt =~ "alternate" -> + %{direction: "alternate"} + + true -> + if Regex.match?(~r/^\$\[font/, opts_string) do + %{font: opt} + else + %{} + end + end + ) + end) + + opts_string =~ "$[x" -> + %{ + size: + case opts_string |> String.replace("$[x", "") |> String.trim() do + "2" -> "200%" + "3" -> "400%" + "4" -> "600%" + _ -> "100%" + end + } + + true -> + %{} + end + end end diff --git a/lib/token.ex b/lib/token.ex index fea2aaf..0efc237 100644 --- a/lib/token.ex +++ b/lib/token.ex @@ -3,3 +3,19 @@ defmodule MfmParser.Token do token |> Map.put(:content, content <> new_char) end end + +defmodule MfmParser.Token.Text do + defstruct content: "" +end + +defmodule MfmParser.Token.Newline do + defstruct content: "" +end + +defmodule MfmParser.Token.MFM.Open do + defstruct content: "" +end + +defmodule MfmParser.Token.MFM.Close do + defstruct content: "" +end diff --git a/lib/token/mfm.ex b/lib/token/mfm.ex deleted file mode 100644 index 4e555bd..0000000 --- a/lib/token/mfm.ex +++ /dev/null @@ -1,58 +0,0 @@ -defmodule MfmParser.Token.MFM do - def to_props(opts_string) when is_binary(opts_string) do - cond do - opts_string =~ "." -> - Regex.replace(~r/^.*?\./u, opts_string, "") - |> String.trim() - |> String.split(",") - |> Enum.reduce(%{}, fn opt, acc -> - acc - |> Map.merge( - cond do - opt =~ "speed" -> - %{speed: String.replace(opt, "speed=", "")} - - opt =~ "v" -> - %{v: true} - - opt =~ "h" -> - %{h: true} - - opt =~ "x" -> - %{axis: "x"} - - opt =~ "y" -> - %{axis: "y"} - - opt =~ "left" -> - %{direction: "left"} - - opt =~ "alternate" -> - %{direction: "alternate"} - - true -> - if Regex.match?(~r/^\$\[font/, opts_string) do - %{font: opt} - else - %{} - end - end - ) - end) - - opts_string =~ "$[x" -> - %{ - size: - case opts_string |> String.replace("$[x", "") |> String.trim() do - "2" -> "200%" - "3" -> "400%" - "4" -> "600%" - _ -> "100%" - end - } - - true -> - %{} - end - end -end diff --git a/lib/token/mfm/close.ex b/lib/token/mfm/close.ex deleted file mode 100644 index 67c5d4c..0000000 --- a/lib/token/mfm/close.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Token.MFM.Close do - defstruct content: "" -end diff --git a/lib/token/mfm/open.ex b/lib/token/mfm/open.ex deleted file mode 100644 index 603b93b..0000000 --- a/lib/token/mfm/open.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Token.MFM.Open do - defstruct content: "" -end diff --git a/lib/token/newline.ex b/lib/token/newline.ex deleted file mode 100644 index b31774d..0000000 --- a/lib/token/newline.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Token.Newline do - defstruct content: "" -end diff --git a/lib/token/text.ex b/lib/token/text.ex deleted file mode 100644 index a9389e5..0000000 --- a/lib/token/text.ex +++ /dev/null @@ -1,3 +0,0 @@ -defmodule MfmParser.Token.Text do - defstruct content: "" -end diff --git a/test/encoder_test.exs b/test/encoder_test.exs index 64102b3..e8e8c1f 100644 --- a/test/encoder_test.exs +++ b/test/encoder_test.exs @@ -4,6 +4,8 @@ defmodule MfmParser.EncoderTest do alias MfmParser.Encoder alias MfmParser.Node + doctest MfmParser.Encoder + describe "to_html" do test "it handles text" do input_tree = [%Node.Text{props: %{text: "chocolatine"}}] diff --git a/test/mfm_parser_test.exs b/test/mfm_parser_test.exs deleted file mode 100644 index 6035485..0000000 --- a/test/mfm_parser_test.exs +++ /dev/null @@ -1,4 +0,0 @@ -defmodule MfmParserTest do - use ExUnit.Case - doctest MfmParser -end diff --git a/test/parser_test.exs b/test/parser_test.exs index 0080a1c..9ed8e1f 100644 --- a/test/parser_test.exs +++ b/test/parser_test.exs @@ -2,6 +2,8 @@ defmodule MfmParser.ParserTest do use ExUnit.Case alias MfmParser.Parser + doctest MfmParser.Parser + describe "single element input" do test "it can handle an empty string as input" do input = "" @@ -586,4 +588,35 @@ defmodule MfmParser.ParserTest do ] end end + + describe "to_props/1" do + test "it returns speed in the list of parameters" do + assert %{speed: "5s"} = Parser.to_props("$[blabla.speed=5s") + assert %{speed: "0.5s"} = Parser.to_props("$[blabla.speed=0.5s") + end + + test "it returns v and h in the list of parameters" do + assert %{v: true} = Parser.to_props("$[blabla.v") + assert %{v: true, h: true} = Parser.to_props("$[blabla.h,v") + end + + test "it returns fonts" do + assert %{font: "some_font"} = Parser.to_props("$[font.some_font") + end + + test "it returns a size for an x element" do + assert %{size: "200%"} = Parser.to_props("$[x2") + assert %{size: "400%"} = Parser.to_props("$[x3") + assert %{size: "600%"} = Parser.to_props("$[x4") + assert %{size: "100%"} = Parser.to_props("$[xqsdfqsf") + end + + test "it returns an empty list when there are no parameters" do + assert %{} = Parser.to_props("$[blabla") + end + + test "it ignores unknown parameters" do + assert %{} = Parser.to_props("$[blabla.idk") + end + end end diff --git a/test/token/mfm_test.exs b/test/token/mfm_test.exs deleted file mode 100644 index 907d481..0000000 --- a/test/token/mfm_test.exs +++ /dev/null @@ -1,34 +0,0 @@ -defmodule MfmParser.MFMTest do - use ExUnit.Case - - alias MfmParser.Token.MFM - - test "it returns speed in the list of parameters" do - assert %{speed: "5s"} = MFM.to_props("$[blabla.speed=5s") - assert %{speed: "0.5s"} = MFM.to_props("$[blabla.speed=0.5s") - end - - test "it returns v and h in the list of parameters" do - assert %{v: true} = MFM.to_props("$[blabla.v") - assert %{v: true, h: true} = MFM.to_props("$[blabla.h,v") - end - - test "it returns fonts" do - assert %{font: "some_font"} = MFM.to_props("$[font.some_font") - end - - test "it returns a size for an x element" do - assert %{size: "200%"} = MFM.to_props("$[x2") - assert %{size: "400%"} = MFM.to_props("$[x3") - assert %{size: "600%"} = MFM.to_props("$[x4") - assert %{size: "100%"} = MFM.to_props("$[xqsdfqsf") - end - - test "it returns an empty list when there are no parameters" do - assert %{} = MFM.to_props("$[blabla") - end - - test "it ignores unknown parameters" do - assert %{} = MFM.to_props("$[blabla.idk") - end -end