diff --git a/lib/lexer.ex b/lib/lexer.ex
index ed8c0c2..90b9337 100644
--- a/lib/lexer.ex
+++ b/lib/lexer.ex
@@ -1,6 +1,12 @@
 defmodule MfmParser.Lexer do
   alias MfmParser.Reader
 
+  alias MfmParser.Token
+  alias MfmParser.Token.MFMOpen
+  alias MfmParser.Token.MFMClose
+  alias MfmParser.Token.Newline
+  alias MfmParser.Token.Text
+
   def peek(input) do
     case next(input) do
       {:ok, token, _} -> {:ok, token}
@@ -9,34 +15,44 @@ defmodule MfmParser.Lexer do
   end
 
   def next(input) do
-    recursive_next(Reader.next(input), "", type_of_token(input))
+    recursive_extract_next_token(Reader.next(input), get_empty_token(input))
   end
 
-  defp recursive_next(:eof, _, _) do
+  defp recursive_extract_next_token(:eof, _) do
     :eof
   end
 
-  defp recursive_next({:ok, char, rest}, part, token_type) do
-    if is_end_of_token?(char, rest, token_type) do
-      {:ok, part <> char, rest}
+  defp recursive_extract_next_token({:ok, char, rest}, token) do
+    if is_last_char_of_token?(char, rest, token) do
+      {:ok, token |> Token.append(char), rest}
     else
-      recursive_next(Reader.next(rest), part <> char, token_type)
+      recursive_extract_next_token(Reader.next(rest), token |> Token.append(char))
     end
   end
 
-  defp is_end_of_token?(char, _, :mfm_open) do
-    char in [" "]
+  defp get_empty_token(input) do
+    case Reader.peek(input) do
+      :eof -> :eof
+      {:ok, "$"} -> %MFMOpen{}
+      {:ok, "]"} -> %MFMClose{}
+      {:ok, "\n"} -> %Newline{}
+      _ -> %Text{}
+    end
   end
 
-  defp is_end_of_token?(_, _, :mfm_close) do
+  defp is_last_char_of_token?(char, _, %MFMOpen{}) do
+    char == " "
+  end
+
+  defp is_last_char_of_token?(_, _, %MFMClose{}) do
     true
   end
 
-  defp is_end_of_token?(_, _, :newline) do
+  defp is_last_char_of_token?(_, _, %Newline{}) do
     true
   end
 
-  defp is_end_of_token?(_, rest, :text) do
+  defp is_last_char_of_token?(_, rest, %Text{}) do
     case Reader.next(rest) do
       :eof -> true
       {:ok, "]", _} -> true
@@ -44,14 +60,4 @@
       _ -> false
     end
   end
-
-  defp type_of_token(input) do
-    case Reader.peek(input) do
-      :eof -> :eof
-      {:ok, "$"} -> :mfm_open
-      {:ok, "]"} -> :mfm_close
-      {:ok, "\n"} -> :newline
-      _ -> :text
-    end
-  end
 end
diff --git a/lib/token.ex b/lib/token.ex
new file mode 100644
index 0000000..fea2aaf
--- /dev/null
+++ b/lib/token.ex
@@ -0,0 +1,5 @@
+defmodule MfmParser.Token do
+  def append(token = %{content: content}, new_char) do
+    token |> Map.put(:content, content <> new_char)
+  end
+end
diff --git a/lib/token/mfm_close.ex b/lib/token/mfm_close.ex
new file mode 100644
index 0000000..2245bb3
--- /dev/null
+++ b/lib/token/mfm_close.ex
@@ -0,0 +1,3 @@
+defmodule MfmParser.Token.MFMClose do
+  defstruct content: ""
+end
diff --git a/lib/token/mfm_open.ex b/lib/token/mfm_open.ex
new file mode 100644
index 0000000..647eede
--- /dev/null
+++ b/lib/token/mfm_open.ex
@@ -0,0 +1,3 @@
+defmodule MfmParser.Token.MFMOpen do
+  defstruct content: ""
+end
diff --git a/lib/token/newline.ex b/lib/token/newline.ex
new file mode 100644
index 0000000..d45ca15
--- /dev/null
+++ b/lib/token/newline.ex
@@ -0,0 +1,3 @@
+defmodule MfmParser.Token.Newline do
+  defstruct content: ""
+end
diff --git a/lib/token/text.ex b/lib/token/text.ex
new file mode 100644
index 0000000..f19efa5
--- /dev/null
+++ b/lib/token/text.ex
@@ -0,0 +1,3 @@
+defmodule MfmParser.Token.Text do
+  defstruct content: ""
+end
diff --git a/test/lexer_test.exs b/test/lexer_test.exs
index dd88175..2e99cf9 100644
--- a/test/lexer_test.exs
+++ b/test/lexer_test.exs
@@ -1,7 +1,13 @@
 defmodule MfmParser.LexerTest do
   use ExUnit.Case
 
+  alias MfmParser.Lexer
+  alias MfmParser.Token.MFMOpen
+  alias MfmParser.Token.MFMClose
+  alias MfmParser.Token.Newline
+  alias MfmParser.Token.Text
+
   describe "eof" do
     test "peek/1 handles eof" do
       assert Lexer.peek("") == :eof
@@ -14,9 +20,11 @@
 
   describe "mfm $[ token" do
     test "it ends with a space" do
-      assert Lexer.peek("$[ola puerca]") == {:ok, "$[ola "}
-      assert Lexer.next("$[ola puerca]") == {:ok, "$[ola ", "puerca]"}
-      assert Lexer.next("$[ola.x,speed=5s puerca]") == {:ok, "$[ola.x,speed=5s ", "puerca]"}
+      assert Lexer.peek("$[ola puerca]") == {:ok, %MFMOpen{content: "$[ola "}}
+      assert Lexer.next("$[ola puerca]") == {:ok, %MFMOpen{content: "$[ola "}, "puerca]"}
+
+      assert Lexer.next("$[ola.x,speed=5s puerca]") ==
+               {:ok, %MFMOpen{content: "$[ola.x,speed=5s "}, "puerca]"}
     end
 
     test "it doesn't crash if the token can't be completed" do
@@ -27,54 +35,54 @@
   describe "] token" do
     test "it handles ] as a token" do
-      assert Lexer.peek("]ve anime") == {:ok, "]"}
-      assert Lexer.next("]ve anime") == {:ok, "]", "ve anime"}
+      assert Lexer.peek("]ve anime") == {:ok, %MFMClose{content: "]"}}
+      assert Lexer.next("]ve anime") == {:ok, %MFMClose{content: "]"}, "ve anime"}
     end
 
     test "it works at the eof" do
-      assert Lexer.peek("]") == {:ok, "]"}
-      assert Lexer.next("]") == {:ok, "]", ""}
+      assert Lexer.peek("]") == {:ok, %MFMClose{content: "]"}}
+      assert Lexer.next("]") == {:ok, %MFMClose{content: "]"}, ""}
     end
   end
 
   describe "text token" do
     test "it ends when a mfm token opens while a $ alone doesn't end the text token" do
       assert Lexer.peek("Tu abuela ve anime y no se lava el $[spin culo]") ==
-               {:ok, "Tu abuela ve anime y no se lava el "}
+               {:ok, %Text{content: "Tu abuela ve anime y no se lava el "}}
 
       assert Lexer.next("Tu abuela ve anime y no se lava el $[spin culo]") ==
-               {:ok, "Tu abuela ve anime y no se lava el ", "$[spin culo]"}
+               {:ok, %Text{content: "Tu abuela ve anime y no se lava el "}, "$[spin culo]"}
 
-      assert Lexer.peek("A $2 chocolatine") == {:ok, "A $2 chocolatine"}
-      assert Lexer.next("A $2 chocolatine") == {:ok, "A $2 chocolatine", ""}
+      assert Lexer.peek("A $2 chocolatine") == {:ok, %Text{content: "A $2 chocolatine"}}
+      assert Lexer.next("A $2 chocolatine") == {:ok, %Text{content: "A $2 chocolatine"}, ""}
 
-      assert Lexer.peek("Eyes like $$") == {:ok, "Eyes like $$"}
-      assert Lexer.next("Eyes like $$") == {:ok, "Eyes like $$", ""}
+      assert Lexer.peek("Eyes like $$") == {:ok, %Text{content: "Eyes like $$"}}
+      assert Lexer.next("Eyes like $$") == {:ok, %Text{content: "Eyes like $$"}, ""}
     end
 
     test "it ends when a mfm token closes" do
-      assert Lexer.peek("el culo]") == {:ok, "el culo"}
-      assert Lexer.next("el culo]") == {:ok, "el culo", "]"}
+      assert Lexer.peek("el culo]") == {:ok, %Text{content: "el culo"}}
+      assert Lexer.next("el culo]") == {:ok, %Text{content: "el culo"}, "]"}
    end
 
     test "it ends when the eof is reached" do
       assert Lexer.peek("Tu abuela ve anime y no se lava el culo") ==
-               {:ok, "Tu abuela ve anime y no se lava el culo"}
+               {:ok, %Text{content: "Tu abuela ve anime y no se lava el culo"}}
 
       assert Lexer.next("Tu abuela ve anime y no se lava el culo") ==
-               {:ok, "Tu abuela ve anime y no se lava el culo", ""}
+               {:ok, %Text{content: "Tu abuela ve anime y no se lava el culo"}, ""}
     end
   end
 
   describe "newline token" do
     test "it handles \n as a token" do
-      assert Lexer.peek("\nchocolat") == {:ok, "\n"}
-      assert Lexer.next("\nchocolat") == {:ok, "\n", "chocolat"}
+      assert Lexer.peek("\nchocolat") == {:ok, %Newline{content: "\n"}}
+      assert Lexer.next("\nchocolat") == {:ok, %Newline{content: "\n"}, "chocolat"}
     end
 
     test "it works at the eof" do
-      assert Lexer.peek("\n") == {:ok, "\n"}
-      assert Lexer.next("\n") == {:ok, "\n", ""}
+      assert Lexer.peek("\n") == {:ok, %Newline{content: "\n"}}
+      assert Lexer.next("\n") == {:ok, %Newline{content: "\n"}, ""}
     end
   end
 end
diff --git a/test/token_test.exs b/test/token_test.exs
new file mode 100644
index 0000000..57a82fc
--- /dev/null
+++ b/test/token_test.exs
@@ -0,0 +1,9 @@
+defmodule MfmParser.TokenTest do
+  use ExUnit.Case
+
+  alias MfmParser.Token
+
+  test "it appends a character to the content" do
+    assert %{content: "$[p"} = Token.append(%{content: "$["}, "p")
+  end
+end
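
For reviewers, a minimal usage sketch of the reworked lexer. The MfmParser.LexerExample module and its tokens/2 helper are hypothetical and not part of this diff; they rely only on Lexer.next/1 returning {:ok, token, rest} or :eof, as exercised by the tests above.

# Hypothetical helper (not part of this diff): drains the lexer into a list of
# token structs, using only Lexer.next/1 ({:ok, token, rest} | :eof).
defmodule MfmParser.LexerExample do
  alias MfmParser.Lexer

  def tokens(input, acc \\ []) do
    case Lexer.next(input) do
      :eof -> Enum.reverse(acc)
      {:ok, token, rest} -> tokens(rest, [token | acc])
    end
  end
end

# Given the lexer rules in this diff, tokens("$[spin culo]") should yield
# something like:
#   [%MfmParser.Token.MFMOpen{content: "$[spin "},
#    %MfmParser.Token.Text{content: "culo"},
#    %MfmParser.Token.MFMClose{content: "]"}]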