forked from AkkomaGang/mfm-parser
Change Lexer to use a data structure for tokens
This commit is contained in:
parent
4748d833b2
commit
733388fa6e
8 changed files with 82 additions and 42 deletions
48
lib/lexer.ex
48
lib/lexer.ex
|
@ -1,6 +1,12 @@
|
|||
defmodule MfmParser.Lexer do
|
||||
alias MfmParser.Reader
|
||||
|
||||
alias MfmParser.Token
|
||||
alias MfmParser.Token.MFMOpen
|
||||
alias MfmParser.Token.MFMClose
|
||||
alias MfmParser.Token.Newline
|
||||
alias MfmParser.Token.Text
|
||||
|
||||
def peek(input) do
|
||||
case next(input) do
|
||||
{:ok, token, _} -> {:ok, token}
|
||||
|
@ -9,34 +15,44 @@ defmodule MfmParser.Lexer do
|
|||
end
|
||||
|
||||
def next(input) do
|
||||
recursive_next(Reader.next(input), "", type_of_token(input))
|
||||
recursive_extract_next_token(Reader.next(input), get_empty_token(input))
|
||||
end
|
||||
|
||||
defp recursive_next(:eof, _, _) do
|
||||
defp recursive_extract_next_token(:eof, _) do
|
||||
:eof
|
||||
end
|
||||
|
||||
defp recursive_next({:ok, char, rest}, part, token_type) do
|
||||
if is_end_of_token?(char, rest, token_type) do
|
||||
{:ok, part <> char, rest}
|
||||
defp recursive_extract_next_token({:ok, char, rest}, token) do
|
||||
if is_last_char_of_token?(char, rest, token) do
|
||||
{:ok, token |> Token.append(char), rest}
|
||||
else
|
||||
recursive_next(Reader.next(rest), part <> char, token_type)
|
||||
recursive_extract_next_token(Reader.next(rest), token |> Token.append(char))
|
||||
end
|
||||
end
|
||||
|
||||
defp is_end_of_token?(char, _, :mfm_open) do
|
||||
char in [" "]
|
||||
defp get_empty_token(input) do
|
||||
case Reader.peek(input) do
|
||||
:eof -> :eof
|
||||
{:ok, "$"} -> %MFMOpen{}
|
||||
{:ok, "]"} -> %MFMClose{}
|
||||
{:ok, "\n"} -> %Newline{}
|
||||
_ -> %Text{}
|
||||
end
|
||||
end
|
||||
|
||||
defp is_end_of_token?(_, _, :mfm_close) do
|
||||
defp is_last_char_of_token?(char, _, %MFMOpen{}) do
|
||||
char == " "
|
||||
end
|
||||
|
||||
defp is_last_char_of_token?(_, _, %MFMClose{}) do
|
||||
true
|
||||
end
|
||||
|
||||
defp is_end_of_token?(_, _, :newline) do
|
||||
defp is_last_char_of_token?(_, _, %Newline{}) do
|
||||
true
|
||||
end
|
||||
|
||||
defp is_end_of_token?(_, rest, :text) do
|
||||
defp is_last_char_of_token?(_, rest, %Text{}) do
|
||||
case Reader.next(rest) do
|
||||
:eof -> true
|
||||
{:ok, "]", _} -> true
|
||||
|
@ -44,14 +60,4 @@ defmodule MfmParser.Lexer do
|
|||
_ -> false
|
||||
end
|
||||
end
|
||||
|
||||
defp type_of_token(input) do
|
||||
case Reader.peek(input) do
|
||||
:eof -> :eof
|
||||
{:ok, "$"} -> :mfm_open
|
||||
{:ok, "]"} -> :mfm_close
|
||||
{:ok, "\n"} -> :newline
|
||||
_ -> :text
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
5
lib/token.ex
Normal file
5
lib/token.ex
Normal file
|
@ -0,0 +1,5 @@
|
|||
defmodule MfmParser.Token do
  @moduledoc """
  Helpers shared by the lexer's token data structures.

  A token is any map or struct that keeps its accumulated text under the
  `:content` key.
  """

  @type t :: %{required(:content) => String.t(), optional(atom()) => term()}

  @doc """
  Appends `new_char` to the token's `:content` and returns the updated token.

  Every other key of the token (including its struct type, if any) is left
  untouched. Raises `FunctionClauseError` when the token has no `:content` key.

  ## Examples

      iex> MfmParser.Token.append(%{content: "$["}, "p")
      %{content: "$[p"}
  """
  @spec append(t(), String.t()) :: t()
  def append(%{content: content} = token, new_char) do
    # Map update syntax (instead of Map.put/3) only ever rewrites an existing
    # key, so the token keeps exactly the same shape it came in with.
    %{token | content: content <> new_char}
  end
end
|
3
lib/token/mfm_close.ex
Normal file
3
lib/token/mfm_close.ex
Normal file
|
@ -0,0 +1,3 @@
|
|||
defmodule MfmParser.Token.MFMClose do
  @moduledoc """
  Token the lexer starts when the next input character is `]`.

  `content` holds the text consumed for this token and defaults to `""`.
  """

  @type t :: %__MODULE__{content: String.t()}

  defstruct content: ""
end
|
3
lib/token/mfm_open.ex
Normal file
3
lib/token/mfm_open.ex
Normal file
|
@ -0,0 +1,3 @@
|
|||
defmodule MfmParser.Token.MFMOpen do
  @moduledoc """
  Token the lexer starts when the next input character is `$`.

  `content` holds the text consumed for this token (for example `"$[ola "`)
  and defaults to `""`.
  """

  @type t :: %__MODULE__{content: String.t()}

  defstruct content: ""
end
|
3
lib/token/newline.ex
Normal file
3
lib/token/newline.ex
Normal file
|
@ -0,0 +1,3 @@
|
|||
defmodule MfmParser.Token.Newline do
  @moduledoc """
  Token the lexer starts when the next input character is a newline (`"\\n"`).

  `content` holds the text consumed for this token and defaults to `""`.
  """

  @type t :: %__MODULE__{content: String.t()}

  defstruct content: ""
end
|
3
lib/token/text.ex
Normal file
3
lib/token/text.ex
Normal file
|
@ -0,0 +1,3 @@
|
|||
defmodule MfmParser.Token.Text do
  @moduledoc """
  Token the lexer falls back to when the next input character does not start
  any of the other token kinds.

  `content` holds the text consumed for this token and defaults to `""`.
  """

  @type t :: %__MODULE__{content: String.t()}

  defstruct content: ""
end
|
|
@ -1,7 +1,13 @@
|
|||
defmodule MfmParser.LexerTest do
|
||||
use ExUnit.Case
|
||||
|
||||
alias MfmParser.Lexer
|
||||
|
||||
alias MfmParser.Token.MFMOpen
|
||||
alias MfmParser.Token.MFMClose
|
||||
alias MfmParser.Token.Newline
|
||||
alias MfmParser.Token.Text
|
||||
|
||||
describe "eof" do
|
||||
test "peek/1 handles eof" do
|
||||
assert Lexer.peek("") == :eof
|
||||
|
@ -14,9 +20,11 @@ defmodule MfmParser.LexerTest do
|
|||
|
||||
describe "mfm $[ token" do
|
||||
test "it ends with a space" do
|
||||
assert Lexer.peek("$[ola puerca]") == {:ok, "$[ola "}
|
||||
assert Lexer.next("$[ola puerca]") == {:ok, "$[ola ", "puerca]"}
|
||||
assert Lexer.next("$[ola.x,speed=5s puerca]") == {:ok, "$[ola.x,speed=5s ", "puerca]"}
|
||||
assert Lexer.peek("$[ola puerca]") == {:ok, %MFMOpen{content: "$[ola "}}
|
||||
assert Lexer.next("$[ola puerca]") == {:ok, %MFMOpen{content: "$[ola "}, "puerca]"}
|
||||
|
||||
assert Lexer.next("$[ola.x,speed=5s puerca]") ==
|
||||
{:ok, %MFMOpen{content: "$[ola.x,speed=5s "}, "puerca]"}
|
||||
end
|
||||
|
||||
test "it doesn't crash if the token can't be completed" do
|
||||
|
@ -27,54 +35,54 @@ defmodule MfmParser.LexerTest do
|
|||
|
||||
describe "] token" do
|
||||
test "it handles ] as a token" do
|
||||
assert Lexer.peek("]ve anime") == {:ok, "]"}
|
||||
assert Lexer.next("]ve anime") == {:ok, "]", "ve anime"}
|
||||
assert Lexer.peek("]ve anime") == {:ok, %MFMClose{content: "]"}}
|
||||
assert Lexer.next("]ve anime") == {:ok, %MFMClose{content: "]"}, "ve anime"}
|
||||
end
|
||||
|
||||
test "it works at the eof" do
|
||||
assert Lexer.peek("]") == {:ok, "]"}
|
||||
assert Lexer.next("]") == {:ok, "]", ""}
|
||||
assert Lexer.peek("]") == {:ok, %MFMClose{content: "]"}}
|
||||
assert Lexer.next("]") == {:ok, %MFMClose{content: "]"}, ""}
|
||||
end
|
||||
end
|
||||
|
||||
describe "text token" do
|
||||
test "it ends when a mfm token opens while a $ alone doesn't end the text token" do
|
||||
assert Lexer.peek("Tu abuela ve anime y no se lava el $[spin culo]") ==
|
||||
{:ok, "Tu abuela ve anime y no se lava el "}
|
||||
{:ok, %Text{content: "Tu abuela ve anime y no se lava el "}}
|
||||
|
||||
assert Lexer.next("Tu abuela ve anime y no se lava el $[spin culo]") ==
|
||||
{:ok, "Tu abuela ve anime y no se lava el ", "$[spin culo]"}
|
||||
{:ok, %Text{content: "Tu abuela ve anime y no se lava el "}, "$[spin culo]"}
|
||||
|
||||
assert Lexer.peek("A $2 chocolatine") == {:ok, "A $2 chocolatine"}
|
||||
assert Lexer.next("A $2 chocolatine") == {:ok, "A $2 chocolatine", ""}
|
||||
assert Lexer.peek("A $2 chocolatine") == {:ok, %Text{content: "A $2 chocolatine"}}
|
||||
assert Lexer.next("A $2 chocolatine") == {:ok, %Text{content: "A $2 chocolatine"}, ""}
|
||||
|
||||
assert Lexer.peek("Eyes like $$") == {:ok, "Eyes like $$"}
|
||||
assert Lexer.next("Eyes like $$") == {:ok, "Eyes like $$", ""}
|
||||
assert Lexer.peek("Eyes like $$") == {:ok, %Text{content: "Eyes like $$"}}
|
||||
assert Lexer.next("Eyes like $$") == {:ok, %Text{content: "Eyes like $$"}, ""}
|
||||
end
|
||||
|
||||
test "it ends when a mfm token closes" do
|
||||
assert Lexer.peek("el culo]") == {:ok, "el culo"}
|
||||
assert Lexer.next("el culo]") == {:ok, "el culo", "]"}
|
||||
assert Lexer.peek("el culo]") == {:ok, %Text{content: "el culo"}}
|
||||
assert Lexer.next("el culo]") == {:ok, %Text{content: "el culo"}, "]"}
|
||||
end
|
||||
|
||||
test "it ends when the eof is reached" do
|
||||
assert Lexer.peek("Tu abuela ve anime y no se lava el culo") ==
|
||||
{:ok, "Tu abuela ve anime y no se lava el culo"}
|
||||
{:ok, %Text{content: "Tu abuela ve anime y no se lava el culo"}}
|
||||
|
||||
assert Lexer.next("Tu abuela ve anime y no se lava el culo") ==
|
||||
{:ok, "Tu abuela ve anime y no se lava el culo", ""}
|
||||
{:ok, %Text{content: "Tu abuela ve anime y no se lava el culo"}, ""}
|
||||
end
|
||||
end
|
||||
|
||||
describe "newline token" do
|
||||
test "it handles \n as a token" do
|
||||
assert Lexer.peek("\nchocolat") == {:ok, "\n"}
|
||||
assert Lexer.next("\nchocolat") == {:ok, "\n", "chocolat"}
|
||||
assert Lexer.peek("\nchocolat") == {:ok, %Newline{content: "\n"}}
|
||||
assert Lexer.next("\nchocolat") == {:ok, %Newline{content: "\n"}, "chocolat"}
|
||||
end
|
||||
|
||||
test "it works at the eof" do
|
||||
assert Lexer.peek("\n") == {:ok, "\n"}
|
||||
assert Lexer.next("\n") == {:ok, "\n", ""}
|
||||
assert Lexer.peek("\n") == {:ok, %Newline{content: "\n"}}
|
||||
assert Lexer.next("\n") == {:ok, %Newline{content: "\n"}, ""}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
9
test/token_test.exs
Normal file
9
test/token_test.exs
Normal file
|
@ -0,0 +1,9 @@
|
|||
defmodule MfmParser.TokenTest do
  # These tests only call a pure function, so they can run concurrently.
  use ExUnit.Case, async: true

  alias MfmParser.Token

  describe "append/2" do
    test "it appends a character to the content" do
      assert Token.append(%{content: "$["}, "p") == %{content: "$[p"}
    end

    test "it leaves the other keys of the token untouched" do
      assert Token.append(%{content: "", kind: :text}, "a") == %{content: "a", kind: :text}
    end
  end
end
|
Loading…
Reference in a new issue