Change Lexer to use a data structure for tokens

This commit is contained in:
Ilja 2022-07-23 20:17:34 +02:00
parent 4748d833b2
commit 733388fa6e
8 changed files with 82 additions and 42 deletions

View file

@ -1,6 +1,12 @@
defmodule MfmParser.Lexer do defmodule MfmParser.Lexer do
alias MfmParser.Reader alias MfmParser.Reader
alias MfmParser.Token
alias MfmParser.Token.MFMOpen
alias MfmParser.Token.MFMClose
alias MfmParser.Token.Newline
alias MfmParser.Token.Text
def peek(input) do def peek(input) do
case next(input) do case next(input) do
{:ok, token, _} -> {:ok, token} {:ok, token, _} -> {:ok, token}
@ -9,34 +15,44 @@ defmodule MfmParser.Lexer do
end end
def next(input) do def next(input) do
recursive_next(Reader.next(input), "", type_of_token(input)) recursive_extract_next_token(Reader.next(input), get_empty_token(input))
end end
defp recursive_next(:eof, _, _) do defp recursive_extract_next_token(:eof, _) do
:eof :eof
end end
defp recursive_next({:ok, char, rest}, part, token_type) do defp recursive_extract_next_token({:ok, char, rest}, token) do
if is_end_of_token?(char, rest, token_type) do if is_last_char_of_token?(char, rest, token) do
{:ok, part <> char, rest} {:ok, token |> Token.append(char), rest}
else else
recursive_next(Reader.next(rest), part <> char, token_type) recursive_extract_next_token(Reader.next(rest), token |> Token.append(char))
end end
end end
defp is_end_of_token?(char, _, :mfm_open) do defp get_empty_token(input) do
char in [" "] case Reader.peek(input) do
:eof -> :eof
{:ok, "$"} -> %MFMOpen{}
{:ok, "]"} -> %MFMClose{}
{:ok, "\n"} -> %Newline{}
_ -> %Text{}
end
end end
defp is_end_of_token?(_, _, :mfm_close) do defp is_last_char_of_token?(char, _, %MFMOpen{}) do
char == " "
end
defp is_last_char_of_token?(_, _, %MFMClose{}) do
true true
end end
defp is_end_of_token?(_, _, :newline) do defp is_last_char_of_token?(_, _, %Newline{}) do
true true
end end
defp is_end_of_token?(_, rest, :text) do defp is_last_char_of_token?(_, rest, %Text{}) do
case Reader.next(rest) do case Reader.next(rest) do
:eof -> true :eof -> true
{:ok, "]", _} -> true {:ok, "]", _} -> true
@ -44,14 +60,4 @@ defmodule MfmParser.Lexer do
_ -> false _ -> false
end end
end end
defp type_of_token(input) do
case Reader.peek(input) do
:eof -> :eof
{:ok, "$"} -> :mfm_open
{:ok, "]"} -> :mfm_close
{:ok, "\n"} -> :newline
_ -> :text
end
end
end end

5
lib/token.ex Normal file
View file

@ -0,0 +1,5 @@
defmodule MfmParser.Token do
  @moduledoc """
  Helpers that operate on lexer tokens.

  A token is any map or struct that carries a binary `:content` field.
  """

  @doc """
  Appends `new_char` to the token's `:content` and returns the updated token.

  The token keeps all of its other fields (including its struct type, if any).
  Raises `FunctionClauseError` when the token has no `:content` key.

  ## Examples

      iex> MfmParser.Token.append(%{content: "$["}, "p")
      %{content: "$[p"}

  """
  @spec append(token, String.t()) :: token
        when token: %{required(:content) => String.t(), optional(atom()) => any()}
  def append(%{content: content} = token, new_char) do
    # Update syntax (instead of Map.put/3) asserts the key already exists, so a
    # struct can never silently gain a field it does not define.
    %{token | content: content <> new_char}
  end
end

3
lib/token/mfm_close.ex Normal file
View file

@ -0,0 +1,3 @@
defmodule MfmParser.Token.MFMClose do
  @moduledoc """
  Lexer token for a closing `]`.

  `content` holds the characters collected for the token and starts out empty.
  """

  @type t :: %__MODULE__{content: String.t()}

  defstruct content: ""
end

3
lib/token/mfm_open.ex Normal file
View file

@ -0,0 +1,3 @@
defmodule MfmParser.Token.MFMOpen do
  @moduledoc """
  Lexer token for an input segment that starts with `$`, e.g. `"$[ola "`.

  `content` holds the characters collected for the token and starts out empty.
  """

  @type t :: %__MODULE__{content: String.t()}

  defstruct content: ""
end

3
lib/token/newline.ex Normal file
View file

@ -0,0 +1,3 @@
defmodule MfmParser.Token.Newline do
  @moduledoc """
  Lexer token for a newline character.

  `content` holds the characters collected for the token and starts out empty.
  """

  @type t :: %__MODULE__{content: String.t()}

  defstruct content: ""
end

3
lib/token/text.ex Normal file
View file

@ -0,0 +1,3 @@
defmodule MfmParser.Token.Text do
  @moduledoc """
  Lexer token for plain text, i.e. input that does not start one of the other
  token kinds.

  `content` holds the characters collected for the token and starts out empty.
  """

  @type t :: %__MODULE__{content: String.t()}

  defstruct content: ""
end

View file

@ -1,7 +1,13 @@
defmodule MfmParser.LexerTest do defmodule MfmParser.LexerTest do
use ExUnit.Case use ExUnit.Case
alias MfmParser.Lexer alias MfmParser.Lexer
alias MfmParser.Token.MFMOpen
alias MfmParser.Token.MFMClose
alias MfmParser.Token.Newline
alias MfmParser.Token.Text
describe "eof" do describe "eof" do
test "peek/1 handles eof" do test "peek/1 handles eof" do
assert Lexer.peek("") == :eof assert Lexer.peek("") == :eof
@ -14,9 +20,11 @@ defmodule MfmParser.LexerTest do
describe "mfm $[ token" do describe "mfm $[ token" do
test "it ends with a space" do test "it ends with a space" do
assert Lexer.peek("$[ola puerca]") == {:ok, "$[ola "} assert Lexer.peek("$[ola puerca]") == {:ok, %MFMOpen{content: "$[ola "}}
assert Lexer.next("$[ola puerca]") == {:ok, "$[ola ", "puerca]"} assert Lexer.next("$[ola puerca]") == {:ok, %MFMOpen{content: "$[ola "}, "puerca]"}
assert Lexer.next("$[ola.x,speed=5s puerca]") == {:ok, "$[ola.x,speed=5s ", "puerca]"}
assert Lexer.next("$[ola.x,speed=5s puerca]") ==
{:ok, %MFMOpen{content: "$[ola.x,speed=5s "}, "puerca]"}
end end
test "it doesn't crash if the token can't be completed" do test "it doesn't crash if the token can't be completed" do
@ -27,54 +35,54 @@ defmodule MfmParser.LexerTest do
describe "] token" do describe "] token" do
test "it handles ] as a token" do test "it handles ] as a token" do
assert Lexer.peek("]ve anime") == {:ok, "]"} assert Lexer.peek("]ve anime") == {:ok, %MFMClose{content: "]"}}
assert Lexer.next("]ve anime") == {:ok, "]", "ve anime"} assert Lexer.next("]ve anime") == {:ok, %MFMClose{content: "]"}, "ve anime"}
end end
test "it works at the eof" do test "it works at the eof" do
assert Lexer.peek("]") == {:ok, "]"} assert Lexer.peek("]") == {:ok, %MFMClose{content: "]"}}
assert Lexer.next("]") == {:ok, "]", ""} assert Lexer.next("]") == {:ok, %MFMClose{content: "]"}, ""}
end end
end end
describe "text token" do describe "text token" do
test "it ends when a mfm token opens while a $ alone doesn't end the text token" do test "it ends when a mfm token opens while a $ alone doesn't end the text token" do
assert Lexer.peek("Tu abuela ve anime y no se lava el $[spin culo]") == assert Lexer.peek("Tu abuela ve anime y no se lava el $[spin culo]") ==
{:ok, "Tu abuela ve anime y no se lava el "} {:ok, %Text{content: "Tu abuela ve anime y no se lava el "}}
assert Lexer.next("Tu abuela ve anime y no se lava el $[spin culo]") == assert Lexer.next("Tu abuela ve anime y no se lava el $[spin culo]") ==
{:ok, "Tu abuela ve anime y no se lava el ", "$[spin culo]"} {:ok, %Text{content: "Tu abuela ve anime y no se lava el "}, "$[spin culo]"}
assert Lexer.peek("A $2 chocolatine") == {:ok, "A $2 chocolatine"} assert Lexer.peek("A $2 chocolatine") == {:ok, %Text{content: "A $2 chocolatine"}}
assert Lexer.next("A $2 chocolatine") == {:ok, "A $2 chocolatine", ""} assert Lexer.next("A $2 chocolatine") == {:ok, %Text{content: "A $2 chocolatine"}, ""}
assert Lexer.peek("Eyes like $$") == {:ok, "Eyes like $$"} assert Lexer.peek("Eyes like $$") == {:ok, %Text{content: "Eyes like $$"}}
assert Lexer.next("Eyes like $$") == {:ok, "Eyes like $$", ""} assert Lexer.next("Eyes like $$") == {:ok, %Text{content: "Eyes like $$"}, ""}
end end
test "it ends when a mfm token closes" do test "it ends when a mfm token closes" do
assert Lexer.peek("el culo]") == {:ok, "el culo"} assert Lexer.peek("el culo]") == {:ok, %Text{content: "el culo"}}
assert Lexer.next("el culo]") == {:ok, "el culo", "]"} assert Lexer.next("el culo]") == {:ok, %Text{content: "el culo"}, "]"}
end end
test "it ends when the eof is reached" do test "it ends when the eof is reached" do
assert Lexer.peek("Tu abuela ve anime y no se lava el culo") == assert Lexer.peek("Tu abuela ve anime y no se lava el culo") ==
{:ok, "Tu abuela ve anime y no se lava el culo"} {:ok, %Text{content: "Tu abuela ve anime y no se lava el culo"}}
assert Lexer.next("Tu abuela ve anime y no se lava el culo") == assert Lexer.next("Tu abuela ve anime y no se lava el culo") ==
{:ok, "Tu abuela ve anime y no se lava el culo", ""} {:ok, %Text{content: "Tu abuela ve anime y no se lava el culo"}, ""}
end end
end end
describe "newline token" do describe "newline token" do
test "it handles \n as a token" do test "it handles \n as a token" do
assert Lexer.peek("\nchocolat") == {:ok, "\n"} assert Lexer.peek("\nchocolat") == {:ok, %Newline{content: "\n"}}
assert Lexer.next("\nchocolat") == {:ok, "\n", "chocolat"} assert Lexer.next("\nchocolat") == {:ok, %Newline{content: "\n"}, "chocolat"}
end end
test "it works at the eof" do test "it works at the eof" do
assert Lexer.peek("\n") == {:ok, "\n"} assert Lexer.peek("\n") == {:ok, %Newline{content: "\n"}}
assert Lexer.next("\n") == {:ok, "\n", ""} assert Lexer.next("\n") == {:ok, %Newline{content: "\n"}, ""}
end end
end end
end end

9
test/token_test.exs Normal file
View file

@ -0,0 +1,9 @@
defmodule MfmParser.TokenTest do
  # Token.append/2 is a pure function with no shared state, so these tests can
  # safely run concurrently with other test modules.
  use ExUnit.Case, async: true

  alias MfmParser.Token

  describe "append/2" do
    test "it appends a character to the content" do
      # A bare map is enough here: append/2 only requires a :content key.
      assert %{content: "$[p"} = Token.append(%{content: "$["}, "p")
    end
  end
end