Change Lexer to use a data structure for tokens

This commit is contained in:
Ilja 2022-07-23 20:17:34 +02:00
parent 4748d833b2
commit 733388fa6e
8 changed files with 82 additions and 42 deletions

View file

@ -1,6 +1,12 @@
defmodule MfmParser.Lexer do
alias MfmParser.Reader
alias MfmParser.Token
alias MfmParser.Token.MFMOpen
alias MfmParser.Token.MFMClose
alias MfmParser.Token.Newline
alias MfmParser.Token.Text
def peek(input) do
case next(input) do
{:ok, token, _} -> {:ok, token}
@ -9,34 +15,44 @@ defmodule MfmParser.Lexer do
end
def next(input) do
recursive_next(Reader.next(input), "", type_of_token(input))
recursive_extract_next_token(Reader.next(input), get_empty_token(input))
end
defp recursive_next(:eof, _, _) do
defp recursive_extract_next_token(:eof, _) do
:eof
end
defp recursive_next({:ok, char, rest}, part, token_type) do
if is_end_of_token?(char, rest, token_type) do
{:ok, part <> char, rest}
defp recursive_extract_next_token({:ok, char, rest}, token) do
if is_last_char_of_token?(char, rest, token) do
{:ok, token |> Token.append(char), rest}
else
recursive_next(Reader.next(rest), part <> char, token_type)
recursive_extract_next_token(Reader.next(rest), token |> Token.append(char))
end
end
defp is_end_of_token?(char, _, :mfm_open) do
char in [" "]
defp get_empty_token(input) do
case Reader.peek(input) do
:eof -> :eof
{:ok, "$"} -> %MFMOpen{}
{:ok, "]"} -> %MFMClose{}
{:ok, "\n"} -> %Newline{}
_ -> %Text{}
end
end
defp is_end_of_token?(_, _, :mfm_close) do
defp is_last_char_of_token?(char, _, %MFMOpen{}) do
char == " "
end
defp is_last_char_of_token?(_, _, %MFMClose{}) do
true
end
defp is_end_of_token?(_, _, :newline) do
defp is_last_char_of_token?(_, _, %Newline{}) do
true
end
defp is_end_of_token?(_, rest, :text) do
defp is_last_char_of_token?(_, rest, %Text{}) do
case Reader.next(rest) do
:eof -> true
{:ok, "]", _} -> true
@ -44,14 +60,4 @@ defmodule MfmParser.Lexer do
_ -> false
end
end
defp type_of_token(input) do
case Reader.peek(input) do
:eof -> :eof
{:ok, "$"} -> :mfm_open
{:ok, "]"} -> :mfm_close
{:ok, "\n"} -> :newline
_ -> :text
end
end
end

5
lib/token.ex Normal file
View file

@ -0,0 +1,5 @@
defmodule MfmParser.Token do
  @moduledoc """
  Helpers shared by every lexer token.

  A token is any map or struct that keeps its text in a binary `:content`
  field, for example `MfmParser.Token.Text` or `MfmParser.Token.MFMOpen`.
  """

  @typedoc "Any map or struct that carries its text under the `:content` key."
  @type t :: %{required(:content) => String.t(), optional(any()) => any()}

  @doc """
  Returns `token` with `new_char` appended to the end of its `:content`.

  All other fields of the token (including its struct type) are left untouched.

  ## Examples

      iex> MfmParser.Token.append(%{content: "$["}, "p")
      %{content: "$[p"}
  """
  @spec append(t(), String.t()) :: t()
  def append(%{content: content} = token, new_char) do
    # Map-update syntax only rewrites a key that already exists, so a struct
    # can never silently gain a field it was not defined with.
    %{token | content: content <> new_char}
  end
end

3
lib/token/mfm_close.ex Normal file
View file

@ -0,0 +1,3 @@
defmodule MfmParser.Token.MFMClose do
  @moduledoc """
  Token the lexer produces when the input starts with a closing `]`.
  """

  @type t :: %__MODULE__{content: String.t()}

  # `content` starts out empty; the lexer appends characters to it.
  defstruct [content: ""]
end

3
lib/token/mfm_open.ex Normal file
View file

@ -0,0 +1,3 @@
defmodule MfmParser.Token.MFMOpen do
  @moduledoc """
  Token the lexer produces when the input starts with `$`.

  The lexer keeps adding characters to this token up to and including the
  first space, e.g. `"$[ola "`.
  """

  @type t :: %__MODULE__{content: String.t()}

  # `content` starts out empty; the lexer appends characters to it.
  defstruct [content: ""]
end

3
lib/token/newline.ex Normal file
View file

@ -0,0 +1,3 @@
defmodule MfmParser.Token.Newline do
  @moduledoc """
  Token the lexer produces when the input starts with a `"\\n"` character.
  """

  @type t :: %__MODULE__{content: String.t()}

  # `content` starts out empty; the lexer appends characters to it.
  defstruct [content: ""]
end

3
lib/token/text.ex Normal file
View file

@ -0,0 +1,3 @@
defmodule MfmParser.Token.Text do
  @moduledoc """
  Token the lexer falls back to when the input does not start with `$`, `]`
  or a newline.
  """

  @type t :: %__MODULE__{content: String.t()}

  # `content` starts out empty; the lexer appends characters to it.
  defstruct [content: ""]
end

View file

@ -1,7 +1,13 @@
defmodule MfmParser.LexerTest do
use ExUnit.Case
alias MfmParser.Lexer
alias MfmParser.Token.MFMOpen
alias MfmParser.Token.MFMClose
alias MfmParser.Token.Newline
alias MfmParser.Token.Text
describe "eof" do
test "peek/1 handles eof" do
assert Lexer.peek("") == :eof
@ -14,9 +20,11 @@ defmodule MfmParser.LexerTest do
describe "mfm $[ token" do
test "it ends with a space" do
assert Lexer.peek("$[ola puerca]") == {:ok, "$[ola "}
assert Lexer.next("$[ola puerca]") == {:ok, "$[ola ", "puerca]"}
assert Lexer.next("$[ola.x,speed=5s puerca]") == {:ok, "$[ola.x,speed=5s ", "puerca]"}
assert Lexer.peek("$[ola puerca]") == {:ok, %MFMOpen{content: "$[ola "}}
assert Lexer.next("$[ola puerca]") == {:ok, %MFMOpen{content: "$[ola "}, "puerca]"}
assert Lexer.next("$[ola.x,speed=5s puerca]") ==
{:ok, %MFMOpen{content: "$[ola.x,speed=5s "}, "puerca]"}
end
test "it doesn't crash if the token can't be completed" do
@ -27,54 +35,54 @@ defmodule MfmParser.LexerTest do
describe "] token" do
test "it handles ] as a token" do
assert Lexer.peek("]ve anime") == {:ok, "]"}
assert Lexer.next("]ve anime") == {:ok, "]", "ve anime"}
assert Lexer.peek("]ve anime") == {:ok, %MFMClose{content: "]"}}
assert Lexer.next("]ve anime") == {:ok, %MFMClose{content: "]"}, "ve anime"}
end
test "it works at the eof" do
assert Lexer.peek("]") == {:ok, "]"}
assert Lexer.next("]") == {:ok, "]", ""}
assert Lexer.peek("]") == {:ok, %MFMClose{content: "]"}}
assert Lexer.next("]") == {:ok, %MFMClose{content: "]"}, ""}
end
end
describe "text token" do
test "it ends when a mfm token opens while a $ alone doesn't end the text token" do
assert Lexer.peek("Tu abuela ve anime y no se lava el $[spin culo]") ==
{:ok, "Tu abuela ve anime y no se lava el "}
{:ok, %Text{content: "Tu abuela ve anime y no se lava el "}}
assert Lexer.next("Tu abuela ve anime y no se lava el $[spin culo]") ==
{:ok, "Tu abuela ve anime y no se lava el ", "$[spin culo]"}
{:ok, %Text{content: "Tu abuela ve anime y no se lava el "}, "$[spin culo]"}
assert Lexer.peek("A $2 chocolatine") == {:ok, "A $2 chocolatine"}
assert Lexer.next("A $2 chocolatine") == {:ok, "A $2 chocolatine", ""}
assert Lexer.peek("A $2 chocolatine") == {:ok, %Text{content: "A $2 chocolatine"}}
assert Lexer.next("A $2 chocolatine") == {:ok, %Text{content: "A $2 chocolatine"}, ""}
assert Lexer.peek("Eyes like $$") == {:ok, "Eyes like $$"}
assert Lexer.next("Eyes like $$") == {:ok, "Eyes like $$", ""}
assert Lexer.peek("Eyes like $$") == {:ok, %Text{content: "Eyes like $$"}}
assert Lexer.next("Eyes like $$") == {:ok, %Text{content: "Eyes like $$"}, ""}
end
test "it ends when a mfm token closes" do
assert Lexer.peek("el culo]") == {:ok, "el culo"}
assert Lexer.next("el culo]") == {:ok, "el culo", "]"}
assert Lexer.peek("el culo]") == {:ok, %Text{content: "el culo"}}
assert Lexer.next("el culo]") == {:ok, %Text{content: "el culo"}, "]"}
end
test "it ends when the eof is reached" do
assert Lexer.peek("Tu abuela ve anime y no se lava el culo") ==
{:ok, "Tu abuela ve anime y no se lava el culo"}
{:ok, %Text{content: "Tu abuela ve anime y no se lava el culo"}}
assert Lexer.next("Tu abuela ve anime y no se lava el culo") ==
{:ok, "Tu abuela ve anime y no se lava el culo", ""}
{:ok, %Text{content: "Tu abuela ve anime y no se lava el culo"}, ""}
end
end
describe "newline token" do
test "it handles \n as a token" do
assert Lexer.peek("\nchocolat") == {:ok, "\n"}
assert Lexer.next("\nchocolat") == {:ok, "\n", "chocolat"}
assert Lexer.peek("\nchocolat") == {:ok, %Newline{content: "\n"}}
assert Lexer.next("\nchocolat") == {:ok, %Newline{content: "\n"}, "chocolat"}
end
test "it works at the eof" do
assert Lexer.peek("\n") == {:ok, "\n"}
assert Lexer.next("\n") == {:ok, "\n", ""}
assert Lexer.peek("\n") == {:ok, %Newline{content: "\n"}}
assert Lexer.next("\n") == {:ok, %Newline{content: "\n"}, ""}
end
end
end

9
test/token_test.exs Normal file
View file

@ -0,0 +1,9 @@
defmodule MfmParser.TokenTest do
  use ExUnit.Case

  alias MfmParser.Token

  describe "append/2" do
    test "it appends a character to the content" do
      # A bare map is enough here: append/2 only requires a :content key.
      token = %{content: "$["}

      assert %{content: "$[p"} = Token.append(token, "p")
    end
  end
end