확장
간단하게 테스트를 해봤으니 이제는 테스트의 범위를 좀더 늘려볼 계획
'let five = 5;' 라는 입력이 들어올때 얘를 렉싱해볼 생각
우선 렉서는 한글자씩만 읽었었음
따라서 let이나 five같은걸 l,e,t 이딴식으로 저장하게 됌 현재로는
하지만 이렇게 쓸수가 없으니 let 이런식으로 통째로 저장할 방법 생각
또한 = 같은 토큰들은 하나로 끝나니까 이전처럼 처리하면됌
문제가 생기는 부분은 숫자와 문자은 원하는 만큼 readChar로 땡겨서
한글자씩 문자열에 추가하려고 했는데
다시 생각해보니 input 위치에서 그냥 strncpy하면 됐음
중간중간 공백처리, ; 처리 등 고민을 해보고 내 나름 해봤는데
책 뒷장보니까 바로 방법이 적혀있었음..
코드
lexer.h
#pragma once
#include"token.h"
#include<stdbool.h>
typedef struct _lexer {
char* input;
int position;
int readPosition;
char ch;
}Lexer;
Lexer* New(char* input);
void readChar(Lexer* lexer);
char* readNumber(Lexer* lexer);
Token* NextToken(Lexer* lexer);
Token* newToken(TokenType token,char input);
void skipWhitespace(Lexer* l);
bool isLetter(char ch);
bool isDigit(char ch);
char* readIdentifier(Lexer* l);
const char* LookupIdent(const char* ident);
lexer.c
#include "lexer.h"
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
Lexer* New(char* input)
{
Lexer* l = (Lexer*)malloc(sizeof(Lexer));
l->input = _strdup(input);
if (!l->input) {
fprintf(stderr, "Memory allocation failed for input string\n");
free(l);
exit(EXIT_FAILURE);
}
l->ch = 0;
l->position = 0;
l->readPosition = 0;
readChar(l);
return l;
}
void readChar(Lexer* lexer)
{
if (lexer->readPosition > strlen(lexer->input)){
lexer->ch = 0;
}
else {
lexer->ch = lexer->input[lexer->readPosition];
}
lexer->position = lexer->readPosition;
(lexer->readPosition)++;
}
char* readNumber(Lexer* lexer)
{
int position = lexer->position;
while (isDigit(lexer->ch)) {
readChar(lexer);
}
const int len = lexer->position - position;
char* str = (char*)malloc(len + 1);
if (str == NULL) {
printf("Memory allocation failed\n");
return;
}
strncpy_s(str, len + 1, lexer->input + position, len);
return str;
}
Token* NextToken(Lexer* lexer)
{
Token *tok=NULL;
skipWhitespace(lexer);
char lch = lexer->ch;
switch (lch)
{
case '=':
tok = newToken(ASSIGN_TOKEN, lch);
break;
case ';':
tok = newToken(SEMICOLON_TOKEN, lch);
break;
case '(':
tok = newToken(LPAREN_TOKEN, lch);
break;
case ')':
tok = newToken(RPAREN_TOKEN, lch);
break;
case ',':
tok = newToken(COMMA_TOKEN, lch);
break;
case '+':
tok = newToken(PLUS_TOKEN, lch);
break;
case '{':
tok = newToken(LBRACE_TOKEN, lch);
break;
case '}':
tok = newToken(RBRACE_TOKEN, lch);
break;
case 0:
tok = (Token*)malloc(sizeof(Token));
if (tok == NULL) {
printf("Memory allocation failed for tok.\n");
exit(1);
}
tok->type = EOF_TOKEN;
tok->literal = _strdup("");
break;
default:
if (isLetter(lch)) {
tok = (Token*)malloc(sizeof(Token));
if (tok == NULL) {
printf("Memory allocation failed for tok.\n");
exit(1);
}
tok->literal = _strdup(readIdentifier(lexer));
tok->type = LookupIdent(tok->literal);
return tok;
}
else if (isDigit(lch)) {
tok = (Token*)malloc(sizeof(Token));
if (tok == NULL) {
printf("Memory allocation failed for tok.\n");
exit(1);
}
tok->type = INT;
tok->literal = readNumber(lexer);
return tok;
}
else {
tok = newToken(ILLEGAL_TOKEN, lch);
}
}
readChar(lexer);
return tok;
}
Token*newToken(TokenType token, char input)
{
Token* newToken = (Token*)malloc(sizeof(Token));
char* str = (char*)malloc(2);
str[0] = input;
str[1] = '\0';
newToken->type = token;
newToken->literal = _strdup(str);
return newToken;
}
void skipWhitespace(Lexer* l)
{
while (l->ch == ' ' || l->ch == '\t' || l->ch == '\n' || l->ch == '\r')
readChar(l);
}
bool isLetter(char ch)
{
return 'a'<=ch&&ch<='z'||'A'<=ch&&ch<='Z'||ch=='_';
}
bool isDigit(char ch)
{
return '0'<=ch&&ch<='9';
}
char* readIdentifier(Lexer* l)
{
int position = l->position;
while (isLetter(l->ch)) {
readChar(l);
}
const int len = l->position - position;
char* str = (char*)malloc(len + 1);
if (str == NULL) {
printf("Memory allocation failed\n");
return;
}
strncpy_s(str, len + 1, l->input + position, len);
return str;
}
const char* LookupIdent(const char* ident) {
if (strcmp(ident, "fn") == 0) {
return FUNCTION_TOKEN;
}
if (strcmp(ident, "let") == 0) {
return LET_TOKEN;
}
return IDENT_TOKEN;
}
lexer_test.c
#include<stdio.h>
#include "token.h"
#include "lexer.h"
#include<string.h>
int main() {
const char* input = "let five = 5; "
"let ten = 10; "
"let add = fn(x, y) { x + y; }; "
"let result = add(five, ten);";
Token tests[] = {
{LET_TOKEN,"let"},
{IDENT_TOKEN,"five"},
{ASSIGN_TOKEN,"="},
{INT,"5"},
{SEMICOLON_TOKEN,";"},
{LET_TOKEN,"let"},
{IDENT_TOKEN,"ten"},
{ASSIGN_TOKEN,"="},
{INT,"10"},
{SEMICOLON_TOKEN,";"},
{LET_TOKEN,"let"},
{IDENT_TOKEN,"add"},
{ASSIGN_TOKEN,"="},
{FUNCTION_TOKEN,"fn"},
{LPAREN_TOKEN,"("},
{IDENT_TOKEN,"x"},
{COMMA_TOKEN,","},
{IDENT_TOKEN,"y"},
{RPAREN_TOKEN,")"},
{LBRACE_TOKEN,"{"},
{IDENT_TOKEN,"x"},
{PLUS_TOKEN,"+"},
{IDENT_TOKEN,"y"},
{SEMICOLON_TOKEN,";"},
{RBRACE_TOKEN,"}"},
{SEMICOLON_TOKEN,";"},
{LET_TOKEN,"let"},
{IDENT_TOKEN,"result"},
{ASSIGN_TOKEN,"="},
{IDENT_TOKEN,"add"},
{LPAREN_TOKEN,"("},
{IDENT_TOKEN,"five"},
{COMMA_TOKEN,","},
{IDENT_TOKEN,"ten"},
{RPAREN_TOKEN,")"},
{SEMICOLON_TOKEN,";"},
{EOF_TOKEN,""},
};//테스트케이스 생성
Lexer* lexer = New(input);
//렉서를 만든후 분휴 시작
int len = sizeof(tests) / sizeof(Token);
for (int i = 0; i < len; i++) {
Token* tok = NextToken(lexer);
if (tok->type != tests[i].type) {
printf("tests[%d] - tokentype wrong. expected = %s, got = %s\n", i,tests[i].type, tok->type);
}
if (strcmp(tok->literal, tests[i].literal) != 0) {
printf("tests[%d] - literal wrong. expected = %s, got = %s\n", i, tests[i].literal, tok->literal);
}
printf("tests[%d]'s type = %s, literal = %s\n", i, tests[i].type, tok->literal);
//free(tok->literal);
free(tok);
}
free(lexer->input);
free(lexer);
//l은 input을 렉서해서 받아옴
//받아온다면 tests랑 비교하기
}
결과
