add Token struct and lexical analyzer

This commit is contained in:
Ilya Bezrukov 2024-10-17 13:36:13 +03:00
parent 8c21942dad
commit 5fc3c03cef

162
main.c
View File

@ -10,105 +10,139 @@ F ::= (E)|i
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <stdbool.h> #include <stdbool.h>
#include <ctype.h>
#define eprintf(...) fprintf(stderr, __VA_ARGS__) #define eprintf(...) fprintf(stderr, __VA_ARGS__)
typedef enum TokenType { typedef enum TokenType {
INT, INTEGER = 1,
PLUS, PLUS = 2,
MINUS, MULT = 4,
LBRACE, LBRACE = 8,
RBRACE RBRACE = 16,
LF = 32,
EF = 64,
} TokenType; } TokenType;
typedef struct _Token { typedef struct _Token {
TokenType type; TokenType type;
int value; int value;
} Token; } Token;
void fatal(const char* message); void fatal(const char* message);
void scan(); Token scan();
void e();
void t();
void f();
int e();
int t();
int f();
const int LINE_MAX = 1024; const int LINE_MAX = 1024;
char *line;
int symbol; Token current;
int char_num = 0;
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
line = calloc(sizeof(char), LINE_MAX);
memset(line, 0, LINE_MAX);
scan(); current = scan();
e(); int result = e();
if (symbol != '\n') { printf("Success, result: %d\n", result);
fatal("Syntax invalid");
}
printf("Success\n");
free(line);
return 0; return 0;
} }
void scan() { Token scan() {
symbol = getchar(); static char prev_s = 0;
if (symbol == '\n')
line[char_num++] = ' ';
else
line[char_num++] = symbol;
}
void e() { Token token = {0, 0};
t(); char s;
while (symbol == '+') {
scan();
t();
}
}
void t() { if (prev_s) {
f(); s = prev_s;
while (symbol == '*') { prev_s = 0;
scan();
f();
}
}
void f() {
if (symbol == '(') {
scan();
e();
if (symbol != ')') {
fatal("Expected parentheses");
}
}
else if (symbol >= '0' && symbol <= '9') {
;
}
else if (symbol == '\n') {
fatal("LF too early");
} }
else { else {
fatal("Unexpected symbol"); s = getchar();
} }
scan();
while (isblank(s)) s = getchar();
while (s >= '0' && s <= '9') {
token.type = INTEGER;
token.value = token.value * 10 + ((int) (s - '0'));
s = getchar();
}
if (token.type == INTEGER) {
prev_s = s;
return token;
}
switch (s) {
case '+': token.type = PLUS; break;
case '*': token.type = MULT; break;
case '(': token.type = LBRACE; break;
case ')': token.type = RBRACE; break;
case '\n':
case '\r':
case '\f':
token.type = LF; break;
case EOF: token.type = EF; break;
default:
eprintf("'%c' (%d)\n", s, s);
fatal("Unexpected symbol");
break;
}
return token;
}
/*
 * Evaluates a sum: one term from t(), followed by any number of
 * "PLUS term" pairs read through the global `current` token.
 * Returns the accumulated total; on return `current` holds the first
 * token that is not PLUS.
 */
int e() {
    int sum = t();

    for (;;) {
        if (current.type != PLUS) {
            return sum;
        }
        /* Replace the PLUS token with the one that follows it. */
        current = scan();
        sum += t();
    }
}
/*
 * Evaluates a product: one factor from f(), followed by any number of
 * "MULT factor" pairs read through the global `current` token.
 * Returns the accumulated product; on return `current` holds the first
 * token that is not MULT.
 */
int t() {
    int product = f();

    for (;;) {
        if (current.type != MULT) {
            return product;
        }
        /* Replace the MULT token with the one that follows it. */
        current = scan();
        product *= f();
    }
}
/*
 * Evaluates one factor starting at the global `current` token and returns
 * its value. A factor is either LBRACE, an expression handled by e(), and
 * RBRACE, or a single INTEGER token whose `value` field is returned.
 * Every other token type is reported through fatal(), which does not return.
 * On success `current` is advanced to the token that follows the factor.
 */
int f() {
int result = 0;
if (current.type == LBRACE) {
/* Step past the opening bracket and evaluate the nested expression. */
current = scan();
result = e();
/* e() returns at the first non-PLUS token; it must be the closing bracket. */
if (current.type != RBRACE) {
fatal("Expected closing parentheses");
}
}
else if (current.type == INTEGER) {
result = current.value;
}
/* LF and EF get their own messages; anything else falls into the generic case. */
else if (current.type == LF) {
fatal("Unexpected line feed");
}
else if (current.type == EF) {
fatal("Unexpected EOF");
}
else {
fatal("Unexpected token");
}
/* Shared by both successful branches: move past RBRACE or the INTEGER. */
current = scan();
return result;
} }
void fatal(const char* message) { void fatal(const char* message) {
eprintf("\nError at char %d\n%s\n", char_num, line); eprintf("Error: %s\n", message);
eprintf("%*c\n", char_num, '^');
eprintf("Message: %s\n", message);
exit(1); exit(1);
} }