add Token struct and lexical analyzer

This commit is contained in:
Ilya Bezrukov 2024-10-17 13:36:13 +03:00
parent 8c21942dad
commit 5fc3c03cef

162
main.c
View File

@ -10,105 +10,139 @@ F ::= (E)|i
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <ctype.h>
/* printf-style convenience wrapper that writes its arguments to stderr. */
#define eprintf(...) fprintf(stderr, __VA_ARGS__)
/*
 * Kinds of tokens produced by the lexical analyzer.
 *
 * Every kind has its own bit, so several kinds can be combined into a
 * mask with bitwise OR.
 */
typedef enum TokenType {
    INTEGER = 1,   /* unsigned decimal literal      */
    PLUS    = 2,   /* '+'                           */
    MULT    = 4,   /* '*'                           */
    LBRACE  = 8,   /* '('                           */
    RBRACE  = 16,  /* ')'                           */
    LF      = 32,  /* line terminator               */
    EF      = 64,  /* end of the input stream (EOF) */
} TokenType;
/* A single lexical token: its kind plus an optional numeric payload. */
typedef struct _Token {
TokenType type; /* kind of the token */
int value; /* decimal value accumulated by scan() for INTEGER tokens; left at 0 for other kinds */
} Token;
/* Prints a diagnostic for `message` to stderr and terminates the program. */
void fatal(const char* message);

/* Lexical analyzer: reads and returns the next token from stdin. */
Token scan(void);

/*
 * Recursive-descent parser, one function per non-terminal.
 * Each function returns the value of the sub-expression it parsed.
 */
int e(void);
int t(void);
int f(void);
/* Number of chars allocated for `line` in main(). */
const int LINE_MAX = 1024;
/* Heap buffer allocated in main(); fatal() prints it as the offending input line. */
char *line;
/*
 * Last raw input character. main() compares it against '\n'.
 * NOTE(review): the token-returning scan() never assigns this variable -
 * confirm whether it is still needed.
 */
int symbol;
/* Character position reported by fatal(), which also uses it as the caret column. */
int char_num = 0;
/* Token the parser is currently looking at; assigned from scan() by main(), e(), t() and f(). */
Token current;
int main(int argc, char* argv[]) {
line = calloc(sizeof(char), LINE_MAX);
memset(line, 0, LINE_MAX);
scan();
e();
current = scan();
int result = e();
if (symbol != '\n') {
fatal("Syntax invalid");
}
printf("Success, result: %d\n", result);
printf("Success\n");
free(line);
return 0;
}
void scan() {
symbol = getchar();
if (symbol == '\n')
line[char_num++] = ' ';
else
line[char_num++] = symbol;
}
Token scan() {
static char prev_s = 0;
void e() {
t();
while (symbol == '+') {
scan();
t();
}
}
Token token = {0, 0};
char s;
void t() {
f();
while (symbol == '*') {
scan();
f();
}
}
void f() {
if (symbol == '(') {
scan();
e();
if (symbol != ')') {
fatal("Expected parentheses");
}
}
else if (symbol >= '0' && symbol <= '9') {
;
}
else if (symbol == '\n') {
fatal("LF too early");
if (prev_s) {
s = prev_s;
prev_s = 0;
}
else {
fatal("Unexpected symbol");
s = getchar();
}
scan();
while (isblank(s)) s = getchar();
while (s >= '0' && s <= '9') {
token.type = INTEGER;
token.value = token.value * 10 + ((int) (s - '0'));
s = getchar();
}
if (token.type == INTEGER) {
prev_s = s;
return token;
}
switch (s) {
case '+': token.type = PLUS; break;
case '*': token.type = MULT; break;
case '(': token.type = LBRACE; break;
case ')': token.type = RBRACE; break;
case '\n':
case '\r':
case '\f':
token.type = LF; break;
case EOF: token.type = EF; break;
default:
eprintf("'%c' (%d)\n", s, s);
fatal("Unexpected symbol");
break;
}
return token;
}
/*
 * E: parses a sum of one or more terms separated by PLUS tokens and
 * returns the total.
 */
int e() {
    int total;

    /* Each pass consumes the PLUS token, then adds the term after it. */
    for (total = t(); current.type == PLUS; total += t()) {
        current = scan();
    }
    return total;
}
/*
 * T: parses a product of one or more factors separated by MULT tokens
 * and returns the product.
 */
int t() {
    int product = f();

    for (;;) {
        if (current.type != MULT) {
            return product;
        }
        current = scan();  /* consume the MULT token */
        product *= f();
    }
}
/*
 * F: parses a factor - an INTEGER token or a parenthesised expression -
 * and returns its value. Any other token is reported through fatal().
 * On return, `current` holds the token that follows the factor.
 */
int f() {
    int value = 0;

    switch (current.type) {
    case INTEGER:
        value = current.value;
        break;
    case LBRACE:
        current = scan();  /* step past the opening parenthesis */
        value = e();
        if (current.type != RBRACE) {
            fatal("Expected closing parentheses");
        }
        break;
    case LF:
        fatal("Unexpected line feed");
        break;
    case EF:
        fatal("Unexpected EOF");
        break;
    default:
        fatal("Unexpected token");
        break;
    }

    /* Step past the integer or the closing parenthesis. */
    current = scan();
    return value;
}
/*
 * Reports a fatal error on stderr and terminates the program with exit
 * status 1.
 *
 * The report consists of the character position `char_num`, the text
 * held in `line`, a caret printed in a field `char_num` columns wide,
 * and finally `message`.
 *
 * message: description of the error; printed once, after the caret.
 */
void fatal(const char* message) {
    /* Passing NULL to %s is undefined, so fall back to an empty string
     * when the line buffer was never allocated. */
    eprintf("\nError at char %d\n%s\n", char_num, line != NULL ? line : "");
    eprintf("%*c\n", char_num, '^');
    eprintf("Error: %s\n", message);
    exit(1);
}