Lexical analyzer for a C program

- June 24, 2024

This program identifies the tokens in a C program, ignoring whitespace and comments. The input C program is given as a command-line argument.

*******************************

%{
/*
 * Lexical analyzer for a small subset of C.
 *
 * Prints one token name per line on standard output. Whitespace and
 * comments (both line and block style) are discarded. Identifiers and
 * numbers are printed together with their matched text; any character
 * that no rule recognises is reported as TOKEN_UNKNOWN.
 *
 * Usage: ./a.out [source-file]
 * With no argument the scanner reads flex's default input stream.
 */
#include <stdio.h>
%}

%option noyywrap

/* Named patterns used by the rules section. */
DIGIT       [0-9]
/* C identifiers may contain underscores, so '_' is treated as a letter. */
LETTER      [a-zA-Z_]
IDENTIFIER  {LETTER}({LETTER}|{DIGIT})*
NUMBER      {DIGIT}+
/* Carriage return, form feed and vertical tab are included so that
   CRLF-terminated input does not produce unknown tokens. */
WHITESPACE  [ \t\r\n\f\v]+
COMMENT1    "//".*
COMMENT2    "/*"([^*]|\*+[^/*])*\*+"/"

%%

{WHITESPACE}    { /* Ignore whitespace */ }
{COMMENT1}      { /* Ignore line comments */ }
{COMMENT2}      { /* Ignore block comments */ }

    /* Keywords are listed before IDENTIFIER: on an equal-length match
       flex picks the earliest rule, so "int" is a keyword while a longer
       word such as "integer" still matches IDENTIFIER. */
"int"           { printf("TOKEN_INT\n"); }
"return"        { printf("TOKEN_RETURN\n"); }
"if"            { printf("TOKEN_IF\n"); }
"else"          { printf("TOKEN_ELSE\n"); }
"while"         { printf("TOKEN_WHILE\n"); }
"for"           { printf("TOKEN_FOR\n"); }

    /* Operators: the longest match wins, so "==" is never split into two "=". */
"="             { printf("TOKEN_ASSIGN\n"); }
"=="            { printf("TOKEN_EQ\n"); }
"!="            { printf("TOKEN_NEQ\n"); }
"<"             { printf("TOKEN_LT\n"); }
"<="            { printf("TOKEN_LE\n"); }
">"             { printf("TOKEN_GT\n"); }
">="            { printf("TOKEN_GE\n"); }
"+"             { printf("TOKEN_PLUS\n"); }
"-"             { printf("TOKEN_MINUS\n"); }
"*"             { printf("TOKEN_MULT\n"); }
"/"             { printf("TOKEN_DIV\n"); }

"("             { printf("TOKEN_LPAREN\n"); }
")"             { printf("TOKEN_RPAREN\n"); }
"{"             { printf("TOKEN_LBRACE\n"); }
"}"             { printf("TOKEN_RBRACE\n"); }
";"             { printf("TOKEN_SEMICOLON\n"); }

{IDENTIFIER}    { printf("TOKEN_IDENTIFIER(%s)\n", yytext); }
{NUMBER}        { printf("TOKEN_NUMBER(%s)\n", yytext); }

    /* Catch-all: must stay last so it only sees otherwise unmatched input. */
.               { printf("TOKEN_UNKNOWN(%s)\n", yytext); }

%%

/*
 * Entry point. If a file name is given as the first argument, the scanner
 * reads from that file; otherwise yyin is left at its default.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main(int argc, char **argv) {
    FILE *file = NULL;

    if (argc > 1) {
        file = fopen(argv[1], "r");
        if (!file) {
            fprintf(stderr, "Could not open %s\n", argv[1]);
            return 1;
        }
        yyin = file;
    }

    yylex();

    /* Release the input file if we opened one. */
    if (file) {
        fclose(file);
    }
    return 0;
}

Execution

input C file is t.c

$ cat t.c

/* sample program

this is used as input to lex */

int main() {

int a = 5;

int b = 10;

// This is a comment

if (a == b) {

return 0;

} else {

return 1;

}

$ flex lexanlz.lex

$ gcc lex.yy.c

$ ./a.out t.c

./a.out t.c

TOKEN_INT

TOKEN_IDENTIFIER(main)

TOKEN_LPAREN

TOKEN_RPAREN

TOKEN_LBRACE

TOKEN_INT

TOKEN_IDENTIFIER(a)

TOKEN_ASSIGN

TOKEN_NUMBER(5)

TOKEN_SEMICOLON

TOKEN_INT

TOKEN_IDENTIFIER(b)

TOKEN_ASSIGN

TOKEN_NUMBER(10)

TOKEN_SEMICOLON

TOKEN_IF

TOKEN_LPAREN

TOKEN_IDENTIFIER(a)

TOKEN_EQ

TOKEN_IDENTIFIER(b)

TOKEN_RPAREN

TOKEN_LBRACE

TOKEN_RETURN

TOKEN_NUMBER(0)

TOKEN_SEMICOLON

TOKEN_RBRACE

TOKEN_ELSE

TOKEN_LBRACE

TOKEN_RETURN

TOKEN_NUMBER(1)

TOKEN_SEMICOLON

TOKEN_RBRACE

Search This Blog

KTU Compiler Lab Semester 7 CSL 411 - Dr Binu V P

lexical analyzer for a c program

Comments

Post a Comment

Popular posts from this blog

KTU Compiler Lab CSL411 - Dr Binu V P

count frequency of occurrence of a word - lex program