/*-------------------------------------------------------------------*/ /* fsa.c */ /* Illustrate lexical analysis using a deterministic finite state */ /* automaton (FSA) */ /*-------------------------------------------------------------------*/ #include "list.h" #include #include #include #define BOOLEAN int #define TRUE 1 #define FALSE 0 #define MAX_LINE_SIZE 1024 enum TokenType {NUMBER, WORD}; enum LexState {START_STATE, IN_NUMBER_STATE, IN_WORD_STATE, NUMBER_FOUND_STATE, WORD_FOUND_STATE, NO_TOKEN_FOUND_STATE}; struct Token { enum TokenType type; char *value; }; BOOLEAN isEndOfLine(char c) { return (c == '\n') || (c == '\0'); } BOOLEAN isDigit(char c) { return (c >= '0') && (c <= '9'); } BOOLEAN isLetter(char c) { return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) || (c == '_'); } struct Token *getToken(char **ppcCurrentChar) /* Return the next token in line *ppcCurrentChar. */ { enum LexState iState = START_STATE; static char pcBuffer[MAX_LINE_SIZE]; int i = 0; char c; struct Token *psToken; while (TRUE) { switch (iState) { case START_STATE: c = **ppcCurrentChar; ++(*ppcCurrentChar); if (isEndOfLine(c)) { iState = NO_TOKEN_FOUND_STATE; } else if (isDigit(c)) { pcBuffer[i++] = c; iState = IN_NUMBER_STATE; } else if (isLetter(c)) { pcBuffer[i++] = c; iState = IN_WORD_STATE; } else { iState = START_STATE; } break; case IN_NUMBER_STATE: c = **ppcCurrentChar; ++(*ppcCurrentChar); if (isDigit(c)) { pcBuffer[i++] = c; iState = IN_NUMBER_STATE; } else { --(*ppcCurrentChar); iState = NUMBER_FOUND_STATE; } break; case IN_WORD_STATE: c = **ppcCurrentChar; ++(*ppcCurrentChar); if (isLetter(c)) { pcBuffer[i++] = c; iState = IN_WORD_STATE; } else { --(*ppcCurrentChar); iState = WORD_FOUND_STATE; } break; case NUMBER_FOUND_STATE: pcBuffer[i++] = '\0'; psToken = (struct Token*)malloc(sizeof(*psToken)); psToken->type = NUMBER; psToken->value = (char*)malloc(i); strcpy(psToken->value, pcBuffer); return psToken; case WORD_FOUND_STATE: pcBuffer[i++] = '\0'; psToken = (struct Token*)malloc(sizeof(*psToken)); psToken->type = WORD; psToken->value = (char*)malloc(i); strcpy(psToken->value, pcBuffer); return psToken; case NO_TOKEN_FOUND_STATE: return NULL; } } } void printToken(void **ppvToken, void *pvExtra) /* Print *ppvToken to stdout. */ { struct Token *psToken = *((struct Token**)ppvToken); printf("%s\n", psToken->value); } void freeToken(void **ppvToken, void *pvExtra) /* Free **ppvToken. */ { struct Token *psToken = *((struct Token**)ppvToken); free(psToken->value); free(psToken); } int main(int argc, char *argv[]) /* Write to stdout each number and word that stdin contains. */ { struct Token *psToken; List_T oNumberList; List_T oWordList; char pcLine[MAX_LINE_SIZE]; char *pcCurrentChar; oNumberList = List_new(); oWordList = List_new(); while (fgets(pcLine, MAX_LINE_SIZE, stdin) != NULL) { pcCurrentChar = pcLine; while ((psToken = getToken(&pcCurrentChar)) != NULL) { if (psToken->type == NUMBER) List_addLast(oNumberList, psToken); else List_addLast(oWordList, psToken); } } printf("-------\n"); printf("Numbers\n"); printf("-------\n"); List_map(oNumberList, printToken, NULL); printf("-----\n"); printf("Words\n"); printf("-----\n"); List_map(oWordList, printToken, NULL); List_map(oNumberList, freeToken, NULL); List_map(oWordList, freeToken, NULL); List_free(oNumberList); List_free(oWordList); return 0; }