/*-------------------------------------------------------------------*/
/* fsa.c                                                             */
/* Illustrate lexical analysis using a deterministic finite state    */
/* automaton (FSA)                                                   */
/*-------------------------------------------------------------------*/

#include "dynarray.h"
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size in bytes of the input line buffer in main() and of the token
   scratch buffer in getToken(), including room for the trailing
   '\0'. */
#define MAX_LINE_SIZE 1024

/*-------------------------------------------------------------------*/

/* Home-grown boolean; FALSE is 0 and TRUE is 1. */
enum Boolean {FALSE, TRUE};

/* The kinds of token the lexer can produce. */
enum TokenType {TKN_NUMBER, TKN_WORD};

/* States of the automaton driven by getToken(). The *_FOUND, 
   NO_TOKEN_FOUND and ERROR states are terminal: reaching one makes
   getToken() return. */
enum LexState {START_STATE, IN_NUMBER_STATE, IN_WORD_STATE,
               NUMBER_FOUND_STATE, WORD_FOUND_STATE,
               NO_TOKEN_FOUND_STATE, ERROR_STATE};

/*-------------------------------------------------------------------*/

/* A token: its kind plus a heap-allocated, '\0'-terminated copy of
   its text. The token owns pcValue (see freeToken()). */
struct Token
{
   enum TokenType iType;
   char *pcValue;
};

/*-------------------------------------------------------------------*/

void freeToken(void *pvItem, void *pvExtra)

/* Free token pvItem together with the string it owns. pvExtra is
   ignored; it is present only because this function is passed to
   DynArray_map() as a callback. */

{
   struct Token *psToken = (struct Token *)pvItem;

   (void)pvExtra;
   free(psToken->pcValue);
   free(psToken);
}

/*-------------------------------------------------------------------*/

void printNumberToken(void *pvItem, void *pvExtra)

/* Print token pvItem to stdout, followed by a space, iff it is a
   number. pvExtra is ignored. */

{
   struct Token *psToken = (struct Token *)pvItem;

   (void)pvExtra;
   if (psToken->iType == TKN_NUMBER)
      printf("%s ", psToken->pcValue);
}

/*-------------------------------------------------------------------*/

void printWordToken(void *pvItem, void *pvExtra)

/* Print token pvItem to stdout, followed by a space, iff it is a
   word. pvExtra is ignored. */

{
   struct Token *psToken = (struct Token *)pvItem;

   (void)pvExtra;
   if (psToken->iType == TKN_WORD)
      printf("%s ", psToken->pcValue);
}

/*-------------------------------------------------------------------*/

static struct Token *makeToken(enum TokenType iType,
                               const char *pcText, size_t uLength)

/* Return a new heap-allocated token of type iType whose value is a
   '\0'-terminated copy of the uLength characters starting at pcText.
   Return NULL, leaking nothing, if memory cannot be allocated. The
   caller owns the token. */

{
   struct Token *psToken = malloc(sizeof(*psToken));

   if (psToken == NULL)
      return NULL;

   psToken->pcValue = malloc(uLength + 1);
   if (psToken->pcValue == NULL)
   {
      free(psToken);
      return NULL;
   }

   memcpy(psToken->pcValue, pcText, uLength);
   psToken->pcValue[uLength] = '\0';
   psToken->iType = iType;
   return psToken;
}

/*-------------------------------------------------------------------*/

struct Token *getToken(const char **ppcLine, enum Boolean *pbSuccessful)

/* Return the next token in string *ppcLine, and update *ppcLine to
   point to the first character beyond that token. Leading white
   space is skipped. A number is a run of digits and a word is a run
   of letters; each must be followed by white space or the end of the
   string.

   Return NULL with *pbSuccessful set to TRUE if a newline or the end
   of the string is reached before any token starts. At the end of the
   string *ppcLine is left pointing at the terminating '\0', so calling
   again is safe.

   Return NULL with *pbSuccessful set to FALSE, after writing a message
   to stderr, if the input contains an invalid character, if a token
   would not fit in MAX_LINE_SIZE bytes including its '\0', or if
   memory cannot be allocated.

   Otherwise set *pbSuccessful to TRUE and return the token. The
   caller owns the token. */

{
   enum LexState iState = START_STATE;
   char pcBuffer[MAX_LINE_SIZE];
   size_t uLength = 0;
   char c;
   struct Token *psToken;

   while (TRUE)
   {
      switch (iState)
      {
         case START_STATE:
            c = **ppcLine;
            if (c == '\0')
            {
               /* Do not consume the terminator: stepping past it
                  would let a later call read beyond the string. */
               iState = NO_TOKEN_FOUND_STATE;
            }
            else
            {
               (*ppcLine)++;
               /* <ctype.h> functions require a value representable
                  as unsigned char, hence the casts below. */
               if (c == '\n')
                  iState = NO_TOKEN_FOUND_STATE;
               else if (isdigit((unsigned char)c))
               {
                  pcBuffer[uLength++] = c;
                  iState = IN_NUMBER_STATE;
               }
               else if (isalpha((unsigned char)c))
               {
                  pcBuffer[uLength++] = c;
                  iState = IN_WORD_STATE;
               }
               else if (isspace((unsigned char)c))
                  iState = START_STATE;
               else
                  iState = ERROR_STATE;
            }
            break;

         case IN_NUMBER_STATE:
            c = **ppcLine;
            (*ppcLine)++;
            if (isdigit((unsigned char)c))
            {
               /* Keep one byte in reserve for the token's '\0'. */
               if (uLength < sizeof(pcBuffer) - 1)
                  pcBuffer[uLength++] = c;
               else
                  iState = ERROR_STATE;
            }
            else if (isspace((unsigned char)c) || (c == '\0'))
            {
               /* The delimiter is not part of the token; put it
                  back for the next call. */
               (*ppcLine)--;
               iState = NUMBER_FOUND_STATE;
            }
            else
               iState = ERROR_STATE;
            break;

         case IN_WORD_STATE:
            c = **ppcLine;
            (*ppcLine)++;
            if (isalpha((unsigned char)c))
            {
               /* Keep one byte in reserve for the token's '\0'. */
               if (uLength < sizeof(pcBuffer) - 1)
                  pcBuffer[uLength++] = c;
               else
                  iState = ERROR_STATE;
            }
            else if (isspace((unsigned char)c) || (c == '\0'))
            {
               /* The delimiter is not part of the token; put it
                  back for the next call. */
               (*ppcLine)--;
               iState = WORD_FOUND_STATE;
            }
            else
               iState = ERROR_STATE;
            break;

         case NUMBER_FOUND_STATE:
         case WORD_FOUND_STATE:
            psToken = makeToken(
               (iState == NUMBER_FOUND_STATE) ? TKN_NUMBER : TKN_WORD,
               pcBuffer, uLength);
            if (psToken == NULL)
            {
               fprintf(stderr, "Cannot allocate memory\n");
               *pbSuccessful = FALSE;
               return NULL;
            }
            *pbSuccessful = TRUE;
            return psToken;

         case NO_TOKEN_FOUND_STATE:
            *pbSuccessful = TRUE;
            return NULL;

         case ERROR_STATE:
            fprintf(stderr, "Invalid token\n\n");
            *pbSuccessful = FALSE;
            return NULL;

         default:
            /* Not reachable with the states defined above; treated
               as an error rather than looping forever. */
            iState = ERROR_STATE;
            break;
      }
   }
}

/*-------------------------------------------------------------------*/

void processLine(const char *pcLine)

/* Write to stdout each number and then each word that string pcLine
   contains. If getToken() reports a failure, write nothing to stdout
   for this line (getToken() has already written to stderr). All
   tokens created along the way are freed before returning. */

{
   enum Boolean bSuccessful;
   struct Token *psToken;
   DynArray_T oTokens;

   /* NOTE(review): the results of DynArray_new() and DynArray_add()
      are not checked because dynarray.h is not visible from this
      file; if they can report failure, handle it here. */
   oTokens = DynArray_new(0);

   /* Collect tokens until the line is exhausted (NULL token with
      success) or lexing fails. getToken() always assigns bSuccessful
      before returning, so it is defined after the loop. */
   while (TRUE)
   {
      psToken = getToken(&pcLine, &bSuccessful);
      if ((psToken == NULL) || (! bSuccessful))
         break;
      DynArray_add(oTokens, psToken);
   }

   if (bSuccessful)
   {
      printf("Numbers:  ");
      DynArray_map(oTokens, printNumberToken, NULL);
      printf("\n");

      printf("Words:  ");
      DynArray_map(oTokens, printWordToken, NULL);
      printf("\n\n");
   }

   /* Single cleanup path for both the success and the failure case. */
   DynArray_map(oTokens, freeToken, NULL);
   DynArray_free(oTokens);
}

/*-------------------------------------------------------------------*/

int main(int argc, char *argv[])

/* Read a line from stdin, and write to stdout each number and word
   that it contains. Repeat until EOF. Return 0. Command-line
   arguments are ignored. fgets() stores at most MAX_LINE_SIZE - 1
   characters per call, so a longer input line is processed in
   several pieces. */

{
   char pcLine[MAX_LINE_SIZE];

   (void)argc;
   (void)argv;

   while (fgets(pcLine, MAX_LINE_SIZE, stdin) != NULL)
      processLine(pcLine);

   return 0;
}