/*--------------------------------------------------------------------*/ /* dfa.c */ /* Author: Bob Dondero */ /*--------------------------------------------------------------------*/ #include "dynarray.h" #include #include #include #include #include /*--------------------------------------------------------------------*/ /* The maximum number of characters in a line of stdin. */ enum {MAX_LINE_LENGTH = 2048}; /* A Token object can be either a number or a word. */ enum TokenType {TOKEN_NUMBER, TOKEN_WORD}; /*--------------------------------------------------------------------*/ /* A Token is either a number or a word, expressed as a string. */ struct Token { /* The type of the token. */ enum TokenType eType; /* The string which is the token's value. */ char *pcValue; }; /*--------------------------------------------------------------------*/ /* Print all tokens in oTokens to stdout. First print the number tokens; then print the word tokens. */ static void printTokens(DynArray_T oTokens) { int i; int iLength; struct Token *psToken; assert(oTokens != NULL); iLength = DynArray_getLength(oTokens); printf("Numbers: "); for (i = 0; i < iLength; i++) { psToken = DynArray_get(oTokens, i); if (psToken->eType == TOKEN_NUMBER) printf("%s ", psToken->pcValue); } printf("\n"); printf("Words: "); for (i = 0; i < iLength; i++) { psToken = DynArray_get(oTokens, i); if (psToken->eType == TOKEN_WORD) printf("%s ", psToken->pcValue); } printf("\n"); } /*--------------------------------------------------------------------*/ /* Free all of the tokens in oTokens. */ static void freeTokens(DynArray_T oTokens) { int i; int iLength; struct Token *psToken; assert(oTokens != NULL); iLength = DynArray_getLength(oTokens); for (i = 0; i < iLength; i++) { psToken = DynArray_get(oTokens, i); free(psToken->pcValue); free(psToken); } } /*--------------------------------------------------------------------*/ /* Create and return a Token whose type is eTokenType and whose value consists of string pcValue. The caller owns the Token. */ static struct Token *newToken(enum TokenType eTokenType, char *pcValue) { struct Token *psToken; assert(pcValue != NULL); psToken = (struct Token*)malloc(sizeof(struct Token)); if (psToken == NULL) { fprintf(stderr, "Cannot allocate memory\n"); exit(EXIT_FAILURE); } psToken->eType = eTokenType; psToken->pcValue = (char*)malloc(strlen(pcValue) + 1); if (psToken->pcValue == NULL) { fprintf(stderr, "Cannot allocate memory\n"); exit(EXIT_FAILURE); } strcpy(psToken->pcValue, pcValue); return psToken; } /*--------------------------------------------------------------------*/ /* Lexically analyze string pcLine. Populate oTokens with the tokens that pcLine contains. Return 1 (TRUE) if successful, or 0 (FALSE) otherwise. In the latter case, oTokens may contain tokens that were discovered before the error. The caller owns the tokens placed in oTokens. */ static int lexLine(const char *pcLine, DynArray_T oTokens) { /* In lieu of a Boolean data type. */ enum {FALSE, TRUE}; /* lexLine() uses a DFA approach. It "reads" its characters from pcLine. The DFA has these three states: */ enum LexState {STATE_START, STATE_IN_NUMBER, STATE_IN_WORD}; /* The current state of the DFA. */ enum LexState eState = STATE_START; /* An index into pcLine. */ int iLineIndex = 0; /* An array in which the characters comprising each token are accumulated. */ char acValue[MAX_LINE_LENGTH]; /* An index into acValue. */ int iValueIndex = 0; char c; struct Token *psToken; assert(pcLine != NULL); assert(oTokens != NULL); for (;;) { /* "Read" the next character from pcLine. */ c = pcLine[iLineIndex++]; switch (eState) { /* Handle the START state. */ case STATE_START: if (c == '\0') return TRUE; else if (isdigit(c)) { acValue[iValueIndex++] = c; eState = STATE_IN_NUMBER; } else if (isalpha(c)) { acValue[iValueIndex++] = c; eState = STATE_IN_WORD; } else if (isspace(c)) eState = STATE_START; else { fprintf(stderr, "Invalid line\n"); return FALSE; } break; /* Handle the IN_NUMBER state. */ case STATE_IN_NUMBER: if (c == '\0') { /* Create a NUMBER token. */ acValue[iValueIndex] = '\0'; psToken = newToken(TOKEN_NUMBER, acValue); if (! DynArray_add(oTokens, psToken)) { fprintf(stderr, "Cannot allocate memory\n"); exit(EXIT_FAILURE); } iValueIndex = 0; return TRUE; } else if (isdigit(c)) { acValue[iValueIndex++] = c; eState = STATE_IN_NUMBER; } else if (isspace(c)) { /* Create a NUMBER token. */ acValue[iValueIndex] = '\0'; psToken = newToken(TOKEN_NUMBER, acValue); if (! DynArray_add(oTokens, psToken)) { fprintf(stderr, "Cannot allocate memory\n"); exit(EXIT_FAILURE); } iValueIndex = 0; eState = STATE_START; } else { fprintf(stderr, "Invalid line\n"); return FALSE; } break; /* Handle the IN_WORD state. */ case STATE_IN_WORD: if (c == '\0') { /* Create a WORD token. */ acValue[iValueIndex] = '\0'; psToken = newToken(TOKEN_WORD, acValue); if (! DynArray_add(oTokens, psToken)) { fprintf(stderr, "Cannot allocate memory\n"); exit(EXIT_FAILURE); } iValueIndex = 0; return TRUE; } else if (isalpha(c)) { acValue[iValueIndex++] = c; eState = STATE_IN_WORD; } else if (isspace(c)) { /* Create a WORD token. */ acValue[iValueIndex] = '\0'; psToken = newToken(TOKEN_WORD, acValue); if (! DynArray_add(oTokens, psToken)) { fprintf(stderr, "Cannot allocate memory\n"); exit(EXIT_FAILURE); } iValueIndex = 0; eState = STATE_START; } else { fprintf(stderr, "Invalid line\n"); return FALSE; } break; default: assert(FALSE); } } } /*--------------------------------------------------------------------*/ /* Read a line from stdin, and write to stdout each number and word that it contains. Repeat until EOF. Return 0 iff successful. */ int main(void) { char acLine[MAX_LINE_LENGTH]; DynArray_T oTokens; int iSuccessful; printf("------------------------------------\n"); while (fgets(acLine, MAX_LINE_LENGTH, stdin) != NULL) { oTokens = DynArray_new(0); if (oTokens == NULL) { fprintf(stderr, "Cannot allocate memory\n"); exit(EXIT_FAILURE); } iSuccessful = lexLine(acLine, oTokens); if (iSuccessful) printTokens(oTokens); printf("------------------------------------\n"); freeTokens(oTokens); DynArray_free(oTokens); } return 0; }