/*--------------------------------------------------------------------*/
/* dfa.c                                                              */
/* Author: Bob Dondero                                                */
/* Illustrate lexical analysis using a deterministic finite state     */
/* automaton (DFA)                                                    */
/*--------------------------------------------------------------------*/

#include "dynarray.h"
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

/*--------------------------------------------------------------------*/

enum {MAX_LINE_SIZE = 1024};

enum {FALSE, TRUE};

enum TokenType {TOKEN_NUMBER, TOKEN_WORD};

enum LexState {STATE_START, STATE_IN_NUMBER, STATE_IN_WORD,
               STATE_ERROR, STATE_EXIT};

/*--------------------------------------------------------------------*/

/* A Token is either a number or a word, expressed as a string. */

struct Token
{
   /* The type of the token. */
   enum TokenType eType;

   /* The string which is the token's value. */
   char *pcValue;
};

/*--------------------------------------------------------------------*/

static void freeToken(void *pvItem, void *pvExtra)

/* Free token pvItem.  pvExtra is unused. */

{
   struct Token *psToken = (struct Token*)pvItem;
   free(psToken->pcValue);
   free(psToken);
}

/*--------------------------------------------------------------------*/

static void printNumberToken(void *pvItem, void *pvExtra)

/* Print token pvItem to stdout iff it is a number.  pvExtra is
   unused. */

{
   struct Token *psToken = (struct Token*)pvItem;
   if (psToken->eType == TOKEN_NUMBER)
      printf("%s ", psToken->pcValue);
}

/*--------------------------------------------------------------------*/

static void printWordToken(void *pvItem, void *pvExtra)

/* Print token pvItem to stdout iff it is a word.  pvExtra is
   unused. */

{
   struct Token *psToken = (struct Token*)pvItem;
   if (psToken->eType == TOKEN_WORD)
      printf("%s ", psToken->pcValue);
}

/*--------------------------------------------------------------------*/

static struct Token *makeToken(enum TokenType eTokenType,
   char *pcValue)

/* Create and return a Token whose type is eTokenType and whose
   value consists of string pcValue.  The caller owns the Token. */

{
   struct Token *psToken;

   psToken = (struct Token*)malloc(sizeof(struct Token));
   assert(psToken != NULL);

   psToken->eType = eTokenType;

   psToken->pcValue = (char*)malloc(strlen(pcValue) + 1);
   assert(psToken->pcValue != NULL);
   strcpy(psToken->pcValue, pcValue);

   return psToken;
}

/*--------------------------------------------------------------------*/

static int lexLine(const char *pcLine, DynArray_T oTokens)

/* Lexically analyze string pcLine.  Populate oTokens with the
   tokens that pcLine contains.  Return TRUE if successful, and FALSE
   if pcLine contains a lexical error.  In the latter case, oTokens
   may contain tokens that were discovered before the lexical error.
   The caller owns the tokens placed in oTokens. */

/* lexLine() uses a DFA approach.  It "reads" its characters from
   pcLine.*/

{
   enum LexState eState = STATE_START;
   int iLineIndex = 0;
   int iValueIndex = 0;
   char c;
   char acValue[MAX_LINE_SIZE];
   struct Token *psToken;

   while ((eState != STATE_EXIT) && (eState != STATE_ERROR))
   {
      /* "Read" the next character from pcLine. */
      c = pcLine[iLineIndex++];

      switch (eState)
      {
         case STATE_START:
            if ((c == '\n') || (c == '\0'))
               eState = STATE_EXIT;
            else if (isdigit(c))
            {
               acValue[iValueIndex++] = c;
               eState = STATE_IN_NUMBER;
            }
            else if (isalpha(c))
            {
               acValue[iValueIndex++] = c;
               eState = STATE_IN_WORD;
            }
            else if ((c == ' ') || (c == '\t'))
               eState = STATE_START;
            else
               eState = STATE_ERROR;
            break;

         case STATE_IN_NUMBER:
            if ((c == '\n') || (c == '\0'))
            {
               /* Create a NUMBER token. */
               acValue[iValueIndex] = '\0';
               psToken = makeToken(TOKEN_NUMBER, acValue);
               DynArray_add(oTokens, psToken);
               iValueIndex = 0;

               eState = STATE_EXIT;
            }
            else if (isdigit(c))
            {
               acValue[iValueIndex++] = c;
               eState = STATE_IN_NUMBER;
            }
            else if ((c == ' ') || (c == '\t'))
            {
               /* Create a NUMBER token. */
               acValue[iValueIndex] = '\0';
               psToken = makeToken(TOKEN_NUMBER, acValue);
               DynArray_add(oTokens, psToken);
               iValueIndex = 0;

               eState = STATE_START;
            }
            else
               eState = STATE_ERROR;
            break;

         case STATE_IN_WORD:
            if ((c == '\n') || (c == '\0'))
            {
               /* Create a WORD token. */
               acValue[iValueIndex] = '\0';
               psToken = makeToken(TOKEN_WORD, acValue);
               DynArray_add(oTokens, psToken);
               iValueIndex = 0;

               eState = STATE_EXIT;
            }
            else if (isalpha(c))
            {
               acValue[iValueIndex++] = c;
               eState = STATE_IN_WORD;
            }
            else if ((c == ' ') || (c == '\t'))
            {
               /* Create a WORD token. */
               acValue[iValueIndex] = '\0';
               psToken = makeToken(TOKEN_WORD, acValue);
               DynArray_add(oTokens, psToken);
               iValueIndex = 0;

               eState = STATE_START;
            }
            else
               eState = STATE_ERROR;
            break;

         default:
            assert(0);
      }
   }

   if (eState == STATE_ERROR)
      return FALSE;
   return TRUE;
}

/*--------------------------------------------------------------------*/

int main(void)

/* Read a line from stdin, and write to stdout each number and word
   that it contains.  Repeat until EOF.  Return 0. */

{
   char acLine[MAX_LINE_SIZE];
   DynArray_T oTokens;
   int iSuccessful;

   printf("------------------------------------\n");
   while (fgets(acLine, MAX_LINE_SIZE, stdin) != NULL)
   {
      oTokens = DynArray_new(0);

      iSuccessful = lexLine(acLine, oTokens);
      if (iSuccessful)
      {
          printf("Numbers:  ");
          DynArray_map(oTokens, printNumberToken, NULL);
          printf("\n");

          printf("Words:  ");
          DynArray_map(oTokens, printWordToken, NULL);
          printf("\n");
      }
      else
         printf("Invalid line\n");
      printf("------------------------------------\n");

      DynArray_map(oTokens, freeToken, NULL);
      DynArray_free(oTokens);
   }

   return 0;
}