/*
**  PROJECT
**      CSV Library
**
**  DESCRIPTION
**      Implementation of Libcsv, a simple C library for manipulating
**      CSV (Comma Separated Value) files.
**
**  DEVELOPER
**      Scott C. Karlin
**
**  HISTORY
**      24 Apr 2003  sck  Initial Version
**      20 May 2003  sck  Finished csv_SplitNew
**
**  CVS ID
**      $Id: csv.c,v 1.3 2003/05/23 17:05:53 scott Exp $
*/

#include <assert.h>       /* assert */
#include <ctype.h>        /* iscntrl, isdigit, isgraph, isprint, isxdigit */

#include <stddef.h>       /* NULL */
#include <stdlib.h>       /* calloc, malloc, realloc, free */
#include <string.h>       /* strlen, strspn, memcpy, memmove, memset */

#include "csv.h"          /* self */

/****************************************************************************/

//#define LOCAL_DEBUG

/****************************************************************************/

/*
**  Debug tracing.
**
**  When LOCAL_DEBUG is defined, dprintf() forwards all of its arguments
**  (a printf-style format followed by its values) to fprintf(stderr, ...).
**  Otherwise every dprintf() call expands to nothing.
**
**  The ISO C99 variadic form (... / __VA_ARGS__) is used instead of the
**  GNU-only named form (args... / ## args) so the file builds with any
**  C99 compiler.
*/
#ifdef LOCAL_DEBUG
#include <stdio.h>
#define dprintf(...) fprintf(stderr, __VA_ARGS__)
#else
#define dprintf(...)
#endif

/****************************************************************************/
/*
**  CSV Parser States
**
**  Values held by the <st> variable of the state machine in csv_SplitNew.
**  They are numbered contiguously from zero because the LOCAL_DEBUG trace
**  in csv_SplitNew uses the state as an index into a table of names.
*/

enum
{
   ST_WFIELD =  0,     /* waiting for a new field to start   */
   ST_FIELD  =  1,     /* inside a non-quoted field          */
   ST_QFIELD =  2,     /* inside a quoted field              */
   ST_BSLASH =  3,     /* just saw a backslash               */
   ST_HAV1OD =  4,     /* have 1 octal digit of an escape    */
   ST_HAV2OD =  5,     /* have 2 octal digits of an escape   */
   ST_HEXESC =  6,     /* entering a hex escape              */
   ST_HAV1HD =  7,     /* have 1 hex digit of an escape      */
   ST_WCOMMA =  8,     /* waiting for a comma                */
   ST_DONE   =  9,     /* end of line reached                */
   ST_ERROR  = 10      /* parse error                        */
};

/****************************************************************************/

/*
**  Return the numeric value (0..15) of the hexadecimal digit <c>.
**  Both upper- and lower-case letters are accepted.  Any character that
**  is not a hex digit (including '\0') yields 0.
*/
static int hexof(int c)
{
   /* Upper-case table followed by lower-case table: index mod 16 is the value */
   static const char digits[] = "0123456789ABCDEF0123456789abcdef";
   const char *hit = strchr(digits, c);

   if(hit == NULL)
      {
         return 0;
      }

   /* strchr matches the terminator for c == '\0': offset 32, which masks to 0 */
   return (int) ((hit - digits) & 0xF);
}

/****************************************************************************/
/*
**  This function is described in csv.h
**
**  RETURN VALUE (as implemented here)
**      0   success; <*pscsv> points to a newly allocated structure
**      1   parse error; <*pscsv> is NULL
**     -1   out of memory; <*pscsv> is NULL
**
**  NOTE
**     An empty line, a line of only blanks/tabs/CR/LF, or a line whose
**     first character is '#' yields a zero-filled structure whose <field>
**     points at its own (NULL) <priv> member, so that field[0] is NULL.
**
**     Otherwise the layout of the <priv> area is as follows:
**        * The initial <nd> bytes hold the null-terminated field strings.
**        * There is a pad of 0 to sizeof(char *)-1 bytes so the
**          vector that follows is aligned on a sizeof(char *) boundary.
**        * The remainder of the <priv> area holds the NULL-terminated
**          <field> pointer vector.
**
**     Escapes that would produce a null character inside a quoted field
**     ("\0", "\00", "\000", "\x0", "\x00") are copied through literally
**     instead of being converted, so a field is never truncated.
*/

int csv_SplitNew(const char *csv, csv_Split_t **pscsv)
{
   csv_Split_t *scsv;
   char  *d = NULL;        /* destination buffer */
   char  *dp;              /* destination pointer */
   char **pp;
   char  *cp;
   const char *sp;         /* source pointer */
   size_t alloc_size;
   size_t csvlen;
   size_t pad_nd;          /* <nd> rounded up to a multiple of sizeof(char *) */
   int    c;
   int    d1 = 0;          /* first digit of a numeric escape */
   int    d2 = 0;          /* second digit of a numeric escape */
   int    esc;
   int    i;
   int    li;              /* last index */
   int    st;              /* state */
   int    nd;              /* number of destination characters */
   int    nv;              /* number of offsets */
   int    retval;
   int   *v = NULL;        /* list of offsets */
   int   *vp;              /* offset pointer */
/*
**  Check parameters
*/
   assert(csv != NULL);
   assert(pscsv != NULL);
/*
**  Assume the worst
*/
   *pscsv = NULL;
/*
**  Handle the case of a blank line or a comment line
*/
   csvlen = strlen(csv);
   if((csvlen == 0) || (csv[0] == '#') || (strspn(csv, " \t\r\n") == csvlen))
      {
         scsv = calloc(1, sizeof(*scsv));   /* ptrs are NULL, ints are 0 */
         if(scsv == NULL)
            {
               return -1;
            }
         /* <priv> is NULL, so it doubles as an empty, NULL-terminated vector */
         scsv->field = (char **) &scsv->priv;
         *pscsv = scsv;
         return 0;
      }
/*
**  Allocate some initial working space.
**  (Way) over-estimate the number of fields and the buffer
**  space needed for the fields.
*/
   d = calloc(2 * csvlen + 1, sizeof(*d));   /* ptrs are NULL, ints are 0 */
   v = calloc(csvlen + 1, sizeof(*v));       /* ptrs are NULL, ints are 0 */
   if((v == NULL) || (d == NULL))
      {
         retval = -1;     /* out of memory */
         goto cleanup;
      }
/*
**  Parse the line
**     * copy desired characters from <csv> via <sp> to <d> via <dp>.
**     * add field-terminating null characters.
**     * store integer offsets of fields in <d> into <v> via <vp>.
*/
   dp = d;
   vp = v;
   sp = csv;
   li = -1;
   st = ST_WFIELD;

   while(1)
      {
         /*
         ** Read the byte as unsigned char: the <ctype.h> classifiers have
         ** undefined behavior when handed a negative value other than EOF.
         */
         c = (unsigned char) *sp++;
#ifdef LOCAL_DEBUG
   {
      static const char *st_name[] = { "ST_WFIELD", "ST_FIELD", "ST_QFIELD",
                                       "ST_BSLASH", "ST_HAV1OD", "ST_HAV2OD",
                                       "ST_HEXESC", "ST_HAV1HD", "ST_WCOMMA",
                                       "ST_DONE", "ST_ERROR", };
      dprintf("%s (%d) : c = 0x%02X  st = %s\n", __FUNCTION__, __LINE__, c, st_name[st]);
   }
#endif
         switch(st)
            {
               /*
               ** ST_WFIELD  --  waiting for a new field to start
               */
               case ST_WFIELD:
                  if((c == ' ') || (c == '\t'))
                     {
                        st = ST_WFIELD;
                     }
                  else if((c == '\0') || (c == '\r') || (c == '\n'))
                     {
                        st = ST_DONE;
                     }
                  else if(c == '"')
                     {
                        li = (int) (dp - d);
                        st = ST_QFIELD;
                     }
                  else if(isprint(c) && (c != ','))
                     {
                        /* start field with <c> */
                        li = (int) (dp - d);
                        *dp++ = c;
                        st = ST_FIELD;
                     }
                  else
                     {
                        st = ST_ERROR;
                     }
                  break;
               /*
               ** ST_FIELD   --  inside a non-quoted field
               */
               case ST_FIELD:
                  if((c == ' ') || (c == '\t'))
                     {
                        /* finish field: null-terminate, record index */
                        *dp++ = '\0';
                        *vp++ = li;
                        li = -1;
                        st = ST_WCOMMA;
                     }
                  else if((c == '\0') || (c == '\r') || (c == '\n'))
                     {
                        /* finish field: null-terminate, record index */
                        *dp++ = '\0';
                        *vp++ = li;
                        li = -1;
                        st = ST_DONE;
                     }
                  else if(c == ',')
                     {
                        /* finish field: null-terminate, record index */
                        *dp++ = '\0';
                        *vp++ = li;
                        li = -1;
                        st = ST_WFIELD;
                     }
                  else if(isgraph(c) && (c != '"'))
                     {
                        /* continue field with <c> */
                        *dp++ = c;
                        st = ST_FIELD;
                     }
                  else
                     {
                        st = ST_ERROR;
                     }
                  break;
               /*
               ** ST_QFIELD  --  inside a quoted field
               */
               case ST_QFIELD:
                  if(c == '\\')
                     {
                        st = ST_BSLASH;
                     }
                  else if(c == '"')
                     {
                        /* finish field: null-terminate, record index */
                        *dp++ = '\0';
                        *vp++ = li;
                        li = -1;
                        st = ST_WCOMMA;
                     }
                  else if(isprint(c))
                     {
                        /* continue field with <c> */
                        *dp++ = c;
                        st = ST_QFIELD;
                     }
                  else
                     {
                        st = ST_ERROR;
                     }
                  break;
               /*
               ** ST_BSLASH  --  handle backslash constructs
               */
               case ST_BSLASH:
                  if(iscntrl(c) && (c != '\t'))
                     {
                        /* also catches the end of the string ('\0') */
                        st = ST_ERROR;
                     }
                  else if(strchr("\"'01234567?\\abfnrtvx", c) == NULL)
                     {
                        /* not a known escape: continue field with a backslash and <c> */
                        *dp++ = '\\';
                        *dp++ = c;
                        st = ST_QFIELD;
                     }
                  else if(isdigit(c))
                     {
                        d1 = c;
                        st = ST_HAV1OD;
                     }
                  else if(c == 'x')
                     {
                        st = ST_HEXESC;
                     }
                  else
                     {
                        /* the remaining escapes (" ' ? \) stand for themselves */
                        switch(c)
                           {
                              case 'a':  c = '\a';  break;
                              case 'b':  c = '\b';  break;
                              case 'f':  c = '\f';  break;
                              case 'n':  c = '\n';  break;
                              case 'r':  c = '\r';  break;
                              case 't':  c = '\t';  break;
                              case 'v':  c = '\v';  break;
                           }
                        /* continue field with <c> */
                        *dp++ = c;
                        st = ST_QFIELD;
                     }
                  break;
               /*
               ** ST_HAV1OD  --  we have 1 octal digit
               */
               case ST_HAV1OD:
                  if(iscntrl(c) && (c != '\t'))
                     {
                        st = ST_ERROR;
                     }
                  else if(strchr("01234567", c) != NULL)
                     {
                        d2 = c;
                        st = ST_HAV2OD;
                     }
                  else
                     {
                        /* continue field with '\<d1>' */
                        if(d1 == '0')
                           {
                              /* don't write '\0' -- continue field with "\\0" */
                              *dp++ = '\\';
                              *dp++ = '0';
                           }
                        else
                           {
                              *dp++ = d1 - '0';
                           }
                        if(c == '"')
                           {
                              /* finish field: null-terminate, record index */
                              *dp++ = '\0';
                              *vp++ = li;
                              li = -1;
                              st = ST_WCOMMA;
                           }
                        else if(c == '\\')
                           {
                              st = ST_BSLASH;
                           }
                        else
                           {
                              /* continue field with <c> */
                              *dp++ = c;
                              st = ST_QFIELD;
                           }
                     }
                  break;
               /*
               ** ST_HAV2OD  --  we have 2 octal digits
               */
               case ST_HAV2OD:
                  if(iscntrl(c) && (c != '\t'))
                     {
                        st = ST_ERROR;
                     }
                  else if(strchr("01234567", c) != NULL)
                     {
                        /* <c> is the third octal digit */
                        esc = ((d1 - '0') << 6) | ((d2 - '0') << 3) | (c - '0');
                        if(esc == 0)
                           {
                              /* don't write '\0' -- continue field with "\\000" */
                              *dp++ = '\\';
                              *dp++ = '0';
                              *dp++ = '0';
                              *dp++ = '0';
                           }
                        else if(esc <= 255)
                           {
                              /* continue field with <esc> */
                              *dp++ = esc;
                           }
                        else
                           {
                              /* third digit makes result too big.  Use only first 2 digits */
                              esc = ((d1 - '0') << 3) | (d2 - '0');
                              *dp++ = esc;
                              /* continue field with <c> */
                              *dp++ = c;
                           }
                        st = ST_QFIELD;
                     }
                  else
                     {
                        /* Only got 2 digits */
                        esc = ((d1 - '0') << 3) | (d2 - '0');
                        if(esc == 0)
                           {
                              /* don't write '\0' -- continue field with "\\00" */
                              *dp++ = '\\';
                              *dp++ = '0';
                              *dp++ = '0';
                           }
                        else
                           {
                              /* continue field with <esc> */
                              *dp++ = esc;
                           }
                        if(c == '"')
                           {
                              /* finish field: null-terminate, record index */
                              *dp++ = '\0';
                              *vp++ = li;
                              li = -1;
                              st = ST_WCOMMA;
                           }
                        else if(c == '\\')
                           {
                              st = ST_BSLASH;
                           }
                        else
                           {
                              /* continue field with <c> */
                              *dp++ = c;
                              st = ST_QFIELD;
                           }
                     }
                  break;
               /*
               ** ST_HEXESC  --  entering hex escape
               */
               case ST_HEXESC:
                  if(iscntrl(c) && (c != '\t'))
                     {
                        st = ST_ERROR;
                     }
                  else if(isxdigit(c))
                     {
                        d1 = c;
                        st = ST_HAV1HD;
                     }
                  else
                     {
                        /* Oops, didn't get a hex digit after all */
                        /* continue field with "\\x" */
                        *dp++ = '\\';
                        *dp++ = 'x';
                        if(c == '"')
                           {
                              /* finish field: null-terminate, record index */
                              *dp++ = '\0';
                              *vp++ = li;
                              li = -1;
                              st = ST_WCOMMA;
                           }
                        else if(c == '\\')
                           {
                              st = ST_BSLASH;
                           }
                        else
                           {
                              /* continue field with <c> */
                              *dp++ = c;
                              st = ST_QFIELD;
                           }
                     }
                  break;
               /*
               ** ST_HAV1HD  --  we have 1 hex digit
               */
               case ST_HAV1HD:
                  if(iscntrl(c) && (c != '\t'))
                     {
                        st = ST_ERROR;
                     }
                  else if(isxdigit(c))
                     {
                        /* we got 2 hex digits */
                        d2 = c;
                        esc = (hexof(d1) << 4) | hexof(d2);
                        if(esc == 0)
                           {
                              /* don't write '\0' -- continue field with "\\x00" */
                              *dp++ = '\\';
                              *dp++ = 'x';
                              *dp++ = '0';
                              *dp++ = '0';
                           }
                        else
                           {
                              /* continue field with '\<d1><d2>' */
                              *dp++ = esc;
                           }
                        st = ST_QFIELD;
                     }
                  else
                     {
                        /* we got 1 hex digit and something else */
                        esc = hexof(d1);
                        if(esc == 0)
                           {
                              /* don't write '\0' -- continue field with "\\x0" */
                              *dp++ = '\\';
                              *dp++ = 'x';
                              *dp++ = '0';
                           }
                        else
                           {
                              /* continue field with '\<d1>' */
                              *dp++ = esc;
                           }
                        if(c == '"')
                           {
                              /* finish field: null-terminate, record index */
                              *dp++ = '\0';
                              *vp++ = li;
                              li = -1;
                              st = ST_WCOMMA;
                           }
                        else if(c == '\\')
                           {
                              st = ST_BSLASH;
                           }
                        else
                           {
                              /* continue field with <c> */
                              *dp++ = c;
                              st = ST_QFIELD;
                           }
                     }
                  break;
               /*
               ** ST_WCOMMA  --  waiting for a comma
               */
               case ST_WCOMMA:
                  if(c == ',')
                     {
                        st = ST_WFIELD;
                     }
                  else if((c == ' ') || (c == '\t'))
                     {
                        st = ST_WCOMMA;
                     }
                  else if((c == '\0') || (c == '\r') || (c == '\n'))
                     {
                        st = ST_DONE;
                     }
                  else
                     {
                        st = ST_ERROR;
                     }
                  break;
               default:
                  assert(!"illegal state");
                  /* with NDEBUG the assert vanishes: fail instead of looping forever */
                  st = ST_ERROR;
                  break;
            }
         if((st == ST_DONE) || (st == ST_ERROR))
            {
               break;
            }
      }

   if(st == ST_ERROR)
      {
         retval = 1;      /* parse error */
         goto cleanup;
      }
   *vp++ = -1;      /* will become NULL */

   nd = (int) (dp - d);
   nv = (int) (vp - v);
/*
**  Calculate the amount of memory to allocate for the field
**  components, the vectors, and pad to align the vectors.
*/
   pad_nd     = ((size_t) nd + sizeof(char *) - 1) & (~(sizeof(char *) - 1));
   alloc_size = pad_nd + (size_t) nv * sizeof(char *);
   dprintf("%s (%d) : nd=%d  nv=%d  alloc_size=%d\n", __FUNCTION__, __LINE__, nd, nv, (int) alloc_size);
/*
**  Allocate the structure.  On failure leave through <cleanup> so the
**  working buffers <d> and <v> are released.
*/
   scsv = calloc(1, sizeof(*scsv));   /* ptrs are NULL, ints are 0 */
   if(scsv == NULL)
      {
         retval = -1;     /* out of memory */
         goto cleanup;
      }
   scsv->priv = calloc(1, alloc_size);   /* ptrs are NULL, ints are 0 */
   if(scsv->priv == NULL)
      {
         free(scsv);
         retval = -1;     /* out of memory */
         goto cleanup;
      }
/*
**  Copy <d> and <v> into the private area.
**  The block returned by calloc is suitably aligned for any object, so
**  stepping <pad_nd> bytes into it gives a pointer-aligned address
**  without converting the pointer to an integer.
*/
   cp = (char *) scsv->priv;
   memcpy(cp, d, (size_t) nd);
   pp = (char **) (void *) (cp + pad_nd);

   for(i = 0; i < nv; i += 1)
      {
         pp[i] = (v[i] < 0) ? NULL : (cp + v[i]);
      }
/*
**  Finish the structure
*/
   scsv->field = pp;
   scsv->num   = nv - 1;  /* don't count the NULL pointer */

#ifdef LOCAL_DEBUG
   dprintf("%s (%d) : scsv->field = 0x%08lX\n", __FUNCTION__, __LINE__, (unsigned long) scsv->field);
   dprintf("%s (%d) : scsv->num   = %10d\n", __FUNCTION__, __LINE__, scsv->num);
   dprintf("%s (%d) : scsv->priv  = 0x%08lX\n", __FUNCTION__, __LINE__, (unsigned long) scsv->priv);
   {
      unsigned char *p = scsv->priv;
      size_t xx;
      for(xx = 0; xx < alloc_size; xx += 1, p += 1)
         {
            dprintf("%02X", (unsigned) *p);
         }
      dprintf("%s", "\n");
   }
#endif

   *pscsv = scsv;
   retval = 0;      /* success */
/*
**  Free <d> and <v>  (free(NULL) is a no-op)
*/
cleanup:
   free(d);
   free(v);

   return retval;
}

/****************************************************************************/
/*
**  This function is described in csv.h
**
**  Releases the structure referenced by <*pscsv> together with its
**  <priv> area, then stores NULL in <*pscsv>.  A NULL <*pscsv> is
**  accepted and left as NULL.
*/

void csv_SplitFree(csv_Split_t **pscsv)
{
   csv_Split_t *victim;

   assert(pscsv != NULL);

   /* Detach the structure from the caller's pointer first */
   victim = *pscsv;
   *pscsv = NULL;

   if(victim == NULL)
      {
         return;
      }

   if(victim->priv != NULL)
      {
         free(victim->priv);
      }

   /* Wipe the structure before handing it back to the allocator */
   memset(victim, 0, sizeof(*victim));
   free(victim);
}

/****************************************************************************/
/*
**  This function is described in csv.h
**
**  Builds, in freshly allocated memory, a CSV field representation of <s>.
**  The caller owns the returned block and releases it with free().
**  Returns NULL if memory cannot be obtained.
**
**  NOTE
**     * '"' and '\\' are preceded by a backslash.
**     * Bytes below ' ' or above '~' are written as \a \b \f \n \r \t \v
**       or as a three-digit octal escape.
**     * The result is wrapped in double quotes unless the string is
**       non-empty, needed no escapes, contains no spaces or commas, and
**       does not begin with '#'.  (csv_SplitNew treats a line whose first
**       character is '#' as a comment, so such a field must be quoted to
**       survive being first on a line.)
*/

char *csv_StringEncode(const char *s)
{
   char *d;                /* working buffer */
   char *r;                /* shrunk result */
   const char *sp;         /* source pointer */
   char *dp;               /* destination pointer */
   int   c;
   size_t slen;
   size_t dsize;
   size_t dlen;
   int   spc;     /* count of spaces and commas in <s> */
   int   esc;     /* count of escaped characters in <s> */
/*
**  Check parameter
*/
   assert(s != NULL);

   slen = strlen(s);
/*
**  Worst case is that every character in <s> is a control character
**  that must be replaced by four characters of the form "\000".
**  Also, leave room for the quotes and the null terminator.
**  Refuse inputs so long that the size computation would wrap around.
*/
   if(slen > (((size_t) -1) - 3) / 4)
      {
         return NULL;       /* Failure: cannot represent the buffer size */
      }
   dsize = 4 * slen + 3;
   d = malloc(dsize * sizeof(*d));
   if(d == NULL)
      {
         return NULL;       /* Failure: out of memory */
      }
/*
**  Process the string assuming that it must be quoted.
*/
   dp = d;
   sp = s;
   spc = 0;
   esc = 0;

   *dp++ = '"';
   /* mask to 0..255 so bytes with the high bit set compare as positive */
   while((c = ((int) *sp++) & 0x0FF) != '\0')
      {
         if((c == ' ') || (c == ','))
            {
               spc += 1;
            }
         else if((c == '"') || (c == '\\'))
            {
               /* backslash now, the character itself is written below */
               esc += 1;
               *dp++ = '\\';
            }
         else if((c < ' ') || (c > '~'))
            {
               esc += 1;
               *dp++ = '\\';
               switch(c)
                  {
                     case '\a':
                        *dp++ = 'a';
                        break;
                     case '\b':
                        *dp++ = 'b';
                        break;
                     case '\f':
                        *dp++ = 'f';
                        break;
                     case '\n':
                        *dp++ = 'n';
                        break;
                     case '\r':
                        *dp++ = 'r';
                        break;
                     case '\t':
                        *dp++ = 't';
                        break;
                     case '\v':
                        *dp++ = 'v';
                        break;
                     default:
                        /* always three octal digits */
                        *dp++ = (char) ('0' + ((c >> 6) & 0x3));
                        *dp++ = (char) ('0' + ((c >> 3) & 0x7));
                        *dp++ = (char) ('0' + ((c >> 0) & 0x7));
                        break;
                  }
               continue;
            }
         *dp++ = (char) c;
      }
   *dp++ = '"';
   *dp   = '\0';
   dlen  = (size_t) (dp - d);
/*
**  If there were no spaces/commas, no escaped characters, the string was
**  not empty, and it does not start with the comment character '#',
**  then we can drop the quotes.
*/
   if((spc == 0) && (esc == 0) && (slen != 0) && (s[0] != '#'))
      {
         dlen -= 2;                 /* no-quotes in new length */
         memmove(d, d + 1, dlen);   /* shift over by one byte */
         d[dlen] = '\0';
      }
/*
**  Realloc the result down to size (plus one for the null character)
*/
   r = realloc(d, dlen + 1);
   if(r == NULL)
      {
         r = d;     /* weird, couldn't realloc.  We'll keep old block */
      }

   return r;
}

/****************************************************************************/
