/*
 * libmime-decoders.c
 *
 * Source listing taken from the p3scan package (this header replaces the
 * navigation text left over from the HTML source browser page).
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <ctype.h>
#include <sys/stat.h>
#include <unistd.h>
#include <time.h>
#include <errno.h>
#include <dirent.h>

#include "ffget.h"
#include "pldstr.h"
#include "logger.h"
#include "libmime-decoders.h"


/* FL expands to the two arguments (file name, line number) that every
 * LOGGER_log() call in this file passes for its leading "%s:%d" prefix.
 * It is only defined here if no earlier header has already defined it. */
#ifndef FL
#define FL __FILE__,__LINE__
#endif

/* Size, in bytes, of the local buffer that MDECODE_decode_ISO() uses to hold
 * the character-set name of an encoded word (longer names are truncated by
 * snprintf). */
#define MDECODE_ISO_CHARSET_SIZE_MAX 16

// Debug predicates, all driven by glb.debug.
// NOTE(review): MDECODE_DEBUG_PEDANTIC and MDECODE_DEBUG_NORMAL are not defined
// in this file; presumably they come from libmime-decoders.h - confirm.
//
// MDECODE_DPEDANTIC / MDECODE_DNORMAL are plain boolean expressions.
// DMD is a statement prefix: it expands to a brace-less `if (...)`, so the
// single statement written after it only runs when debugging is enabled.
// Because it is a bare `if`, never follow a `DMD statement;` with an `else`.
#define MDECODE_DPEDANTIC ((glb.debug >= MDECODE_DEBUG_PEDANTIC))
#define MDECODE_DNORMAL   ((glb.debug >= MDECODE_DEBUG_NORMAL  ))
#define DMD if ((glb.debug >= MDECODE_DEBUG_NORMAL))


/*
 * Base64 decoding table, indexed by the (unsigned) value of an input byte.
 *
 *   'A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61, '+' -> 62, '/' -> 63
 *   '='     -> 0   (the padding character counts as a valid, zero-valued symbol)
 *   0x80    -> the byte is not part of the base64 alphabet and must be skipped
 *
 * Each row below covers 16 consecutive byte values; the comment at the end of
 * the row gives the value of its first index.
 */
static unsigned char b64[256]={
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0x00 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0x10 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,   62, 0x80, 0x80, 0x80,   63,  /* 0x20 : '+' '/' */
        52,   53,   54,   55,   56,   57,   58,   59,   60,   61, 0x80, 0x80, 0x80,    0, 0x80, 0x80,  /* 0x30 : '0'-'9' '=' */
      0x80,    0,    1,    2,    3,    4,    5,    6,    7,    8,    9,   10,   11,   12,   13,   14,  /* 0x40 : 'A'-'O' */
        15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0x50 : 'P'-'Z' */
      0x80,   26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,   39,   40,  /* 0x60 : 'a'-'o' */
        41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0x70 : 'p'-'z' */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0x80 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0x90 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0xA0 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0xB0 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0xC0 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0xD0 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,  /* 0xE0 */
      0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80   /* 0xF0 */
};

/*
 * Hexadecimal digit lookup, indexed by the (unsigned) value of an input byte.
 *
 * '0'-'9' map to 0..9, 'A'-'F' and 'a'-'f' map to 10..15.  Every entry that is
 * not listed below is implicitly zero, so a byte that is not a hex digit
 * cannot be told apart from the digit '0' by looking at this table alone.
 */
static unsigned char hexconv[256]={
      ['0'] =  0, ['1'] =  1, ['2'] =  2, ['3'] =  3, ['4'] =  4,
      ['5'] =  5, ['6'] =  6, ['7'] =  7, ['8'] =  8, ['9'] =  9,

      ['A'] = 10, ['B'] = 11, ['C'] = 12, ['D'] = 13, ['E'] = 14, ['F'] = 15,

      ['a'] = 10, ['b'] = 11, ['c'] = 12, ['d'] = 13, ['e'] = 14, ['f'] = 15
};



/* Module-wide settings shared by every decoder in this file. */
struct MDECODE_globals {
      int debug;        /* Debug level; compared against MDECODE_DEBUG_* by the DMD / MDECODE_D* macros */
      int verbose;      /* When non-zero, MDECODE_decode_ISO() reports unknown encoding characters */
      int decode_qp;    /* When zero, MDECODE_decode_qp_text() returns the line untouched */
      int decode_b64;   /* Set by MDECODE_init() / MDECODE_set_decode_b64(); not read anywhere in this file */
};

/* The single settings instance.  Being a static object it starts out all-zero;
 * MDECODE_init() installs the real defaults (both decoders enabled). */
static struct MDECODE_globals glb;




/*------------------------------------------------------------------------
Procedure:     MDECODE_init
Purpose:       Loads the module settings with their defaults: both decoders
               enabled, debugging and verbose reporting switched off.
Input:         none
Output:        Always returns 0
Errors:        none
------------------------------------------------------------------------*/
int MDECODE_init( void )
{
      /* Decoders are on by default */
      glb.decode_b64 = 1;
      glb.decode_qp = 1;

      /* Reporting is silent by default */
      glb.verbose = 0;
      glb.debug = 0;

      return 0;
}




/*------------------------------------------------------------------------
Procedure:     MDECODE_set_debug
Purpose:       Sets the debug reporting level used by the decoders
Input:         int level : new debug level; the debug macros in this file
               compare it against the MDECODE_DEBUG_* thresholds
Output:        Returns the level that was stored
Errors:        none
------------------------------------------------------------------------*/
int MDECODE_set_debug( int level )
{
      glb.debug = level;

      return level;
}


/*------------------------------------------------------------------------
Procedure:     MDECODE_set_verbose
Purpose:       Sets the verbosity flag (non-zero enables the error report
               that MDECODE_decode_ISO emits for unknown encoding types)
Input:         int level : new verbosity value
Output:        Returns the value that was stored
Errors:        none
------------------------------------------------------------------------*/
int MDECODE_set_verbose( int level )
{
      glb.verbose = level;

      return level;
}

/*------------------------------------------------------------------------
Procedure:     MDECODE_set_decode_qp
Purpose:       Enables or disables quoted-printable decoding in
               MDECODE_decode_qp_text (0 = disabled)
Input:         int level : new setting
Output:        Returns the value that was stored
Errors:        none
------------------------------------------------------------------------*/
int MDECODE_set_decode_qp( int level )
{
      glb.decode_qp = level;

      return level;
}

/*------------------------------------------------------------------------
Procedure:     MDECODE_set_decode_b64
Purpose:       Stores the BASE64 decoding on/off setting
Input:         int level : new setting
Output:        Returns the value that was stored
Errors:        none
------------------------------------------------------------------------*/
int MDECODE_set_decode_b64( int level )
{
      glb.decode_b64 = level;

      return level;
}


/*------------------------------------------------------------------------
Procedure:     MDECODE_decode_short64 ID:1
Purpose:       Decodes a short BASE64 encoded string in place
Input:         char *short64 : base64 encoded NUL terminated string
Output:        The decoded bytes overwrite the start of short64 and are
               followed by a '\0'.  Decoding in place is safe because every
               4 input symbols produce at most 3 output bytes, so the write
               position never overtakes the read position.
               Bytes that are not part of the base64 alphabet are skipped.
               A trailing group of fewer than 4 symbols is discarded.
               NOTE: the count of '=' characters is cumulative over the
               whole string, every group written after a '=' was seen is
               shortened by that count.
Returns:       0 on success, -1 if short64 is NULL
Errors:
------------------------------------------------------------------------*/
int MDECODE_decode_short64( char *short64 )
{
      const unsigned char *in;      /* Read position; unsigned so it can index b64[] safely */
      const unsigned char *in_end;  /* Points at the NUL terminator of the input */
      char *out;                    /* Write position (in-place decode) */
      unsigned char quad[4];        /* The 4->3 byte input array */
      int filled;                   /* How many valid symbols are in quad[] */
      int stopcount = 0;            /* How many stop (=) characters we've read in */

      if (short64 == NULL) return -1;

      in = (const unsigned char *)short64;
      in_end = in + strlen( short64 );
      out = short64;

      while (in < in_end)
      {
            /* Initialise the decode buffer */
            quad[0] = quad[1] = quad[2] = quad[3] = 0;
            filled = 0;

            /* Collect up to 4 valid symbols, but never read beyond the end of
             * the string: the terminating NUL is itself an "invalid" symbol, so
             * without this bound the scan would walk off the buffer whenever
             * the input does not hold a whole number of 4-symbol groups. */
            while ((filled < 4) && (in < in_end))
            {
                  unsigned char c = *in;
                  in++;

                  /* if we detect the "stopchar" then we better increment the STOP counter */
                  if (c == '=') stopcount++;

                  /* test for and discard invalid chars */
                  if (b64[c] == 0x80) continue;

                  /* do the conversion from encoded -> decoded */
                  quad[filled] = b64[c];
                  filled++;
            }

            /* Only a complete 4 symbol group is a proper 4:3 base64 block */
            if (filled == 4)
            {
                  unsigned char triple[3]; /* The 4->3 byte output array */
                  int k;

                  triple[0] = (unsigned char)((quad[0] << 2) | (quad[1] >> 4));
                  triple[1] = (unsigned char)((quad[1] << 4) | (quad[2] >> 2));
                  triple[2] = (unsigned char)((quad[2] << 6) | quad[3]);

                  /* Padding characters reduce the number of real bytes in the block */
                  for (k = 0; k < (3 -stopcount); k++)
                  {
                        *out = (char)triple[k];
                        out++;
                  }
            }
      }

      *out = '\0';  // Terminate the decoded data

      return 0;
}









/*------------------------------------------------------------------------
Procedure:     MDECODE_decode_quoted_printable ID:1
Purpose:       Decodes quoted printable encoded data.
Input:         char *line : \0 terminated string possibly containing quoted printable data
int qpmode : Selects which decoding ruleset to use ( refer to RFC2047 )
Output:        Decoded string is superimposed over the provided line parameter
Returns: Returns the number of bytes decoded.
------------------------------------------------------------------------*/
int MDECODE_decode_quoted_printable( char *line, int qpmode, char esc_char )
{

      char c;                                               /* The Character to output */
      int op, ip;                                     /* OutputPointer and InputPointer */
      int slen = strlen(line); /* Length of our line */

      DMD LOGGER_log("%s:%d:MDECODE_decode_quoted_printable:DEBUG: input string = '%s' Input length = %d\n",FL, line, slen);

      /* Initialise our "pointers" to the start of the encoded string */
      ip=op=0;

      /* for every character in the string... */

      for (ip = 0; ip < slen; ip++)
      {
            c = line[ip];

            /* if we have the quoted-printable esc char, then lets get cracking */
            if (c == esc_char)
            {

                  /* if we have another two chars... */
                  if ((ip +1) < slen )
                  {
                        int original_ip = ip;

                        /* Is our next char a \n\r ?

                              if it is, then we have to eliminate any further \r\n's etc
                              so as to turn the =\n\r into a 'soft return', which basically
                              means that we ignore it.  Soft-breaks are used so we can
                              fit our long lines into the requirement of a maximum of 76 characters
                              per line.

                              So we move the input-pointer along skipping each character without
                              incrementing the output pointer.

                         */

                        /** Absorb any trailing whitespaces **/
                        if (1)
                        {
                              char *w = &(line[ip +1]);
                              while ((*w == '\t') || (*w == ' ')) {w++;ip++;}
                        }

                        /** Do we now have a line break ? **/
                        if (( line[ip +1] == '\n') || (line[ip +1] == '\r' ))
                        {
                              ip++;
                              if ((ip+1 < slen)&&(( line[ip +1] == '\n') || (line[ip +1] == '\r' )))
                              {
                                    ip++;
                              }
                              continue;
                        }

                        else
                        {
                              /*
                                    if the characters following the '=' symbol are not
                                    of the \n or \r pair, then we will [currently]
                                    assume that the next two characters are in fact the
                                    hexadecimal encodings of the character we do want

                               */
                              
                              /** Revert to original position **/
                              ip = original_ip;

                              /* convert our encoded character from HEX -> decimal */

                              if ( ip < slen-1 ) // was 2, proving - if there are 3 chars in string, =AB, slen = 3, ip = 1
                              {
                                    c = (char)hexconv[(int)line[ip+1]]*16 +hexconv[(int)line[ip+2]];

                                    /* shuffle the pointer up two spaces */
                                    ip+=2;
                              }
                              else {
                                    LOGGER_log("%s:%d:MIME_decode_quoted_printable:WARNING: Ran out of characters when decoding end of '%s'\n", FL, &line[ip] );
                              }
                        }

                  } /* if there were two extra chars after the ='s */


                  /* if we didn't have enough characters, then  we'll make the char the
                   * string terminator (such as what happens when we get a =\n
                   */
                  else
                  {
                        /* 2002-12-16:18H31: changed from 'line[ip]' to 'line[op]' */
                        line[op] = '\0';
                        /* 2002-12-16:18H32: added break statement - if we're out of chars, then we quit the for loop */
                        break;
                  } /* else */

            } /* if c was a encoding char */


            else
                  if (( c == '_' ) && ( qpmode == MDECODE_QPMODE_ISO ))
                  {
                        // RFC2047  (Section 4.2.(2)(3)) says that if we encounter a '_' character in our ISO encodings then
                        //    we must convert that to a space ( as we are not allowed to have spaces in any
                        c = ' ';
                  }

            /* put in the new character, be it converted or not */
            line[op] = c;

            /* shuffle up the output line pointer */
            op++;


      } /* for loop */

      /* terminate the line */

      line[op]='\0';

      DMD LOGGER_log("%s:%d:MDECODE_decode_quoted_printable:DEBUG: Output = '%s' Output length = %d\n", FL, line, strlen(line));

      // 2003-01-26:PLD: Changed from (op -1) -=> op
      return op;
}




/*------------------------------------------------------------------------
Procedure:     MDECODE_decode_qp_text
Purpose:       Decodes a line of quoted-printable text in place, using '='
               as the escape character and the standard ruleset.
               When quoted-printable decoding has been switched off
               ( see MDECODE_set_decode_qp ) the line is left untouched.
               Note - a decoded byte may be \0, so the caller should rely on
               the returned size ( and fwrite() or similar ) rather than on
               string functions.
Input:         char *line: pointer to the buffer/line we wish to convert/scan
Output:        int: size of final buffer in bytes.
Errors:
------------------------------------------------------------------------*/
int MDECODE_decode_qp_text( char *line )
{
      if (glb.decode_qp != 0)
      {
            return MDECODE_decode_quoted_printable( line, MDECODE_QPMODE_STD, '=' );
      }

      /* Decoding disabled: report the untouched length */
      return strlen(line);
}

/*------------------------------------------------------------------------
Procedure:     MDECODE_decode_qp_ISO
Purpose:       Decodes the quoted-printable text of an ISO ( RFC2047 )
               encoded word in place, using '=' as the escape character.
               NOTE(review): the standard ruleset ( MDECODE_QPMODE_STD ) is
               used here, not MDECODE_QPMODE_ISO, so '_' is NOT converted to
               a space; the ISO variant was already disabled in the code this
               replaces - the reason is not visible in this file.
Input:         char *line: \0 terminated text to decode
Output:        int: number of bytes decoded
Errors:
------------------------------------------------------------------------*/
int MDECODE_decode_qp_ISO( char *line )
{
      int decoded_size;

      decoded_size = MDECODE_decode_quoted_printable( line, MDECODE_QPMODE_STD, '=' );

      return decoded_size;
}

/*------------------------------------------------------------------------
Procedure:     MDECODE_decode_multipart
Purpose:       Decodes text in place with the quoted-printable decoder, but
               with '%' instead of '=' as the escape character.
Input:         char *line: \0 terminated text to decode
Output:        int: number of bytes decoded
Errors:
------------------------------------------------------------------------*/
int MDECODE_decode_multipart( char *line )
{
      int decoded_size;

      decoded_size = MDECODE_decode_quoted_printable( line, MDECODE_QPMODE_STD, '%' );

      return decoded_size;
}



/*------------------------------------------------------------------------
Procedure:     MDECODE_decode_ISO ID:1
Purpose:       Decodes an ISO ( RFC2047 ) encoded string into native codepage dependent output
Input:         char *isostring : String containing =?code-page?encoding-type?string?= format
int length : length of the string we're decoding
Output:        isostring is overwritten with the decoded string.
Errors:
------------------------------------------------------------------------*/
int MDECODE_decode_ISO( char *isostring, int size )
{
      char *start_pair, *end_pair;
      char *iso, *iso_copy;
      char encoding_type='-';
      char encoding_charset[ MDECODE_ISO_CHARSET_SIZE_MAX ];
      char *iso_start, *iso_end;
      int iso_decoded;

      DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: ISO-string='%s'",FL,isostring);

      // Process of decoding the ISO encoded string sequence.
      //    ( this process is repeated until we run out of ISO sequences )
      //
      //    1. Check that the string has a =? sequence within it ( indicates the start of the ISO encoding
      //
      //    2. tokenise the sequence succeeding the =? token into its three (3) parts, namely the code-page, encoding-type and string respectively
      //
      //    3. decode the string based on the encoding type, Q = Quoted-Printable, B = BASE64
      //

      iso_end = iso_start = NULL;

      start_pair = end_pair = NULL;

      iso_copy = malloc( sizeof(char) *( size +1 ) );

      do {

            iso_decoded = 0;

            start_pair = strstr( isostring, "=?" );
//          if ( start_pair ) end_pair   = strstr( start_pair +2, "?=" );

            if (( start_pair != NULL ))
            {
                  iso_start = start_pair;

                  // There's probably a better way of doing this, but, for us to find the end of this
                  //    particular 'ISO' sequence, we need to hop past 3 more ?'s ( assuming we've already
                  //    found the first one.
                  DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: ISO start = %s",FL,iso_start);

                  iso_end = strchr( iso_start +strlen("=?"), '?' ); // Jump past the encoding
                  if (iso_end) iso_end = strchr( iso_end +1, '?' ); // Jump past the Q or B
                  if (iso_end) iso_end = strpbrk( iso_end +1, "?\n\r\t;" );  // dropped the SPACE here.
                  if ((iso_end != NULL)&&(*iso_end == '?')) iso_end+=2;


                  if ( (iso_start) && (iso_end) )
                  {
                        char *token_end;
                        char restore_char='\0';

                        // Copy the Encoding page/code.
                        iso = iso_start +strlen("=?");

                        token_end = strchr(iso,'?');
                        if (token_end) *token_end = '\0';
                        snprintf( encoding_charset, sizeof( encoding_charset ), "%s", iso);
                        DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: ISO char set = '%s'",FL,encoding_charset);

                        iso = token_end +1;

                        // Get the encoding _type_ (BASE64/QuotedPrintable etc)
                        token_end = strchr(iso,'?');
                        encoding_type = *iso;

                        iso = token_end +1;

                        DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: ISO encoding char = '%c'",FL,encoding_type);

                        // Get the encoded string
                        token_end = strpbrk(iso,"?;\n\r\t"); //DROPPED THE SPACE here
                        if (token_end != NULL)
                        {
                              if ((*token_end != '?')&&(*token_end != ';'))
                              {
                                    restore_char = *token_end;
                              }
                              *token_end = '\0';
                        }







                        if (iso)
                        {
                              DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: Encoded String = '%s'\n", FL, iso );
                              switch ( encoding_type ) {

                                    case MDECODE_ISO_ENCODING_Q:
                                    case MDECODE_ISO_ENCODING_q:
                                          DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: Decoding filename using Quoted-Printable (%s)\n", FL, iso);
                                          MDECODE_decode_qp_ISO(iso);
                                          iso_decoded = 1;
                                          break;

                                    case MDECODE_ISO_ENCODING_B:
                                    case MDECODE_ISO_ENCODING_b:
                                          DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: Decoding filename using BASE64 (%s)\n", FL, iso);
                                          MDECODE_decode_short64( iso );
                                          iso_decoded = 1;
                                          break;

                                    default:
                                          if (glb.verbose) LOGGER_log("%s:%d:MDECODE_decode_ISO:ERROR: The encoding character '%c' is not a valid type of encoding\n", FL, encoding_type );
                              }

                              // If we decoded the string okay, then we need to recompose the string

                              if ( iso_decoded == 1 )
                              {
                                    char *new_end_pos;

                                    DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: Decoded String = '%s'\n", FL, iso );
                                    *iso_start = '\0'; // Terminate the original string before the start of the ISO data

                                    // Because sometimes ISO strings are broken over multiple lines
                                    //          due to wrapping requirements of RFC(2)822, we need to 
                                    //          sniff out these tab or spaces and crop them out of our
                                    //          final ISO string.  We cannot simply search for the next
                                    //          =? sequence using strstr() because it might traverse 
                                    //          beyond the end of the current 'line' (ie, \r\n termination)

                                    if (token_end)
                                    {
                                          iso_end = token_end +1;
                                          DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: iso_end = '%20s'",FL, iso_end);
                                          while ((*iso_end == '?')||(*iso_end == '=')) iso_end++;
                                          DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: iso_end = '%20s'",FL, iso_end);

                                          new_end_pos = iso_end;
                                          while ((*new_end_pos == ' ')||(*new_end_pos == '\t')) new_end_pos++;
                                          if (strncmp(new_end_pos,"=?",2)==0) iso_end = new_end_pos;
                                    } else {
                                          iso_end = NULL;
                                    }

                                    DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: ISO-END = '%20s'",FL,iso_end);


                                    
                                    /** We now have the string split into 3 peices,
                                      **  isostring = pointing to the start
                                      **  iso = newly decoded string
                                      **  iso_end = start of string after the non-decoded ISO portion
                                      **/
                                    
                                    /** Generate new string using the decoded ISO to a temporary string **/
                                    if (restore_char != '\0')
                                    {
                                          DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: Recomposing string with restore-char of '%c'",FL,restore_char);
                                          DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: ISO-end (start of end of string) is \n%s",FL,iso_end);
                                          snprintf( iso_copy, size, "%s%s%c%s", isostring, iso, restore_char, (iso_end?iso_end:"") );
                                    } else {
                                          DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: Recomposing string with NO restore-char",FL,restore_char);
                                          snprintf( iso_copy, size, "%s%s%s", isostring, iso, (iso_end?iso_end:"") );
                                    }

                                    /** Switch the new headers over to the original headers again **/
                                    snprintf( isostring, size, "%s", iso_copy );
                                    DMD LOGGER_log("%s:%d:MDECODE_decode_ISO:DEBUG: New ISO string = \n%s",FL,isostring);

                              }

                        }
                  }

            } // if (iso_start)

      }
      while (iso_decoded == 1 );

      if (iso_copy) free(iso_copy);

      return 0;

}





//------------END libmime-decoders.c



/* Listing generated by Doxygen 1.6.0 */