/*
 jbw@pittvms.bitnet
 jbw@cisunx.UUCP

 Author permits free distribution of this file.
*/

/* CzW Chinese text transmitting scheme and CzW.c code

  Based on the months-long discussions on SCC and ideas and codes 
contributed to this Bboard by Ed Lai, Yagu Wei, Zhitao Zeng, myself (JB Wang)
and a few others, I believe the following proposal will be a good zW solution
to our Chinese text/news communication problems:

   (To distinguish the original zW scheme and the current proposal, I will
    use 'zW' for the former and 'CzW' for the latter)

Note: CzW.c is included, but the MAC part is not implemented yet.
^^^^

Goals:
~~~~~
  1) Reach a standard output format such that 
    i) all the characters fall within 94 visible ASCII characters plus
       a few standard control characters, e.g., <LF>, <FF>, <CR>, and
       <TAB>.
    ii) the file of this format can be translated into CCDOS, MAC Chinese
       and other Chinese text formats without knowing the origin of the
       encoded file (e.g., an encoded file from MAC can be translated into
       a CCDOS Chinese text without knowing it was from MAC).
    iii) furthermore, if the above steps are successful, the programs 
       developed in the future will read the encoded file directly without
       decoding process.
   2) The encoded file should carry every bit of the original file so that
     it is possible to recover the original file for further editing or
     printing.
   3) The encoded file size should be kept as small as possible.
   4) Try to build a program that can handle both CCDOS and MAC Chinese
      encoding and decoding.

Technical Schemes:
~~~~~~~~~~~~~~~~~
1) In mail/news, there can be mail headers and comments that do not
   belong to the content of the original message. To solve this,
   a) zW added two characters 'zW' to every line of the original message
      in the encoded file,
   b) CzW improves on it by adding two lines of encoder comment starting
      with '%', and an escape mark '\A@', to the beginning of the encoded
      file; therefore 'zW' does not have to lead every line.
2) To distinguish standard ASCII and Chinese codes,
   a) zW prepends a '#' character to every ASCII code
   b) CzW uses an escape mark '\A@' to mark the beginning of ASCII codes,
      i.e., if the next 1000 characters are all in ASCII, this single
      '\A@' is sufficient.
3) CCDOS uses extended ASCII (8bit byte) >160 for Chinese characters, two
  bytes for each, that have to be brought down to between 32 and 126,
  a) zW turns off the 8th bit of every such byte
  b) so does CzW, i.e., chr -= 128;
     whenever the Chinese codes begin, CzW marks the beginning
     with an escape mark
     '\zW', therefore, if the following 100 lines are in pure Chinese
     only one '\zW' is needed.
4) Now that there are 256 ASCII codes, we have only dealt with
   160 < ASCII <255 (Chinese in CCDOS) and 31 < ASCII <127 ( standard
   ASCII), the rest are handled as follows
   a) zW takes into account two other codes, i.e., ASCII 13 (carriage return)
      and ASCII 141 (WordStar carriage return), and the others are all
      discarded for fear of machine dependency problems.
   b) CzW does something totally different in order to make encoding/decoding
      reversible, i.e.,
      i) output <LF> ASCII 10,  <CR> ASCII 13,  <FF> ASCII 12, and
        <TAB> ASCII 9,   as they are, since they are acceptable in almost
        any system and printer as standard control codes of the same meaning.
      ii) Shift all the rest of 0 <= ASCII < 32 to  31< ASCII < 64 with
         an escape mark  '\^@', and all the 126 < ASCII < 161 to ASCII > 31
         with an escape mark '\W@' (because these are mostly used in
         WordStar).
      iii) To avoid machine dependency, the undesirable codes in these
         two ranges can be shifted or trimmed in the decoding process,
         instead of the encoding one. For CCDOS this is no problem: just
         translate all of them back.

5) Decoders:  decoding is just a reverse process of encoding, but for CCDOS
   and MAC Chinese there need to be two different encoders and decoders so 
   that the encoded file format can be shared.
   CzW reads in the encoded file and dumps the message onto the screen
   until the first '\A@' mark is met (to get rid of the header and message).
   The rest of the file is decoded and saved into a file.

*/

/*
 jbw@pittvms.bitnet
 jbw@cisunx.UUCP

 Author permits free distribution of this file.
*/


/**********************************************************************
*                          CzW.c                                      *
*                                                                     *
*           This program converts GB/CCDOS output to/from ASCII       *
*              it can be used to convert other binary files too       *
*                                                                     *
***********************************************************************/
/*
 It runs on VMS, UNIX , DOS and MAC systems.

Syntax:
~~~~~~
1) For CCDOS encoding and decoding,
   To convert a file into ASCII:
   $ CzW input_file output_file -e

   To convert the encoded ASCII file back into a CCDOS file:
   $ CzW input_file output_file -d

2) For MAC Chinese encoding and decoding,
   To convert a file into ASCII:
   $ CzW input_file output_file -me

   To convert the encoded ASCII file back into a MAC Chinese file:
   $ CzW input_file output_file -md

Compiling instruction:
~~~~~~~~~~~~~~~~~~~~~~
1) On VMS,
$CC CzW.c
$link CzW, sys$library:vaxcrtl.olb/lib
$CzW:==$mypath:CzW.exe
where the last step is to define the foreign command to use command line.

2) On unix,
%cc -o CzW CzW.c

3) On DOS and MAC-- compiler dependent

*/
/* Code starts here */

#if VMS
#include <file.h>
#else
#include <fcntl.h>
#endif

#include <stdio.h>
#include <string.h>

#define FALSE 0
#define TRUE 1
#define NONE 1


#define CLS printf("\n\n")

unsigned char inbuffer[1025];  /* read in 1 kbyte a time */
char outfile_name[60],infile_name[60];
FILE *infile, *outfile;
int filehandle, lens;


/*///*/

/* Insert a "\c@" continuation mark and start a new output line once the
   counted width of the current line exceeds 74 columns.  Takes the current
   column count and returns the count to go on with (0 after a break). */
static int ccdos_enc_wrap(column)
int column;
{
   if (column > 74)
    {
      fputs("\\c@\n", outfile);
      return 0;
    }
   return column;
}

/*
 * ccdos_encode: read the input file through the global `filehandle' and
 * write its CzW form to the file named by `outfile_name'.
 *
 * Every input byte belongs to one class.  An escape mark is written only
 * when the class changes:
 *    \A@  -- 32 < byte < 127,   written unchanged
 *    \zW  -- byte > 160,        written as byte - 128
 *    \^@  -- byte < 32,         written as byte + 32
 *    \W@  -- 126 < byte < 161,  written as byte - 95 (WordStar range)
 *    \c@  -- continuation line: the line break after it is not data
 * TAB, LF, FF, CR and space are copied as they are and never change the
 * current class.
 *
 * Returns 0 when the input has been processed, 1 when the output file
 * cannot be opened.  The input descriptor is closed in both cases.
 *
 * NOTE: the format itself has two ambiguities that are left untouched here
 * so that the output stays unchanged: bytes 0 and 127 both come out as a
 * space (0 + 32 and 127 - 95), and a byte that comes out as '\' is not
 * escaped.
 */
ccdos_encode()
{
/* Indexed by class: 0 = plain ASCII, 1 = high (Chinese), 2 = low control,
   3 = WordStar. */
static char *escape[4] = { "\\A@", "\\zW", "\\^@", "\\W@" };
/* Columns charged for each mark.  These are the values the line-width
   accounting has always used (5, 5, 5 and 3 for three-character marks);
   they only decide where "\c@" breaks fall. */
static int width[4] = { 5, 5, 5, 3 };
static int shift[4] = { 0, -128, 32, -95 };
int i, mode, wanted, line_length;
unsigned char c;

   mode = 0;              /* the header below ends in \A@ */
   line_length = 0;

   fprintf(stderr, "Start encoding %s\n\n", infile_name);

   outfile = fopen(outfile_name, "w");
   if (outfile == NULL)
    {
      fprintf(stderr, "Can't open the output file %s\n", outfile_name);
      close(filehandle);
      return 1;
    }

   fputs("% This is an encoded Chinese text file generated in CCDOS standard\n",
         outfile);
   fputs("% Please do not edit any lines below this\n", outfile);
   fputs("\\A@\n", outfile);

   /* The parentheses matter: without them `>' binds tighter than `=', so
      lens would receive the comparison result (0 or 1) instead of the byte
      count and only the first byte of every chunk would be encoded. */
   while ((lens = read(filehandle, inbuffer, 1024)) > 0)
    {
      for (i = 0; i < lens; i++)
       {
         c = inbuffer[i];

         if (c == 13 || c == 10 || c == 12 || c == 9 || c == 32)
          {
            /* CR and LF begin a fresh line, so no continuation mark is
               ever placed in front of them; the others may be wrapped. */
            if (c == 13 || c == 10)
               line_length = 0;
            else
               line_length = ccdos_enc_wrap(line_length);
            putc(c, outfile);
            line_length++;
            continue;
          }

         if (c > 160)
            wanted = 1;
         else if (c > 126)       /* Probably the WordStar stuff */
            wanted = 3;
         else if (c < 32)
            wanted = 2;
         else
            wanted = 0;

         if (wanted != mode)
          {
            line_length = ccdos_enc_wrap(line_length);
            fputs(escape[wanted], outfile);
            line_length += width[wanted];
            mode = wanted;
          }

         line_length = ccdos_enc_wrap(line_length);
         putc((int) c + shift[wanted], outfile);
         line_length++;
       }
    }

   if (lens < 0)
      fprintf(stderr, "Error reading %s\n", infile_name);

   close(filehandle);
   fclose(outfile);
   return 0;
}

/*
 * ccdos_decode: read a CzW encoded file through the global `filehandle'
 * and shift all the bytes back to their original values.
 *
 * Everything in front of the first "\A@" mark (mail header, encoder
 * comment lines) is copied to stderr.  At that mark the file named by
 * `outfile_name' is opened, the one byte following the mark (the line
 * break of the header) is dropped, and all later output goes to the file.
 *
 * Escape marks select the value added to each following byte:
 *    \zW -> +128,   \^@ -> -32,   \W@ -> +95,   \A@ -> 0
 * "\c@" together with the one byte after it is dropped (continuation
 * line).  TAB, LF, FF, CR and space are always copied unshifted.  A '\'
 * that does not start one of the marks is treated as an ordinary byte.
 *
 * Returns 0 when the input has been processed, 1 when the output file
 * cannot be opened.  The input descriptor is closed in both cases.
 */
ccdos_decode()
{
int i, k, got, shift, at_eof;
int comment = 1;          /* still in front of the first \A@ */
unsigned char c, e1, e2;

   shift = 0;
   at_eof = 0;
   lens = 0;
   i = 0;

   fprintf(stderr, "Start decoding %s\n\n", infile_name);
   outfile = stderr;

   for (;;)
    {
      /* Keep at least 4 unread bytes in the buffer (a 3-byte mark plus the
         byte that may be dropped after it) unless the input is exhausted.
         The unread tail is moved to the front and the buffer topped up, so
         a mark split across two read() calls is still seen as a whole. */
      if (!at_eof && lens - i < 4)
       {
         for (k = 0; i + k < lens; k++)
            inbuffer[k] = inbuffer[i + k];
         lens = k;
         i = 0;
         while (!at_eof && lens < 4)
          {
            got = read(filehandle, inbuffer + lens, 1024 - lens);
            if (got > 0)
               lens += got;
            else
             {
               if (got < 0)
                  fprintf(stderr, "Error reading %s\n", infile_name);
               at_eof = 1;
             }
          }
       }

      if (i >= lens)
         break;

      c = inbuffer[i];

      /* Only look for a mark when both of its trailing characters are
         really there; at the very end of the input they may not be. */
      if (c == '\\' && lens - i >= 3)
       {
         e1 = inbuffer[i + 1];
         e2 = inbuffer[i + 2];

         if (e1 == 'z' && e2 == 'W')
          {
            shift = 128;
            i += 3;
            continue;
          }
         if (e1 == '^' && e2 == '@')
          {
            shift = -32;
            i += 3;
            continue;
          }
         if (e1 == 'A' && e2 == '@')
          {
            shift = 0;
            i += 3;
            if (comment)
             {
               outfile = fopen(outfile_name, "w");
               if (outfile == NULL)
                {
                  fprintf(stderr, "Can't open the output file %s\n",
                          outfile_name);
                  close(filehandle);
                  return 1;
                }
               comment = 0;
               i++;          /* drop the byte that ends the header line */
             }
            continue;
          }
         if (e1 == 'W' && e2 == '@')
          {
            shift = 95;
            i += 3;
            continue;
          }
         if (e1 == 'c' && e2 == '@')
          {
            i += 4;          /* the mark and the line break after it */
            continue;
          }
       }

      if (c == 9 || c == 32 || c == 12 || c == 13 || c == 10)
         putc(c, outfile);
      else
         putc((unsigned char) (c + shift), outfile);
      i++;
    }

   close(filehandle);

   /* stderr must stay open when no \A@ mark ever switched the output. */
   if (comment)
      fprintf(stderr, "\nNo \\A@ start mark found, %s was not written\n",
              outfile_name);
   else
      fclose(outfile);

   return 0;
}



/* The MAC routines are defined after main; declare them before the calls. */
int mac_encode();
int mac_decode();

/* Throw away what is left of the current input line on stdin.  Returns the
   last value read: '\n', or EOF when the input ended first. */
static int main_skip_line()
{
int c;

   do
      c = getchar();
   while (c != '\n' && c != EOF);
   return c;
}

/*
 * main: parse the command line, obtain the two file names (asking for any
 * that was not given), open the input file and run the selected routine.
 *
 * Flags (any argument starting with '-', letters in any order and case):
 *    e  encode (the default)      d  decode      m  use the MAC routines
 * When both e and d appear the last one wins.
 * The first two other arguments are the input and the output file name.
 *
 * Returns 0 on success and 1 when no usable file names were obtained or
 * the input file could not be opened.
 */
main(argc, argv)
int argc;
char *argv[];
{
int in_file, out_file, status;   /* in_file/out_file: name must be asked for */
int i, j, name_count = 0;
int out_ok = 1;                  /* non-zero while the output name is unsettled */
int ch;                          /* int so that EOF can be told from a char */

/* the following variables are used for the option flags */
int
    Decode,         /* decode the encoded into CCDOS file      -d */
    Encode,         /* encode CCDOS into standard ASCII file   -e */
    mDecode,        /* decode the encoded MAC Chinese file    -md */
    mEncode;        /* encode MAC Chinese into standard ASCII file  -me */
int MAC = 0;
int want_decode = 0;

CLS;
in_file = 1; out_file = 1;
status = 0;

for (i = 1; i < argc; i++)
 {
   if (argv[i][0] == '-')
    {
      for (j = 1; argv[i][j] != '\0'; j++)
         switch (argv[i][j])
          {
           case 'm':
           case 'M': MAC = 1; break;
           case 'd':
           case 'D': want_decode = 1; break;
           case 'e':
           case 'E': want_decode = 0; break;
           default: fprintf(stderr, "Invalid flag = %c\n", argv[i][j]);
          }
    }
   else
    {
      switch (name_count++)
       {
        case 0:
           /* the name buffers are fixed arrays: refuse what cannot fit */
           if (strlen(argv[i]) >= sizeof(infile_name))
            {
              fprintf(stderr, "File name too long = %s\n", argv[i]);
              status = 1;
              goto quit;
            }
           in_file = 0;
           strcpy(infile_name, argv[i]);
           break;
        case 1:
           if (strlen(argv[i]) >= sizeof(outfile_name))
            {
              fprintf(stderr, "File name too long = %s\n", argv[i]);
              status = 1;
              goto quit;
            }
           out_file = 0;
           strcpy(outfile_name, argv[i]);
           break;
        default: fprintf(stderr, "Extra argument = %s\n", argv[i]);
       }
    }
 }

/* Work out the operation only after every flag has been seen, so that
   exactly one of the four is set and the order of the letters is free. */
Encode  = !MAC && !want_decode;
Decode  = !MAC &&  want_decode;
mEncode =  MAC && !want_decode;
mDecode =  MAC &&  want_decode;

if (in_file)
 {
   fprintf(stderr, " Input file (file to be translated) >");
   /* %59s: the name buffers hold 60 bytes */
   if (scanf("%59s", infile_name) != 1)
    {
      fputs("No input file given", stderr);
      status = 1;
      goto quit;
    }
   main_skip_line();
 }

if ((filehandle = open(infile_name, O_RDONLY)) < 0)
 {
   fputs("Can't find the input file", stderr);
   status = 1;
   goto quit;
 }

/* Everywhere but VMS and unix the descriptor is switched to binary mode.
   __unix__ and __APPLE__ are tested as well because strictly conforming
   compilers do not predefine plain `unix'. */
#if VMS || unix || defined(__unix__) || defined(__APPLE__)
#else
setmode(filehandle, O_BINARY);
#endif

while (out_ok)
 {
   if (out_file)
    {
      fputs(" Output file  >", stderr);
      if (fscanf(stdin, "%59s", outfile_name) != 1)
       {
         fputs("No output file given", stderr);
         status = 1;
         goto quit;
       }
      main_skip_line();
    }

   out_ok = 0;
   if ((outfile = fopen(outfile_name, "r")) != NULL)
    {
      fclose(outfile);
      out_ok = 1;
      out_file = 1;       /* ask for another name unless told to overwrite */
      fprintf(stderr, "File <%s> already exist, overwrite ? [N]", outfile_name);

      do
         ch = getchar();
      while (ch == '\n');

      if (ch == EOF)      /* no answer can come any more: do not loop forever */
       {
         status = 1;
         goto quit;
       }
      if ((ch == 'y') || (ch == 'Y')) out_ok = 0;
      /* drop the rest of the answer so it is not taken as a file name */
      main_skip_line();
    }
 }

if (Encode)
   ccdos_encode();

if (Decode)
   ccdos_decode();

if (mEncode)
   mac_encode();

if (mDecode)
   mac_decode();

quit:

printf("\n\t>>> Thank you for using CzW >>>\n");
printf("\t>>> by Jingbai Wang, August 1989 >>>\n");

return status;
}


/* MAC specific translation is not implemented yet: apart from the header
   line it writes, mac_encode uses the CCDOS escape/shift scheme. */

/* Insert a "\c@" continuation mark and start a new output line once the
   counted width of the current line exceeds 74 columns.  Takes the current
   column count and returns the count to go on with (0 after a break). */
static int mac_enc_wrap(column)
int column;
{
   if (column > 74)
    {
      fputs("\\c@\n", outfile);
      return 0;
    }
   return column;
}

/*
 * mac_encode: read the input file through the global `filehandle' and
 * write its CzW form to the file named by `outfile_name'.
 *
 * Every input byte belongs to one class.  An escape mark is written only
 * when the class changes:
 *    \A@  -- 32 < byte < 127,   written unchanged
 *    \zW  -- byte > 160,        written as byte - 128
 *    \^@  -- byte < 32,         written as byte + 32
 *    \W@  -- 126 < byte < 161,  written as byte - 95 (WordStar range)
 *    \c@  -- continuation line: the line break after it is not data
 * TAB, LF, FF, CR and space are copied as they are and never change the
 * current class.
 *
 * Returns 0 when the input has been processed, 1 when the output file
 * cannot be opened.  The input descriptor is closed in both cases.
 *
 * NOTE: the format itself has two ambiguities that are left untouched here
 * so that the output stays unchanged: bytes 0 and 127 both come out as a
 * space (0 + 32 and 127 - 95), and a byte that comes out as '\' is not
 * escaped.
 */
mac_encode()
{
/* Indexed by class: 0 = plain ASCII, 1 = high (Chinese), 2 = low control,
   3 = WordStar. */
static char *escape[4] = { "\\A@", "\\zW", "\\^@", "\\W@" };
/* Columns charged for each mark.  These are the values the line-width
   accounting has always used (5, 5, 5 and 3 for three-character marks);
   they only decide where "\c@" breaks fall. */
static int width[4] = { 5, 5, 5, 3 };
static int shift[4] = { 0, -128, 32, -95 };
int i, mode, wanted, line_length;
unsigned char c;

   mode = 0;              /* the header below ends in \A@ */
   line_length = 0;

   fprintf(stderr, "Start encoding %s\n\n", infile_name);

   outfile = fopen(outfile_name, "w");
   if (outfile == NULL)
    {
      fprintf(stderr, "Can't open the output file %s\n", outfile_name);
      close(filehandle);
      return 1;
    }

   fputs("% This is an encoded Chinese text file generated in MAC Chinese standard\n",
         outfile);
   fputs("% Please do not edit any lines below this\n", outfile);
   fputs("\\A@\n", outfile);

   /* The parentheses matter: without them `>' binds tighter than `=', so
      lens would receive the comparison result (0 or 1) instead of the byte
      count and only the first byte of every chunk would be encoded. */
   while ((lens = read(filehandle, inbuffer, 1024)) > 0)
    {
      for (i = 0; i < lens; i++)
       {
         c = inbuffer[i];

         if (c == 13 || c == 10 || c == 12 || c == 9 || c == 32)
          {
            /* CR and LF begin a fresh line, so no continuation mark is
               ever placed in front of them; the others may be wrapped. */
            if (c == 13 || c == 10)
               line_length = 0;
            else
               line_length = mac_enc_wrap(line_length);
            putc(c, outfile);
            line_length++;
            continue;
          }

         if (c > 160)
            wanted = 1;
         else if (c > 126)       /* Probably the WordStar stuff */
            wanted = 3;
         else if (c < 32)
            wanted = 2;
         else
            wanted = 0;

         if (wanted != mode)
          {
            line_length = mac_enc_wrap(line_length);
            fputs(escape[wanted], outfile);
            line_length += width[wanted];
            mode = wanted;
          }

         line_length = mac_enc_wrap(line_length);
         putc((int) c + shift[wanted], outfile);
         line_length++;
       }
    }

   if (lens < 0)
      fprintf(stderr, "Error reading %s\n", infile_name);

   close(filehandle);
   fclose(outfile);
   return 0;
}

/* Not implemented yet */
/*
 * mac_decode: read a CzW encoded file through the global `filehandle'
 * and shift all the bytes back to their original values.  No MAC specific
 * translation is done yet; the escape/shift scheme is the CCDOS one.
 *
 * Everything in front of the first "\A@" mark (mail header, encoder
 * comment lines) is copied to stderr.  At that mark the file named by
 * `outfile_name' is opened, the one byte following the mark (the line
 * break of the header) is dropped, and all later output goes to the file.
 *
 * Escape marks select the value added to each following byte:
 *    \zW -> +128,   \^@ -> -32,   \W@ -> +95,   \A@ -> 0
 * "\c@" together with the one byte after it is dropped (continuation
 * line).  TAB, LF, FF, CR and space are always copied unshifted.  A '\'
 * that does not start one of the marks is treated as an ordinary byte.
 *
 * Returns 0 when the input has been processed, 1 when the output file
 * cannot be opened.  The input descriptor is closed in both cases.
 */
mac_decode()
{
int i, k, got, shift, at_eof;
int comment = 1;          /* still in front of the first \A@ */
unsigned char c, e1, e2;

   shift = 0;
   at_eof = 0;
   lens = 0;
   i = 0;

   fprintf(stderr, "Start decoding %s\n\n", infile_name);
   outfile = stderr;

   for (;;)
    {
      /* Keep at least 4 unread bytes in the buffer (a 3-byte mark plus the
         byte that may be dropped after it) unless the input is exhausted.
         The unread tail is moved to the front and the buffer topped up, so
         a mark split across two read() calls is still seen as a whole. */
      if (!at_eof && lens - i < 4)
       {
         for (k = 0; i + k < lens; k++)
            inbuffer[k] = inbuffer[i + k];
         lens = k;
         i = 0;
         while (!at_eof && lens < 4)
          {
            got = read(filehandle, inbuffer + lens, 1024 - lens);
            if (got > 0)
               lens += got;
            else
             {
               if (got < 0)
                  fprintf(stderr, "Error reading %s\n", infile_name);
               at_eof = 1;
             }
          }
       }

      if (i >= lens)
         break;

      c = inbuffer[i];

      /* Only look for a mark when both of its trailing characters are
         really there; at the very end of the input they may not be. */
      if (c == '\\' && lens - i >= 3)
       {
         e1 = inbuffer[i + 1];
         e2 = inbuffer[i + 2];

         if (e1 == 'z' && e2 == 'W')
          {
            shift = 128;
            i += 3;
            continue;
          }
         if (e1 == '^' && e2 == '@')
          {
            shift = -32;
            i += 3;
            continue;
          }
         if (e1 == 'A' && e2 == '@')
          {
            shift = 0;
            i += 3;
            if (comment)
             {
               outfile = fopen(outfile_name, "w");
               if (outfile == NULL)
                {
                  fprintf(stderr, "Can't open the output file %s\n",
                          outfile_name);
                  close(filehandle);
                  return 1;
                }
               comment = 0;
               i++;          /* drop the byte that ends the header line */
             }
            continue;
          }
         if (e1 == 'W' && e2 == '@')
          {
            shift = 95;
            i += 3;
            continue;
          }
         if (e1 == 'c' && e2 == '@')
          {
            i += 4;          /* the mark and the line break after it */
            continue;
          }
       }

      if (c == 9 || c == 32 || c == 12 || c == 13 || c == 10)
         putc(c, outfile);
      else
         putc((unsigned char) (c + shift), outfile);
      i++;
    }

   close(filehandle);

   /* stderr must stay open when no \A@ mark ever switched the output. */
   if (comment)
      fprintf(stderr, "\nNo \\A@ start mark found, %s was not written\n",
              outfile_name);
   else
      fclose(outfile);

   return 0;
}

