mirror of
git://git.code.sf.net/p/cdesktopenv/code
synced 2025-03-09 15:50:02 +00:00
810 lines
26 KiB
C
810 lines
26 KiB
C
/*
|
|
* CDE - Common Desktop Environment
|
|
*
|
|
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
|
|
*
|
|
* These libraries and programs are free software; you can
|
|
* redistribute them and/or modify them under the terms of the GNU
|
|
* Lesser General Public License as published by the Free Software
|
|
* Foundation; either version 2 of the License, or (at your option)
|
|
* any later version.
|
|
*
|
|
* These libraries and programs are distributed in the hope that
|
|
* they will be useful, but WITHOUT ANY WARRANTY; without even the
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
* PURPOSE. See the GNU Lesser General Public License for more
|
|
* details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with these librararies and programs; if not, write
|
|
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
|
|
* Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
/*
|
|
* COMPONENT_NAME: austext
|
|
*
|
|
* FUNCTIONS: build_tree
|
|
* char_label
|
|
* huffman_code
|
|
* init_treebase
|
|
* main
|
|
* next_sorted_node
|
|
* print_usage
|
|
* strrev
|
|
* user_args_processor
|
|
*
|
|
* ORIGINS: 27
|
|
*
|
|
*
|
|
* (C) COPYRIGHT International Business Machines Corp. 1990,1996
|
|
* All Rights Reserved
|
|
* Licensed Materials - Property of IBM
|
|
* US Government Users Restricted Rights - Use, duplication or
|
|
* disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
|
|
*/
|
|
/************************** HUFFCODE.C ******************************
|
|
* $XConsortium: huffcode.c /main/9 1996/11/14 15:31:05 rcs $
|
|
* 12/90.
|
|
* Counts frequency of occurrance of every possible byte value of input text.
|
|
* Creates Huffman Code Table based on byte frequencies and writes it
|
|
* in 2 formats to 2 different output files.
|
|
* The encode table (.huf) maintains the frequency counts and explicitly
|
|
* includes the huffman code strings. Generally speaking, the .huf file
|
|
* is intended for humans to read. The decode table (.c) is an array
|
|
* of integers meant to be compiled into an object module, then linked
|
|
* into the decode program. The .c format closely resembles the original
|
|
* huffman code tree in this program.
|
|
* By keeping the tree as an obscure array of integers,
|
|
* the huffman code can double as an encryption technique,
|
|
* and the decoding method kept somewhat proprietary.
|
|
*
|
|
* For a good discussion of Huffman codes and related algorithms,
|
|
* see "Data Compression Techniques and Applications Hardware and
|
|
* Software Considerations" by Gilbert Held and Thomas R. Marshall.
|
|
* The tree itself is balanced to minimize longest bitstring length
|
|
* per Eugene Schwartz, Information and Control 7, 37-44 (1964).
|
|
*
|
|
* At beginning of each new execution, the program tries to
|
|
* open the .huf table file and continue byte frequency counting
|
|
* from the last run.
|
|
* If the .huf file doesn't exist, the table's counts are
|
|
* initialized to zeroes. The .c decode table is recomputed fresh
|
|
* each run, whether it existed before or not.
|
|
*
|
|
* If the input file is not specified then the frequencies in the table
|
|
* are not changed, and the huffman codes are recomputed with the
|
|
* existing frequencies.
|
|
*
|
|
* THIS PROGRAM DOES NOT CHECK .HUF FILE FORMAT!--it had better be correct.
|
|
*
|
|
* HUFFMAN ENCODE TABLE (.huf) FILE FORMAT:
|
|
* Each line represents each possible byte value (0 - 255),
|
|
* the huffman 'literal' character (#256), or comments.
|
|
* There are exactly 257 lines sorted by decreasing count.
|
|
* There are four fields, each separated by one or more tabs (\t).
|
|
*
|
|
* 1. CHARACTER. a number from 0 to 256.
|
|
*
|
|
* The 'character' represented by the number 256 is the literal.
|
|
* it represents all characters whose frequency is so low that
|
|
* there is no huffman code translation--this reduces the max
|
|
* length of the coded bit string when there are lots of zero
|
|
* or low frequency bytes in the input. For example,
|
|
* pure ascii text files only occasionally have byte values
|
|
* less than 32 (control chars) and rarely greater than 127
|
|
* (high order bit turned on).
|
|
*
|
|
* 2. HUFFMAN CODE. a string of binary digits (0's and 1's).
|
|
* Each string is unique to that character.
|
|
* This field will consist of a single blank, when the character
|
|
* will be coded by the huffman literal. If the code of
|
|
* the literal itself is blank, then literal coding is
|
|
* not used in this table--all characters are represented
|
|
* by complete huffman code strings.
|
|
*
|
|
* 3. COUNT. The number of times this character appeared in the text.
|
|
* The literal's count equals the sum of the counts of all the
|
|
* real characters which are represented by the literal.
|
|
* The literal's count may be 0 if all the characters it
|
|
* represents have zero frequencies.
|
|
*
|
|
* 4. COMMENTS. A label depicting the printable char or its description.
|
|
*
|
|
* HUFFMAN DECODE TABLE (.c) FILE FORMAT:
|
|
* A sequence of integers formatted as a C array of integer pairs
|
|
* and intended to be compiled and linked into the decode program.
|
|
* Each huffman tree node contains two integers.
|
|
* The root of the tree is the LAST integer pair.
|
|
* The first (left) integer contains the array index down the '0' branch,
|
|
* the right integer points down the '1' branch.
|
|
* However if an integer is negative, the decoding ceases and the
|
|
* resulting plaintext character is the negative integer + 257,
|
|
* and will always be in the range 0 - 255, or 256 for the literal code.
|
|
*
|
|
* $Log$
|
|
* Revision 2.3 1996/03/25 18:55:04 miker
|
|
* Changed FILENAME_MAX to _POSIX_PATH_MAX.
|
|
*
|
|
* Revision 2.2 1995/10/25 17:50:34 miker
|
|
* Added prolog.
|
|
*
|
|
* Revision 2.1 1995/09/22 20:46:28 miker
|
|
* Freeze DtSearch 0.1, AusText 2.1.8
|
|
*
|
|
* Revision 1.4 1995/09/19 22:04:11 miker
|
|
* Print out nonascii chars in .huf file comments.
|
|
*
|
|
* Revision 1.3 1995/09/05 18:08:00 miker
|
|
* Name changes for DtSearch.
|
|
*/
|
|
#include "SearchP.h"
|
|
#include <limits.h>
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include <locale.h>
|
|
#include <sys/stat.h>
|
|
#include <stdlib.h>
|
|
|
|
#define MS_huff 30 /* message catalog set number */
|
|
#define DELIMITERS "\t\n" /* betw fields in .huf file */
|
|
#define LAST_BIT '-'
|
|
#define MAX_BITLEN 24
|
|
#define MAX_NODES 514
|
|
/*
|
|
* 256 chars + 'literal' char = max leaves, therefore max treesize
|
|
* = 2n - 1 = 513
|
|
*/
|
|
|
|
/*----------------------- HCTREE --------------------------*/
|
|
/* tree is also a table so tree ptrs are table indexes:
|
|
* 0 - 255 = characters themselves (leaves at base of tree).
|
|
* 256 = literal code (special char/leaf).
|
|
* > 256 = higher nodes.
|
|
* Global 'last_node' = highest actual node alloc so far.
|
|
* When tree completed, last_node = root of tree.
|
|
* -1 = null links.
|
|
*/
|
|
typedef struct {
|
|
char bit; /* '0' or '1' (assoc with link to
|
|
* father) */
|
|
long count; /* freq of occurrance of char */
|
|
int sort; /* specifies output sort order */
|
|
int father; /* index points UP toward root of
|
|
* tree */
|
|
int son0; /* index of '0' (left) subnode */
|
|
int son1; /* index of '1' (right) subnode */
|
|
} HCTREE;
|
|
|
|
/*------------------------ GLOBALS ---------------------------*/
|
|
static int last_node = 256;
|
|
long total_count;
|
|
long literal_threshold = 0L;
|
|
int debug_switch = FALSE;
|
|
int literal_coding_on = TRUE;
|
|
int input_file_specified; /* TRUE if user enters
|
|
* filename */
|
|
int no_huffcode_file; /* TRUE if table file not
|
|
* found */
|
|
HCTREE hctree1[MAX_NODES];
|
|
char filename_input[_POSIX_PATH_MAX];
|
|
char filename_huf[_POSIX_PATH_MAX];
|
|
char filename_huc[_POSIX_PATH_MAX];
|
|
|
|
#ifndef TURBO_COMPILER
|
|
/****************************************/
|
|
/* */
|
|
/* strrev */
|
|
/* */
|
|
/****************************************/
|
|
static char *strrev (char *string)
|
|
{
|
|
int i;
|
|
int j;
|
|
char temp;
|
|
|
|
for (i = 0, j = strlen (string) - 1; i < j; i++, j--) {
|
|
temp = string[i];
|
|
string[i] = string[j];
|
|
string[j] = temp;
|
|
}
|
|
return string;
|
|
}
|
|
|
|
#endif /* !TURBO_COMPILER */
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* Build Tree */
|
|
/* */
|
|
/****************************************/
|
|
/* Each call joins the two nodes with smallest count
|
|
* into a single higher level node. If there are more than 2 nodes with
|
|
* similar 'smallest' counts, then within that group the 2 nodes with the
|
|
* shortest current bitstring length are joined.
|
|
* Returns TRUE for each successful lower level join.
|
|
* Returns FALSE when final join is made at highest level (root).
|
|
*/
|
|
static int build_tree (void)
|
|
{
|
|
int i, j;
|
|
int low0 = -1;
|
|
int low1 = -1;
|
|
int len0 = 0;
|
|
int len1 = 0;
|
|
int curr;
|
|
|
|
/* find 2 lowest counts */
|
|
for (i = 0; i < 257; i++) {
|
|
/* skip over real chars with counts <= 'literal' threshold */
|
|
if (literal_coding_on
|
|
&& i != 256
|
|
&& hctree1[i].count <= literal_threshold) {
|
|
hctree1[i].sort = MAX_BITLEN + 1;
|
|
continue;
|
|
}
|
|
|
|
/* skip over literal if literal coding turned off */
|
|
if (i == 256 && !literal_coding_on) {
|
|
hctree1[256].sort = MAX_BITLEN + 1;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Ascend to highest tree level for current table entry,
|
|
* putting length of bitstring into sort field. Save
|
|
* highest tree level in curr.
|
|
*/
|
|
hctree1[i].sort = 0;
|
|
for (j = i; j != -1; j = hctree1[j].father) {
|
|
hctree1[i].sort++;
|
|
curr = j;
|
|
}
|
|
|
|
/*
|
|
* sanity checks after ascending tree: 1. if bit strings
|
|
* have grown too large, quit. 2. if curr points to top
|
|
* tree level, quit.
|
|
*/
|
|
if (hctree1[i].sort > MAX_BITLEN) {
|
|
fprintf (stderr, "%s", catgets(dtsearch_catd, MS_huff, 30,
|
|
"\n183 Bit strings have grown too large. You probably "
|
|
"have literals\n turned off with grossly unbalanced "
|
|
"character counts.\n\7"));
|
|
exit (2);
|
|
}
|
|
if (hctree1[curr].count >= total_count) {
|
|
fprintf (stderr, "%s", catgets(dtsearch_catd, MS_huff, 31,
|
|
"\n191 Programming Error: Still trying to build\n"
|
|
" Huffman Code Tree after root created.\n\7"));
|
|
exit (2);
|
|
}
|
|
|
|
/*
|
|
* if curr ptr already joins low0 or low1, try the next
|
|
* table entry
|
|
*/
|
|
if (curr == low0 || curr == low1)
|
|
continue;
|
|
|
|
/*
|
|
* If curr count is less than low0, or if curr count = low0
|
|
* but curr bitstring length is less, replace both low0 and
|
|
* low1. (that way, we keep low0 always <= low1)
|
|
*/
|
|
if (low0 == -1 || hctree1[curr].count < hctree1[low0].count ||
|
|
(hctree1[curr].count == hctree1[low0].count && hctree1[i].sort < len0)) {
|
|
low1 = low0;
|
|
len1 = len0;
|
|
low0 = curr;
|
|
len0 = hctree1[i].sort;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* At this point curr count is 'greater' than low0. If curr
|
|
* count is less than low1, or if curr count = low1 but
|
|
* curr bitstring length is less, replace only low1
|
|
*/
|
|
if (low1 == -1 || hctree1[curr].count < hctree1[low1].count ||
|
|
(hctree1[curr].count == hctree1[low1].count && hctree1[i].sort < len1)) {
|
|
low1 = curr;
|
|
len1 = hctree1[i].sort;
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* default: curr count is greater than BOTH low0 and low1,
|
|
* try next table entry
|
|
*/
|
|
} /* end loop to find two lowest counts */
|
|
|
|
/* low0 and low1 now point to two lowest count nodes.
|
|
* link in low0 and low1 to next available new node.
|
|
*/
|
|
last_node++;
|
|
hctree1[low0].bit = '0';
|
|
hctree1[low0].father = last_node;
|
|
hctree1[low1].bit = '1';
|
|
hctree1[low1].father = last_node;
|
|
hctree1[last_node].bit = LAST_BIT;
|
|
hctree1[last_node].father = -1;
|
|
hctree1[last_node].count = hctree1[low0].count + hctree1[low1].count;
|
|
hctree1[last_node].son0 = low0;
|
|
hctree1[last_node].son1 = low1;
|
|
|
|
if (debug_switch)
|
|
printf ("%3d: low0=%6ld\tlow1=%6ld\tsum=%6ld\t(%ld)\n",
|
|
last_node, hctree1[low0].count, hctree1[low1].count,
|
|
hctree1[last_node].count, total_count);
|
|
|
|
if (hctree1[last_node].count < total_count)
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
} /* end of function build_tree */
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* Char Label */
|
|
/* */
|
|
/****************************************/
|
|
static char *char_label (int x)
|
|
{
|
|
static char buf[64];
|
|
|
|
switch (x) {
|
|
case 0:
|
|
return "NULL";
|
|
case 8:
|
|
return "\\b (backspace)";
|
|
case 9:
|
|
return "\\t (tab)";
|
|
case 10:
|
|
return "\\n (linefeed)";
|
|
case 11:
|
|
return "\\v (vert tab)";
|
|
case 12:
|
|
return "\\f (form feed)";
|
|
case 13:
|
|
return "\\r (carr retn)";
|
|
case 26:
|
|
return "CTRL-Z (EOF)";
|
|
case 27:
|
|
return "CTRL-[ (ESC)";
|
|
case 31:
|
|
return "CTRL-dash";
|
|
case 32:
|
|
return "SPACE (blank)";
|
|
case 45:
|
|
return "- (dash)";
|
|
case 95:
|
|
return "_ (underscore)";
|
|
case 127:
|
|
return "DEL";
|
|
case 256:
|
|
return "*** LITERAL CODE ***";
|
|
default:
|
|
if (x > 256)
|
|
return "";
|
|
else if (x < 32) {
|
|
snprintf(buf, sizeof(buf), "'CTRL-%c'", 0x40 | x);
|
|
return buf;
|
|
}
|
|
else if (x >= 128) {
|
|
snprintf(buf, sizeof(buf), "%s", catgets(dtsearch_catd, MS_huff, 32,
|
|
"(nonascii char, high bit set)"));
|
|
return buf;
|
|
}
|
|
else {
|
|
sprintf (buf, "'%c'", x);
|
|
return buf;
|
|
}
|
|
}
|
|
} /* end of function char_label */
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* Next Sorted Node */
|
|
/* */
|
|
/****************************************/
|
|
/* Called repeatedly, returns the next treebase node in sorted order.
|
|
* Sort order is by length of Huffman Code String.
|
|
* Caller must pass index of last node returned (neg at first call).
|
|
* Lasti should never be larger than treebase.
|
|
*/
|
|
static int next_sorted_node (int lasti)
|
|
{
|
|
int i;
|
|
int nexti = -1;
|
|
long nextsortval = MAX_BITLEN + 2;
|
|
|
|
/* permanently mark last returned node as unavailable */
|
|
if (lasti >= 0)
|
|
hctree1[lasti].sort = MAX_BITLEN + 2;
|
|
|
|
/* find next shortest string length */
|
|
for (i = 0; i < 257; i++)
|
|
if (hctree1[i].sort < nextsortval) {
|
|
nextsortval = hctree1[i].sort;
|
|
nexti = i;
|
|
}
|
|
return nexti;
|
|
} /* end of function next_sorted_node */
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* Initialize Treebase */
|
|
/* */
|
|
/****************************************/
|
|
/* 'Treebase' is original 257 character nodes (including literal code).
|
|
* If huffcode table file exists, initializes treebase with its values,
|
|
* else initializes treebase with zero counts.
|
|
*/
|
|
static void init_treebase (void)
|
|
{
|
|
int i;
|
|
FILE *instream_huf;
|
|
char filebuf[128];
|
|
|
|
total_count = 0L;
|
|
|
|
/* .huf table file does not exist--zero all counts */
|
|
if ((instream_huf = fopen (filename_huf, "r")) == NULL) {
|
|
no_huffcode_file = TRUE;
|
|
for (i = 0; i < 257; i++) {
|
|
hctree1[i].bit = LAST_BIT;
|
|
hctree1[i].count = 0L;
|
|
hctree1[i].father = -1;
|
|
hctree1[i].son0 = -1;
|
|
hctree1[i].son1 = -1;
|
|
}
|
|
}
|
|
|
|
/* Table file exists--init treebase with values from file.
|
|
* We are only interested in the character itself (i),
|
|
* and its current count. All other fields will be recreated
|
|
* at output time. FILE FORMAT IS NOT CHECKED--IT HAD BETTER BE CORRECT!
|
|
*/
|
|
else {
|
|
no_huffcode_file = FALSE;
|
|
if(NULL == fgets (filebuf, sizeof (filebuf) - 1, instream_huf)) {
|
|
fprintf (stderr, "No first line in file\n");
|
|
exit(2);
|
|
}
|
|
/* discard this first line (don't need id stamp) */
|
|
while (fgets (filebuf, sizeof (filebuf) - 1, instream_huf)
|
|
!= NULL) {
|
|
i = atoi (strtok (filebuf, DELIMITERS)); /* char */
|
|
if (i < 0 || i > 256) {
|
|
fprintf (stderr, catgets(dtsearch_catd, MS_huff, 33,
|
|
"366 Invalid file format for %s.\n"),
|
|
filename_huf);
|
|
exit (2);
|
|
}
|
|
strtok (NULL, DELIMITERS); /* skip over current huff
|
|
* code */
|
|
hctree1[i].count = (i == 256) ?
|
|
0L : atol (strtok (NULL, DELIMITERS));
|
|
hctree1[i].bit = LAST_BIT;
|
|
hctree1[i].father = -1;
|
|
hctree1[i].son0 = -1;
|
|
hctree1[i].son1 = -1;
|
|
if (i != 256)
|
|
total_count += hctree1[i].count;
|
|
} /* endwhile loop that reads each table line */
|
|
fclose (instream_huf);
|
|
}
|
|
return;
|
|
} /* end of function init_treebase */
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* Huffman Code */
|
|
/* */
|
|
/****************************************/
|
|
/* determines correct huffman code based on current counts in tree,
|
|
* writes out all to both files overlaying previous values if they existed.
|
|
*/
|
|
static void huffman_code (time_t idstamp)
|
|
{
|
|
int i; /* current char */
|
|
int lasti;
|
|
int j; /* ascends tree from i to build bit_string */
|
|
char bit_string[MAX_BITLEN + 4];
|
|
char sprintbuf[128];
|
|
char *bitptr;
|
|
FILE *outstream_huc;
|
|
FILE *outstream_huf;
|
|
|
|
/* establish the 'literal' node (char #256) count
|
|
* equal to sum of all chars whose counts are less than threshold.
|
|
*/
|
|
if (literal_coding_on) {
|
|
hctree1[256].count = 0L;
|
|
for (i = 0; i < 256; i++)
|
|
if (hctree1[i].count <= literal_threshold)
|
|
hctree1[256].count += hctree1[i].count;
|
|
}
|
|
|
|
/* build the Huffman Code tree, and determine root (last_node) */
|
|
while (build_tree ());
|
|
|
|
/* now that we know the total number of tree nodes (last_node),
|
|
* we are ready to write.
|
|
* Open both output files and verify they are not write protected.
|
|
*/
|
|
if ((outstream_huc = fopen (filename_huc, "w")) == NULL) {
|
|
fprintf (stderr, catgets(dtsearch_catd, MS_huff, 34,
|
|
"424 File '%s' failed to open for write. Is it read-only?\n"),
|
|
filename_huc);
|
|
exit (2);
|
|
}
|
|
if ((outstream_huf = fopen (filename_huf, "w")) == NULL) {
|
|
fprintf (stderr, catgets(dtsearch_catd, MS_huff, 34,
|
|
"439 File '%s' failed to open for write. Is it read-only?\n"),
|
|
filename_huf);
|
|
exit (2);
|
|
}
|
|
|
|
/* create the .c decode file (tree as integer array) */
|
|
fprintf (outstream_huc,
|
|
"#include <time.h>\n"
|
|
"char *hctree_name =\t\"%s\";\n"
|
|
"time_t hctree_id =\t%ldL;\n"
|
|
"int hctree_root =\t%d;\n"
|
|
"static int hctree_array[] = {\n",
|
|
filename_huc, idstamp, last_node - 257);
|
|
for (i = 257; i <= last_node; i++) {
|
|
fprintf (outstream_huc, "\t%4d,\t%4d%c\t/* %3d */\n",
|
|
hctree1[i].son0 - 257, hctree1[i].son1 - 257,
|
|
(i == last_node) ? ' ' : ',', /* no comma after last
|
|
* one */
|
|
i - 257); /* comment contains node number */
|
|
}
|
|
fprintf (outstream_huc, "\t};\nint *hctree =\thctree_array;\n");
|
|
fclose (outstream_huc);
|
|
|
|
/* write out the tree base (0-256) in sorted order to .huf file */
|
|
fprintf (outstream_huf, "%ld\tHCTREE_ID\n", idstamp);
|
|
for (lasti = -1; (i = next_sorted_node (lasti)) >= 0; lasti = i) {
|
|
/*
|
|
* Create huffman code digit string. j ascends tree from i
|
|
* to build string in reverse order.
|
|
*/
|
|
bitptr = bit_string;
|
|
for (j = i; j != -1; j = hctree1[j].father)
|
|
*bitptr++ = hctree1[j].bit;
|
|
*bitptr = '\0'; /* terminate reversed string */
|
|
strrev (bit_string); /* reverse the string order */
|
|
if (bit_string[1] == 0)
|
|
strcpy (bit_string, " ");
|
|
if (strlen (bit_string) < 9)
|
|
strcat (bit_string, "\t");
|
|
|
|
/* write out the line for this char */
|
|
sprintf (sprintbuf, "%d\t%s\t%ld\t%s\n",
|
|
i,
|
|
bit_string + 1, /* hop over LAST_BIT */
|
|
hctree1[i].count,
|
|
char_label (i));
|
|
fprintf (outstream_huf, "%s", sprintbuf);
|
|
|
|
} /* end forloop printing out each tree base entry */
|
|
|
|
fclose (outstream_huf);
|
|
return;
|
|
} /* end of function huffman_code */
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* Print Usage */
|
|
/* */
|
|
/****************************************/
|
|
static void print_usage (void)
|
|
{
|
|
fprintf (stderr, catgets(dtsearch_catd, MS_huff, 35,
|
|
"USAGE: huffcode [-lN | -l-] [-o] <huffname> [<infile>]\n"
|
|
" -l<N> specifies the 'literal' threshold count. Any character occurring\n"
|
|
" <= <N> times will be coded with the Huffman literal. Default is -l0,\n"
|
|
" literal coding only for bytes with counts of zero.\n"
|
|
" -l- turns off literal coding. Turning off literal coding in unbalanced\n"
|
|
" trees leads to EXTREMELY LONG bit string codes--don't do it unless\n"
|
|
" the input is known to be a well balanced binary file.\n"
|
|
" -o preauthorizes overwriting any currently existing decode file.\n"
|
|
" <huffname> is the filename prefix for the Huffman Code files.\n"
|
|
" If the encode file (%s) already exists, byte counts from infile will\n"
|
|
" be added to it, otherwise it will be newly created.\n"
|
|
" The decode file (%s) is always newly created each run.\n"
|
|
" <infile> is an input file containing bytes to be counted.\n"
|
|
" It may be omitted if the encode file already exists.\n"),
|
|
EXT_HUFFCODE, EXT_HDECODE);
|
|
return;
|
|
} /* end of function print_usage */
|
|
|
|
/********************************************************/
|
|
/* */
|
|
/* USER_ARGS_PROCESSOR */
|
|
/* */
|
|
/********************************************************/
|
|
/* handles command line arguments for 'main' */
|
|
static void user_args_processor (int argc, char **argv)
|
|
{
|
|
char *argptr;
|
|
int OK_to_overwrite = FALSE;
|
|
FILE *stream;
|
|
|
|
if (argc <= 1) { /* user just wants to see usage msg */
|
|
print_usage ();
|
|
exit (1);
|
|
}
|
|
|
|
/* each pass grabs new parm of "-xxx" format */
|
|
while (--argc > 0 && (*++argv)[0] == '-') {
|
|
argptr = argv[0];
|
|
argptr[1] = tolower (argptr[1]);
|
|
switch (argptr[1]) {
|
|
case 'l': /* literal threshold */
|
|
if (argptr[2] == 0)
|
|
goto BADARG;
|
|
else if (argptr[2] == '-')
|
|
literal_coding_on = FALSE;
|
|
else
|
|
literal_threshold = atoi (argptr + 2);
|
|
break;
|
|
|
|
case 'o': /* OK_to_overwrite .c file if it already
|
|
* exists */
|
|
OK_to_overwrite = TRUE;
|
|
break;
|
|
|
|
case 'v': /* verbose mode = debug switch */
|
|
debug_switch = TRUE;
|
|
break;
|
|
|
|
BADARG:
|
|
default:
|
|
fprintf (stderr, catgets(dtsearch_catd, MS_huff, 36,
|
|
"'%s' is invalid argument.\n"), argptr);
|
|
print_usage ();
|
|
exit (2); /* ABORT program */
|
|
|
|
} /* endswitch */
|
|
} /* endwhile for cmd line '-'processing */
|
|
|
|
/* test for required tree file name */
|
|
if (argc <= 0) {
|
|
fprintf (stderr, "%s", catgets(dtsearch_catd, MS_huff, 37,
|
|
"576 Missing Huffman Code file names prefix.\n"));
|
|
print_usage ();
|
|
exit (2);
|
|
}
|
|
/* create 2 output file names from passed argument */
|
|
strncpy (filename_huf, argv[0], _POSIX_PATH_MAX);
|
|
filename_huf[_POSIX_PATH_MAX - 6] = 0;
|
|
strcat (filename_huf, EXT_HUFFCODE);
|
|
strncpy (filename_huc, argv[0], _POSIX_PATH_MAX);
|
|
filename_huc[_POSIX_PATH_MAX - 6] = 0;
|
|
strcat (filename_huc, EXT_HDECODE);
|
|
|
|
/* Since the decode file is a C source code file (.c extension),
|
|
* we want to be sure not to erase somebody's source program.
|
|
* So if the .c file already exists, and the user didn't specify
|
|
* overwrite in a command line argument, ask him now if it's OK to
|
|
* blow away the old file.
|
|
*/
|
|
if (!OK_to_overwrite)
|
|
if ((stream = fopen (filename_huc, "r")) != NULL) {
|
|
fclose (stream);
|
|
printf (catgets(dtsearch_catd, MS_huff, 38,
|
|
"Decode file '%s' already exists. "
|
|
"Is it OK to overwrite it? [y/n] "),
|
|
filename_huc);
|
|
if (toupper (getchar ()) != 'Y')
|
|
exit (2);
|
|
}
|
|
|
|
/* test for optional input file name */
|
|
if (--argc <= 0)
|
|
input_file_specified = FALSE;
|
|
else {
|
|
input_file_specified = TRUE;
|
|
strncpy (filename_input, argv[1], _POSIX_PATH_MAX);
|
|
filename_input[_POSIX_PATH_MAX - 1] = 0;
|
|
}
|
|
|
|
return;
|
|
} /* end of function user_args_processor */
|
|
|
|
/****************************************/
|
|
/* */
|
|
/* Main */
|
|
/* */
|
|
/****************************************/
|
|
int main (int argc, char *argv[])
|
|
{
|
|
FILE *instream;
|
|
struct stat fstat_input;
|
|
long bytes_in = 0L;
|
|
int mychar;
|
|
time_t now, start_stamp;
|
|
|
|
setlocale (LC_ALL, "");
|
|
dtsearch_catd = catopen (FNAME_DTSRCAT, 0);
|
|
printf (catgets(dtsearch_catd, MS_huff, 40,
|
|
"HUFFCODE Version %s\n"), AUSAPI_VERSION);
|
|
|
|
/* validate user's command line arguments */
|
|
user_args_processor (argc, argv);
|
|
|
|
/* initialize tree table, using the table file if it exists */
|
|
init_treebase ();
|
|
if (total_count == 0L)
|
|
printf ("%s", catgets(dtsearch_catd, MS_huff, 41,
|
|
"Huffman Code Tables will be newly created.\n"));
|
|
else
|
|
printf (catgets(dtsearch_catd, MS_huff, 42,
|
|
"Table '%s' already contains %ld Kbytes from previous runs.\n"),
|
|
filename_huf, total_count / 1000L);
|
|
|
|
if (!input_file_specified && no_huffcode_file) {
|
|
fprintf (stderr, catgets(dtsearch_catd, MS_huff, 43,
|
|
"645 Input file not specified and '%s' table file\n"
|
|
" doesn't exist--nothing to do!\n"),
|
|
filename_huf);
|
|
print_usage ();
|
|
exit (2);
|
|
}
|
|
|
|
/* read the input file and count its bytes */
|
|
if (input_file_specified) {
|
|
if ((instream = fopen (filename_input, "rb")) == NULL) {
|
|
BAD_INPUT_FILE:
|
|
fprintf (stderr, catgets(dtsearch_catd, MS_huff, 44,
|
|
"Could not open input file '%s' or access status: %s\n"),
|
|
filename_input, strerror (errno));
|
|
exit (2);
|
|
}
|
|
if (fstat (fileno (instream), &fstat_input) == -1)
|
|
goto BAD_INPUT_FILE;
|
|
printf (catgets(dtsearch_catd, MS_huff, 45,
|
|
"Input file '%s' contains about %ld Kbytes.\n"),
|
|
filename_input, fstat_input.st_size / 1000L);
|
|
|
|
time (&start_stamp);
|
|
while ((mychar = getc (instream)) != EOF) {
|
|
hctree1[mychar].count++;
|
|
total_count++;
|
|
|
|
/* echo progress to user every so often */
|
|
if (!(++bytes_in % 10000L))
|
|
printf (catgets(dtsearch_catd, MS_huff, 46,
|
|
"\r%ld%% done. %2ld Kbytes read. "
|
|
"Estimate %3ld seconds to completion. "),
|
|
(bytes_in * 100L) / fstat_input.st_size,
|
|
bytes_in / 1000L,
|
|
(fstat_input.st_size - bytes_in) *
|
|
(time (NULL) - start_stamp) / bytes_in);
|
|
} /* end read loop for each char in input file */
|
|
|
|
putchar ('\n');
|
|
fclose (instream);
|
|
} /* endif that processes input file */
|
|
|
|
/* build huffman code tree, write out files */
|
|
time (&now); /* this will be the official tree id time
|
|
* stamp */
|
|
printf (catgets(dtsearch_catd, MS_huff, 47,
|
|
"Identifying timestamp will be '%ld'.\n"
|
|
"%s Huffman Code Tables in '%s' and '%s'..."),
|
|
now,
|
|
(no_huffcode_file) ?
|
|
catgets(dtsearch_catd, MS_huff, 48, "Creating") :
|
|
catgets(dtsearch_catd, MS_huff, 49, "Rebuilding"),
|
|
filename_huf,
|
|
filename_huc);
|
|
huffman_code (now);
|
|
putchar ('\n');
|
|
return 0;
|
|
} /* end of function main */
|
|
|
|
/************************** HUFFCODE.C ******************************/
|