1
0
Fork 0
mirror of git://git.code.sf.net/p/cdesktopenv/code synced 2025-03-09 15:50:02 +00:00
cde/cde/lib/DtMmdb/compression/trie.C
2024-07-16 09:05:46 +00:00

463 lines
10 KiB
C

/*
* CDE - Common Desktop Environment
*
* Copyright (c) 1993-2012, The Open Group. All rights reserved.
*
* These libraries and programs are free software; you can
* redistribute them and/or modify them under the terms of the GNU
* Lesser General Public License as published by the Free Software
* Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* These libraries and programs are distributed in the hope that
* they will be useful, but WITHOUT ANY WARRANTY; without even the
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU Lesser General Public License for more
* details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with these libraries and programs; if not, write
* to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
* Floor, Boston, MA 02110-1301 USA
*/
/*
* $XConsortium: trie.cc /main/3 1996/06/11 17:15:26 cde-hal $
*
* Copyright (c) 1993 HAL Computer Systems International, Ltd.
* All rights reserved. Unpublished -- rights reserved under
* the Copyright Laws of the United States. USE OF A COPYRIGHT
* NOTICE IS PRECAUTIONARY ONLY AND DOES NOT IMPLY PUBLICATION
* OR DISCLOSURE.
*
* THIS SOFTWARE CONTAINS CONFIDENTIAL INFORMATION AND TRADE
* SECRETS OF HAL COMPUTER SYSTEMS INTERNATIONAL, LTD. USE,
* DISCLOSURE, OR REPRODUCTION IS PROHIBITED WITHOUT THE
* PRIOR EXPRESS WRITTEN PERMISSION OF HAL COMPUTER SYSTEMS
* INTERNATIONAL, LTD.
*
* RESTRICTED RIGHTS LEGEND
* Use, duplication, or disclosure by the Government is subject
* to the restrictions as set forth in subparagraph (c)(l)(ii)
* of the Rights in Technical Data and Computer Software clause
* at DFARS 252.227-7013.
*
* HAL COMPUTER SYSTEMS INTERNATIONAL, LTD.
* 1315 Dell Avenue
* Campbell, CA 95008
*
*/
#include "compression/trie.h"
//trie_node_info::trie_node_info (char c, int f, trie_node* x)
trie_node_info::trie_node_info () : child(0)
{
info.int_view = 0;
image.heap = 0;
}
trie_node_info::~trie_node_info ()
{
delete child;
}
trie_node::trie_node(trie_node_info* parent)
{
#ifdef C_API
children = new trie_node_info_ptr_t[LANG_ALPHABET_SZ+1];
for (int i=0; i<LANG_ALPHABET_SZ+1; i++ )
children[i] = new trie_node_info;
children[LANG_ALPHABET_SZ] -> child = (trie_node*)parent;
#else
children[LANG_ALPHABET_SZ].child = (trie_node*)parent;
#endif
}
trie_node::~trie_node()
{
#ifdef C_API
for (int i=0; i<LANG_ALPHABET_SZ+1; i++ )
delete children[i];
delete children;
#else
children[LANG_ALPHABET_SZ].child = 0;
#endif
}
void trie_node::_print(ostream& out, char* prefix, int prefix_sz)
{
#ifdef C_API
for ( int i=0; i<LANG_ALPHABET_SZ; i++ ) {
if ( children[i] -> info.info_view.freq > 0 ) {
prefix[prefix_sz] = children[i] -> info.info_view.letter;
for (int j=0; j<prefix_sz; j++)
out << prefix[j];
out << char(children[i] -> info.info_view.letter);
out << ":" << int(children[i] -> info.info_view.freq) << "\n";
prefix[prefix_sz] = children[i] -> info.info_view.letter;
if ( children[i] -> child )
children[i] -> child -> _print(out, prefix, prefix_sz+1);
}
}
#else
for ( int i=0; i<LANG_ALPHABET_SZ; i++ ) {
if ( children[i].info.info_view.freq > 0 ) {
prefix[prefix_sz] = children[i].info.info_view.letter;
for (int j=0; j<prefix_sz; j++)
out << prefix[j];
out << char(children[i].info.info_view.letter);
out << ":" << int(children[i].info.info_view.freq) << "\n";
prefix[prefix_sz] = children[i].info.info_view.letter;
if ( children[i].child )
children[i].child -> _print(out, prefix, prefix_sz+1);
}
}
#endif
}
//////////////////////////////////////
//
//////////////////////////////////////
trie::trie(int estimatedsz) :
root(0), max_trie_level(0), total_nodes(1),
sorted_freqs(0), alphabet_sz(0), estimated_sz(estimatedsz)
{
alphabet = new encoding_unitPtr[ estimated_sz];
for ( int i=0; i< estimated_sz; alphabet[i++] = 0 );
}
trie::~trie()
{
delete root;
delete sorted_freqs;
for ( unsigned int i=0; i<alphabet_sz; i++ )
delete alphabet[i];
delete alphabet;
}
void trie::extend_alphabet()
{
if ( (int) alphabet_sz >= estimated_sz ) {
encoding_unitPtr* new_alphabet = new encoding_unitPtr[2* estimated_sz];
for ( int k=0; k< estimated_sz; k++ ) {
new_alphabet[k] = alphabet[k] ;
new_alphabet[k + estimated_sz] = 0;
}
estimated_sz *= 2;
}
}
void trie::add(unsigned char* word, int sz, int fq)
{
//cerr << sz << " ";
//for ( int k=0; k<sz; k++ )
// cerr << word[k];
//cerr << "\n";
static int j;
static trie_node* x = 0;
static trie_node_info* y = 0;
if ( root == 0 )
root = new trie_node(0);
x = root;
for ( int i=0; i<sz; i++ ) {
j = word[i];
#ifdef C_API
y = (x -> children[j]);
#else
y = &(x -> children[j]);
#endif
y -> info.info_view.letter = j;
if ( i<sz-1 && y -> child == 0 ) {
y -> child = new trie_node(y);
total_nodes++;
}
if ( i == sz-1 ) {
y -> info.info_view.freq += fq;
if ( y -> image.eu == 0 ) {
y -> info.info_view.mark = 1;
y->image.eu = add_to_alphabet(word, sz, fq);
} else
y -> image.eu -> freq += fq;
}
x = y -> child;
}
}
encoding_unit* trie::add_to_alphabet(unsigned char* word, int sz, int fq)
{
extend_alphabet();
encoding_unit *x = new encoding_unit(new string((char*)word, sz), fq);
alphabet[alphabet_sz++] = x;
return x;
}
void trie::add_letters(unsigned char* letters, int sz)
{
/*
MESSAGE(cerr, "trie::add_letters()");
cerr << sz << " ";
for ( int k=0; k<sz; k++ )
cerr << letters[k];
cerr << "\n";
*/
static int j;
static trie_node_info* y = 0;
static char buf[1];
static string *z;
if ( root == 0 )
root = new trie_node(0);
for ( int i=0; i<sz; i++ ) {
j = letters[i];
#ifdef C_API
y = (root -> children[j]);
#else
y = &(root -> children[j]);
#endif
y -> info.info_view.freq ++;
if ( y -> image.eu == 0 ) {
y -> info.info_view.letter = j;
buf[0] = char(j);
z = new string(buf, 1);
y -> info.info_view.mark = 1;
extend_alphabet();
alphabet[alphabet_sz++] = y->image.eu =new encoding_unit(z, 1);
} else
y -> image.eu -> freq++;
}
}
ostream& operator <<(ostream& out, trie& tr)
{
static char char_buf[100];
if ( tr.root == 0 )
return out;
tr.root -> _print(out, char_buf, 0);
return out;
}
void update_index(int ind, void* x)
{
((trie_node_info*)x) -> image.heap = ind;
}
void trie::_find_leaf(trie_node* z, int& j)
{
trie_node_info* x = 0;
string *y;
for ( int i=0; i<LANG_ALPHABET_SZ; i++ ) {
#ifdef C_API
x = (z -> children[i]);
#else
x = &(z -> children[i]);
#endif
if ( x -> info.info_view.freq > 0 ) {
if ( x -> child == 0 ) {
x -> info.info_view.mark = 1;
y = get_word(x);
alphabet[j++] =x->image.eu =new encoding_unit(y, x -> info.info_view.freq);
} else
_find_leaf(x -> child, j);
}
}
}
encoding_unit** trie::get_alphabet(unsigned int& a_sz)
{
a_sz = alphabet_sz;
return alphabet;
}
string* trie::get_word(trie_node_info* leaf)
{
static char buf[128];
buf[127] = 0;
int i = 127;
trie_node_info* x = leaf;
if ( x == 0 )
throw(stringException("null leaf node pointer"));
while ( x ) {
buf[--i] = x -> info.info_view.letter;
x = &x[LANG_ALPHABET_SZ - x->info.info_view.letter];
x = (trie_node_info*)(x -> child);
}
//debug(cerr, buf+i);
return new string(buf+i, 127-i);
}
Boolean trie::travers_to(char* str, int len,
trie_node*& node, trie_node_info*& node_info
)
{
//MESSAGE(cerr, "travers_to BEG");
if ( root == 0 )
return false;
node = root;
int i, j;
for ( i=0; i<len; i++ ) {
j = str[i];
//debug(cerr, str[i]);
#ifdef C_API
node_info = (node -> children[j]);
#else
node_info = &(node -> children[j]);
#endif
if ( node_info -> info.info_view.freq == 0 ) {
//MESSAGE(cerr, "BREAK");
break;
} else
node = node_info -> child;
}
//MESSAGE(cerr, "travers_to RET");
return ( len == i ) ? true : false;
}
void trie::collect_freqs(trie_node* rt, int level)
{
for ( int i= 0; i<LANG_ALPHABET_SZ; i++ ) {
#ifdef C_API
if ( rt -> children[i] -> info.info_view.freq > 0 ) {
sorted_freqs -> insert(rt -> children[i]);
if ( rt -> children[i] -> child )
collect_freqs(rt -> children[i] -> child, level+1);
}
#else
if ( rt -> children[i].info.info_view.freq > 0 ) {
sorted_freqs -> insert(&(rt -> children[i]));
if ( rt -> children[i].child )
collect_freqs(rt -> children[i].child, level+1);
}
#endif
}
}
encoding_unit* trie::parse(unsigned char* str, int len)
{
if ( root == 0 )
return 0;
trie_node* node = root;
trie_node_info* cell = 0;
trie_node_info* last_hit = 0;
int i=0;
while ( node && i < len ) {
//debug(cerr, str[i]);
#ifdef C_API
cell = (node -> children[str[i++]]);
#else
cell = &(node -> children[str[i++]]);
#endif
if ( cell -> info.info_view.mark == 1 )
last_hit = cell;
node = cell -> child;
}
if ( last_hit == 0 ) {
debug(cerr, len);
debug(cerr, str[i]);
debug(cerr, int(str[i]));
throw(stringException("parse(): string not in trie"));
}
if ( last_hit -> image.eu == 0 )
throw(stringException("parse(): encoding unit not found"));
return last_hit -> image.eu;
}
///////////////////////////////////////
//
///////////////////////////////////////
Boolean trie_node_ls(voidPtr n1, voidPtr n2)
{
if ( ((trie_node_info*)n1) -> info.info_view.freq < ((trie_node_info*)n2) -> info.info_view.freq )
return true;
else
return false;
}
Boolean trie_node_eq(voidPtr n1, voidPtr n2)
{
if ( ((trie_node_info*)n1) -> info.info_view.freq == ((trie_node_info*)n2) -> info.info_view.freq )
return true;
else
return false;
}