wanproxy/xcodec/xcodec_hash.h
2015-08-31 14:01:44 +02:00

177 lines
4.2 KiB
C++

/*
* Copyright (c) 2008-2012 Juli Mallett. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef XCODEC_XCODEC_HASH_H
#define XCODEC_XCODEC_HASH_H
#include <strings.h>
class XCodecHash {
struct RollingHash {
uint32_t sum1_; /* Really <16-bit. */
uint32_t sum2_; /* Really <32-bit. */
uint32_t buffer_[XCODEC_SEGMENT_LENGTH]; /* Really >8-bit. */
RollingHash(void)
: sum1_(0),
sum2_(0),
buffer_()
{ }
void add(uint32_t ch, unsigned start)
{
buffer_[start] = ch;
sum1_ += ch;
sum2_ += sum1_;
}
void reset(void)
{
sum1_ = 0;
sum2_ = 0;
}
void roll(uint32_t ch, unsigned start)
{
uint32_t dead;
dead = buffer_[start];
sum1_ -= dead;
sum2_ -= dead * XCODEC_SEGMENT_LENGTH;
buffer_[start] = ch;
sum1_ += ch;
sum2_ += sum1_;
}
};
RollingHash bytes_;
RollingHash bits_;
unsigned start_;
#ifndef NDEBUG
unsigned length_;
#endif
public:
XCodecHash(void)
: bytes_(),
bits_(),
start_(0)
#ifndef NDEBUG
, length_(0)
#endif
{ }
~XCodecHash()
{ }
void add(uint8_t ch)
{
unsigned bit = ffs(ch);
unsigned word = (unsigned)ch + 1;
#ifndef NDEBUG
ASSERT("/xcodec/hash", length_ < XCODEC_SEGMENT_LENGTH);
#endif
bytes_.add(word, start_);
bits_.add(bit, start_);
#ifndef NDEBUG
length_++;
#endif
start_ = (start_ + 1) % XCODEC_SEGMENT_LENGTH;
}
void reset(void)
{
bytes_.reset();
bits_.reset();
#ifndef NDEBUG
length_ = 0;
#endif
start_ = 0;
}
void roll(uint8_t ch)
{
unsigned bit = ffs(ch);
unsigned word = (unsigned)ch + 1;
#ifndef NDEBUG
ASSERT("/xcodec/hash", length_ == XCODEC_SEGMENT_LENGTH);
#endif
bytes_.roll(word, start_);
bits_.roll(bit, start_);
start_ = (start_ + 1) % XCODEC_SEGMENT_LENGTH;
}
/*
* XXX
* Need to write a compression function for this; get rid of the
* completely non-entropic bits, anyway, and try to mix the others.
*
* Need to look at what bits can even possibly be set in sum2_,
* looking at all possible ranges resulting from:
* 128*data[0] + 127*data[1] + ... 1*data[127]
*
* It seems like since each data[] must have at least 1 bit set,
* there are a great many impossible values, and there is a large
* minimal value. Should be easy to compress, and would rather
* have the extra bits changing from the normalized (i.e. non-zero)
* input and have to compress than have no bits changing...but
* perhaps those are extensionally-equivalent and that's just
* hocus pocus computer science. Need to think more clearly and
* fully about it.
*/
uint64_t mix(void) const
{
#ifndef NDEBUG
ASSERT("/xcodec/hash", length_ == XCODEC_SEGMENT_LENGTH);
#endif
uint64_t bits_hash = (bits_.sum1_ << 16) + bits_.sum2_;
uint64_t bytes_hash = (bytes_.sum1_ << 20) + bytes_.sum2_;
return ((bits_hash << 36) + bytes_hash);
}
static uint64_t hash(const uint8_t *data)
{
XCodecHash xchash;
unsigned i;
for (i = 0; i < XCODEC_SEGMENT_LENGTH; i++)
xchash.add(*data++);
return (xchash.mix());
}
};
#endif /* !XCODEC_XCODEC_HASH_H */