32-Bit Non-Cryptographic Hash: A Fast, Non-Multiplicative, Security-Centric Hashing Algorithm Without 64-Bit Arithmetic or Unaligned Memory

Eightomic developed a 32-bit hashing algorithm with a library in C99 as a substantial improvement to CityHash32 v1.1, MurmurHash3, SipHash32 and XXHash32.

Library

Source

#include <stdint.h> #include <string.h> struct eightomic_s { uint32_t a; uint32_t b; uint32_t c; uint32_t d; uint32_t e; uint32_t f; uint32_t g; uint32_t h; uint32_t _state; uint32_t state; unsigned long _input_count; }; uint32_t align(uint8_t *input, unsigned long i) { uint32_t _input = 0xFEFF; uint8_t byte_order; memcpy(&byte_order, &_input, sizeof(byte_order)); memcpy(&_input, &input[i], sizeof(_input)); if (byte_order != 0xFF) { _input = (_input << 24) | ((_input & 0xFF00) << 8) | ((_input & 0xFF0000) >> 8) | (_input >> 24); } return _input; } void eightomic_initialize(struct eightomic_s *s) { s->a = 1; s->b = 11; s->c = 111; s->d = 1111; s->e = 11111; s->f = 111111; s->g = 1111111; s->h = 11111111; s->_state = 111111111; s->state = 1111111111; s->_input_count = 0; } void eightomic_transform(unsigned long i, unsigned long input_count, uint8_t *input, struct eightomic_s *s) { if (input_count >= 32) { i = 31; while (i < input_count) { s->_state = s->state; s->state += s->a + s->b + s->c + s->d + s->e + s->f + s->g + s->h; s->a += align(input, i - 3) + ((s->a << 8) | (s->a >> 24)) + s->_state + 1; s->b += align(input, i - 7) + ((s->b << 9) | (s->b >> 23)) + s->_state + 11; s->c += align(input, i - 11) + ((s->c << 10) | (s->c >> 22)) + s->_state + 111; s->d += align(input, i - 15) + ((s->d << 11) | (s->d >> 21)) + s->_state + 1111; s->e += align(input, i - 19) + ((s->e << 12) | (s->e >> 20)) + s->_state + 11111; s->f += align(input, i - 23) + ((s->f << 13) | (s->f >> 19)) + s->_state + 111111; s->g += align(input, i - 27) + ((s->g << 14) | (s->g >> 18)) + s->_state + 1111111; s->h += align(input, i - 31) + ((s->h << 15) | (s->h >> 17)) + s->_state + 11111111; i += 32; } if (i >= input_count) { i -= 32; } s->_state += s->a + s->b + s->c + s->d + s->e + s->f + s->g + s->h; i++; } if ((input_count - i) >= 16) { i += 16; s->_state += s->state; s->state += s->a + s->b + s->c + s->d + s->e; s->a += align(input, i - 16) + ((s->a << 8) | (s->a >> 24)) + s->_state + 1; s->b += align(input, i - 12) + ((s->b << 9) | (s->b >> 23)) + s->_state + 11; s->c += align(input, i - 8) + ((s->c << 10) | (s->c >> 22)) + s->_state + 111; s->d += align(input, i - 4) + ((s->d << 11) | (s->d >> 21)) + s->_state + 1111; } if ((input_count - i) >= 8) { i += 8; s->_state += s->state; s->state += s->a + s->b + s->c; s->a += align(input, i - 8) + ((s->a << 8) | (s->a >> 24)) + s->_state + 1; s->b += align(input, i - 4) + ((s->b << 9) | (s->b >> 23)) + s->_state + 11; } s->_input_count += input_count; if (i != input_count) { s->state += s->_state + ((s->a << 8) | (s->a >> 24)); input_count -= i; if (input_count >= 4) { s->a += s->state + align(input, i) + 1111111111; if (input_count != 4) { s->_state += s->a + s->b; s->state += s->_state + ((s->b << 10) | (s->b >> 22)); if (input_count == 7) { s->b += s->state + (input[i + 4] | (input[i + 5] << 8) | (input[i + 6] << 16)) + 1111111; } else { if (input_count == 6) { s->b += s->state + (input[i + 4] | (input[i + 5] << 8)) + 111111; } else { s->b += s->state + input[i + 4] + 11111; } } } } else { if (input_count == 3) { s->a += s->state + (input[i] | (input[i + 1] << 8) | (input[i + 2] << 16)) + 111; } else { if (input_count == 2) { s->a += s->state + (input[i] | (input[i + 1] << 8)) + 11; } else { s->a += s->state + input[i] + 1; } } } } } void eightomic_finalize(struct eightomic_s *s) { s->a += s->_state; s->state += (s->a << 8) | (s->a >> 24); s->b += s->_state ^ s->state; s->state += (s->b << 9) | (s->b >> 23); if (s->_input_count >= 16) { s->c += s->_state + s->state; s->state += (s->c << 10) | (s->c >> 22); s->d += s->state; s->state += (s->d << 11) | (s->d >> 21); if (s->_input_count >= 32) { s->e += s->state; s->state += (s->e << 12) | (s->e >> 20); s->f += s->state; s->state += (s->f << 13) | (s->f >> 19); s->g += s->state; s->state += (s->g << 14) | (s->g >> 18); s->h += s->a + s->state; s->state += ((s->h << 15) | (s->h >> 17)) + s->_state; } } s->_state += s->state + s->_input_count; s->state += ((s->a << 13) | (s->a >> 19)) ^ s->h; s->b += s->state; s->state += s->a ^ ((s->b << 14) | (s->b >> 18)); s->c += s->_state ^ s->state; s->state += s->b ^ ((s->c << 15) | (s->c >> 17)); s->d += s->_state + s->state; s->state += s->c ^ ((s->d << 17) | (s->d >> 15)); s->_state += s->state; s->e += s->_state ^ s->state; s->state += s->d ^ ((s->e << 18) | (s->e >> 14)); s->f += s->_state + s->state; s->state += s->e ^ ((s->f << 19) | (s->f >> 13)); s->g += s->state; s->state += s->f ^ ((s->g << 20) | (s->g >> 12)); s->h += s->state; s->state += (s->g ^ ((s->h << 21) | (s->h >> 11))) + s->_state; }

Reference

eightomic_initialize() is the initialization function that accepts the following argument.

s is a struct eightomic_s pointer.

eightomic_transform() is the core hashing loop that accepts the 4 following arguments.

i is the starting index position of elements in the input array.

input_count is the count of elements in the input array. When hashing in split segments, the value must be a multiple of 32, with the exception of the end segment.

input is the int8_t array to hash.

s is a struct eightomic_s pointer.

eightomic_finalize() is the finalization function that accepts the following argument.

s is a struct eightomic_s pointer. s.state contains the finalized hash digest result.

The return value data type is void.

Requirements

C compiler with C99 (ISO/IEC 9899:1999) standard compatibility.

CPU with single-threaded, instruction-level parallelism support.

Explanation

This 32-bit non-cryptographic hashing algorithm is designed to hash keys of all sizes in data structures without hidden security issues such as excess collisions, hashDoS vulnerability, poor distribution and slow speeds.

It has speed properties similar to XXHash32 with security properties similar to SipHash.

It's portable for both 32-bit and 64-bit systems.

It meets strict compliance, portability and security requirements.

Endian-agnostic memory reading is aligned safely using memcpy() with resistance to runtime byte order changes.

It doesn't use modulus, multiplication or division arithmetic operations.

It supports unlimited input length by splitting input bytes into descending segments of 256, 128, 64 and the remaining 8–63 bits. All-at-once hashing isn't required and the digest results are consistent when hashing in partial segments.

Single-threaded, instruction-level parallelism with low-cost addition and bitwise instructions work well on a wide range of CPU queue loads and devices.

The staggered additive constants, summed with s->a through s->h, eliminate any possible vulnerability to input elements with a 0 value.

Not allowing a seed secures the possibility of 2^32 deterministic attacks, so all seed-based tests were omitted.

It passes all extended SMHasher excessive torture tests that don't require a seed and it's likely to pass new tests as they're released.

Most of the tests passed beyond the default extended limit. For example, avalanche tests passed with 2000-bit, 3000-bit and 3501-bit inputs.

Compared to CityHash and MurmurHash, the speed's approximately 35% to 54% faster on average for varying input lengths with significantly-less collisions and better avalanche distribution.

Furthermore, CityHash fails some SMHasher tests, including the avalanche test.

Compared to SipHash, the speed's at least 400% faster on average with the similar absence of cryptanalytic vulnerabilities using 320 auxiliary bits of preimage attack security.

Compared to XXHash32, the speed's similar on average for varying input lengths without failing SMHasher tests.

Furthermore, it's the only hashing algorithm that doesn't use multiplication operations out of the 4 compared hashing algorithms.

As with the other hashing functions in SMHasher, it was tested with the following all-at-once hashing function instead of the slower segmented functions from the aforementioned library code.

#include <stdint.h> #include <string.h> uint32_t align(const uint8_t *input, unsigned long i) { uint32_t _input = 0xFEFF; uint8_t byte_order; memcpy(&byte_order, &_input, sizeof(byte_order)); memcpy(&_input, &input[i], sizeof(_input)); if (byte_order != 0xFF) { _input = (_input << 24) | ((_input & 0xFF00) << 8) | ((_input & 0xFF0000) >> 8) | (_input >> 24); } return _input; } uint32_t eightomic(unsigned long input_count, const uint8_t *input) { uint32_t a = 1; uint32_t b = 11; uint32_t c = 111; uint32_t d = 1111; uint32_t e = 11111; uint32_t f = 111111; uint32_t g = 1111111; uint32_t h = 11111111; unsigned long i = 0; uint32_t _state = 111111111; uint32_t state = 1111111111; unsigned long _input_count = 0; if (input_count >= 32) { i = 31; while (i < input_count) { _state = state; state += a + b + c + d + e + f + g + h; a += align(input, i - 3) + ((a << 8) | (a >> 24)) + _state + 1; b += align(input, i - 7) + ((b << 9) | (b >> 23)) + _state + 11; c += align(input, i - 11) + ((c << 10) | (c >> 22)) + _state + 111; d += align(input, i - 15) + ((d << 11) | (d >> 21)) + _state + 1111; e += align(input, i - 19) + ((e << 12) | (e >> 20)) + _state + 11111; f += align(input, i - 23) + ((f << 13) | (f >> 19)) + _state + 111111; g += align(input, i - 27) + ((g << 14) | (g >> 18)) + _state + 1111111; h += align(input, i - 31) + ((h << 15) | (h >> 17)) + _state + 11111111; i += 32; } if (i >= input_count) { i -= 32; } _state += a + b + c + d + e + f + g + h; i++; } if ((input_count - i) >= 16) { i += 16; _state += state; state += a + b + c + d + e; a += align(input, i - 16) + ((a << 8) | (a >> 24)) + _state + 1; b += align(input, i - 12) + ((b << 9) | (b >> 23)) + _state + 11; c += align(input, i - 8) + ((c << 10) | (c >> 22)) + _state + 111; d += align(input, i - 4) + ((d << 11) | (d >> 21)) + _state + 1111; } if ((input_count - i) >= 8) { i += 8; _state += state; state += a + b + c; a += align(input, i - 8) + ((a << 8) | (a >> 24)) + _state + 1; b += align(input, i - 4) + ((b << 9) | (b >> 23)) + _state + 11; } _input_count += input_count; if (i != input_count) { state += _state + ((a << 8) | (a >> 24)); input_count -= i; if (input_count >= 4) { a += state + align(input, i) + 1111111111; if (input_count != 4) { _state += a + b; state += _state + ((b << 10) | (b >> 22)); if (input_count == 7) { b += state + (input[i + 4] | (input[i + 5] << 8) | (input[i + 6] << 16)) + 1111111; } else { if (input_count == 6) { b += state + (input[i + 4] | (input[i + 5] << 8)) + 111111; } else { b += state + input[i + 4] + 11111; } } } } else { if (input_count == 3) { a += state + (input[i] | (input[i + 1] << 8) | (input[i + 2] << 16)) + 111; } else { if (input_count == 2) { a += state + (input[i] | (input[i + 1] << 8)) + 11; } else { a += state + input[i] + 1; } } } } a += _state; state += (a << 8) | (a >> 24); b += state ^ _state; state += (b << 9) | (b >> 23); if (_input_count >= 16) { c += _state + state; state += (c << 10) | (c >> 22); d += state; state += (d << 11) | (d >> 21); if (_input_count >= 32) { e += state; state += (e << 12) | (e >> 20); f += state; state += (f << 13) | (f >> 19); g += state; state += (g << 14) | (g >> 18); h += a + state; state += ((h << 15) | (h >> 17)) + _state; } } _state += state + _input_count; state += ((a << 13) | (a >> 19)) ^ h; b += state; state += a ^ ((b << 14) | (b >> 18)); c += _state ^ state; state += b ^ ((c << 15) | (c >> 17)); d += _state + state; state += c ^ ((d << 17) | (d >> 15)); _state += state; e += _state ^ state; state += d ^ ((e << 18) | (e >> 14)); f += _state + state; state += e ^ ((f << 19) | (f >> 13)); g += state; state += f ^ ((g << 20) | (g >> 12)); h += state; return (g ^ ((h << 21) | (h >> 11))) + state + _state; }

Games

Contrivity Contrivity Icon

Contrivity

Spawn into the hostile quantum laboratory and destroy wave after wave of oscillations.

v1.0.14