Eightomic

Eightomic Hash 32 E: The Fastest 32-bit Hashing Algorithm Versus CityHash32 and XXHash32

Eightomic Hash 32 E is a 32-bit hashing algorithm as a substantial improvement to CityHash32, SipHash32 and XXHash32.

Library

Source

#ifndef EIGHTOMIC_HASH_32_E_H #define EIGHTOMIC_HASH_32_E_H #include <stdint.h> #include <string.h> uint32_t eightomic_hash_32_e(unsigned long input_count, const uint8_t *input); #endif#include "eightomic_hash_32_e.h" uint32_t eightomic_align_32(const uint8_t *input, unsigned long i) { uint32_t input_aligned; memcpy(&input_aligned, &input[i], sizeof(uint32_t)); return input_aligned; } uint32_t eightomic_hash_32_e(unsigned long input_count, const uint8_t *input) { const unsigned long input_count_capture = input_count; uint32_t a = 1; uint32_t b = 11; uint32_t c = 111; uint32_t d = 1111; uint32_t e = 11111; uint32_t f = 111111; uint32_t g = 1111111; uint32_t h = 11111111; uint32_t mix = 1111111111; uint32_t mix_offset = 111111111; unsigned long i = 0; if (input_count >= 32) { i = 31; while (i < input_count) { mix_offset = mix; mix += a + b + c + d + e + f + g + h; a += eightomic_align_32(input, i - 3) + ((a << 8) | (a >> 24)) + mix_offset; b += eightomic_align_32(input, i - 7) + ((b << 9) | (b >> 23)) + mix_offset; c += eightomic_align_32(input, i - 11) + ((c << 10) | (c >> 22)); d += eightomic_align_32(input, i - 15) + ((d << 11) | (d >> 21)); e += eightomic_align_32(input, i - 19) + ((e << 12) | (e >> 20)); f += eightomic_align_32(input, i - 23) + ((f << 13) | (f >> 19)); g += eightomic_align_32(input, i - 27) + ((g << 14) | (g >> 18)); h += eightomic_align_32(input, i - 31) + ((h << 15) | (h >> 17)); i += 32; } if (i >= input_count) { i -= 32; } mix_offset += a + b + c + d + e + f + g + h; i++; } if ((input_count - i) >= 16) { i += 16; a += eightomic_align_32(input, i - 16) + ((a << 8) | (a >> 24)); b += eightomic_align_32(input, i - 12) + ((b << 23) | (b >> 9)); c += eightomic_align_32(input, i - 8) + ((c << 10) | (c >> 22)); d += eightomic_align_32(input, i - 4) + ((d << 21) | (d >> 11)); mix += a + b + c + d; } if ((input_count - i) >= 8) { i += 8; a += eightomic_align_32(input, i - 8) + ((a << 8) | (a >> 24)); b += eightomic_align_32(input, i - 4) + ((b << 23) | (b >> 9)); mix += a + b; } if (i != input_count) { mix += ((a << 8) | (a >> 24)) + mix_offset; input_count -= i; if (input_count >= 4) { a += eightomic_align_32(input, i) + ((a << 23) | (a >> 9)); if (input_count != 4) { mix += a + b; switch (input_count) { case 7: b += (input[i + 6] << 16) | (input[i + 5] << 8) | input[i + 4]; goto finalize_a; case 6: b += (input[i + 5] << 8) | input[i + 4]; goto finalize_a; case 5: b += input[i + 4]; } } goto finalize_a; } switch (input_count) { case 3: a ^= (input[i + 2] << 16) | (input[i + 1] << 8) | input[i]; goto finalize_a; case 2: a ^= (input[i + 1] << 8) | input[i]; goto finalize_a; case 1: a ^= input[i]; } } finalize_a: a += b + mix_offset; mix += (a << 8) | (a >> 24); if (input_count_capture >= 16) { mix += ((c << 22) | (c >> 10)) + ((d << 11) | (d >> 21)); if (input_count_capture >= 32) { mix_offset += ((e << 20) | (e >> 12)) ^ ((g << 18) | (g >> 14)); mix += (h << 15) | (h >> 17); mix_offset += input_count_capture + mix; mix += ((a << 13) | (a >> 19)) ^ h; b ^= mix_offset; mix_offset += a ^ ((b << 18) | (b >> 14)); c ^= mix; mix += b ^ ((c << 15) | (c >> 17)); e ^= mix_offset; mix_offset += d ^ ((e << 18) | (e >> 14)); f ^= ((e << 18) | (e >> 14)) + mix; mix_offset += e ^ ((f << 19) | (f >> 13)); g ^= mix_offset; mix += f ^ ((g << 21) | (g >> 11)); goto finalize_b; } mix_offset += input_count_capture + mix; mix += ((a << 13) | (a >> 19)) ^ h; b ^= mix_offset; mix_offset += a ^ ((b << 18) | (b >> 14)); c ^= mix; mix += b ^ ((c << 15) | (c >> 17)); e ^= mix_offset; mix_offset += d ^ ((e << 18) | (e >> 14)); goto finalize_b; } mix_offset += input_count_capture + mix; mix += ((a << 13) | (a >> 19)) ^ h; b ^= mix_offset; mix_offset += a ^ ((b << 18) | (b >> 14)); if (input_count_capture >= 8) { c ^= mix; mix += b ^ ((c << 15) | (c >> 17)); e ^= mix_offset; mix_offset += d ^ ((e << 18) | (e >> 14)); } finalize_b: mix ^= mix_offset; mix += (mix_offset << 27) | (mix_offset >> 5); mix_offset ^= mix >> 4; mix += (mix_offset << 8) | (mix_offset >> 24); mix ^= mix_offset >> 3; mix_offset += (mix << 14) | (mix >> 18); mix_offset ^= ((mix << 9) | (mix >> 23)) + (mix_offset >> 7); return input_count_capture + mix + mix_offset; }

Reference

eightomic_hash_32_e() is the hashing function that accepts the 2 following arguments in left-to-right order.

1: input_count is the unsigned long count of elements in the input array.

2: input is the const uint8_t array to hash.

The return value data type is uint32_t.

It returns the 32-bit unsigned integer hash digest result.

Example

#include <stdio.h> #include "eightomic_hash_32_e.h" int main(void) { uint8_t input[8] = {'m', 'e', 's', 's', 'a', 'g', 'e', 0}; unsigned char i = 0; while (i < 10) { i++; printf("Result %u is 0x%08x.\n", i, eightomic_hash_32_e(8, input)); input[7]++; } return 0; }

Explanation

Eightomic Hash 32 E uses incrementally-decreasing block sizes based on the largest possible block size within input_count. It starts by hashing blocks of 256 bits, then decreases to 128 bits, 64 bits, 32 bits and the remainder instead of branching into one of several hash functions based on input_count.

The finalization process decreases collisions by mixing between 16 and 256 bits of state into mix and mix_offset based on the input_count to increase speed for inputs smaller than 32 bytes.

The remaining finalization process is similar to the pseudo-cryptographic finalization from Eightomic Hash 32 C to increase diffusion for a good avalanche effect in a combination of 2 32-bit blocks.

In rare instances when hash digests must be saved after program termination and re-used in multiple systems with varying endianness, keys should be re-hashed during initialization instead of slowing down runtime hashing with byte order alignment.

In rare instances when byte order is expected to change during runtime, replace eightomic_align_32() with the following endian-independent version with a tradeoff of slower speed.

uint32_t eightomic_align_32(const uint8_t *input, unsigned long i) { uint32_t input_aligned = input[i]; input_aligned |= input[i + 1] << 8; input_aligned |= input[i + 2] << 16; input_aligned |= input[i + 3] << 24; return input_aligned; }

Seed tests in SMHasher are omitted to both discourage using hashing algorithms as PRNGs and prevent collision vulnerabilities from 2³² different initialized states.

It passes all extended SMHasher tests that don't require a seed.

It's faster than CityHash32 with all input sizes.

Furthermore, CityHash32 fails SMHasher avalanche effect tests as of v1.1.

It's faster than SipHash32, or HalfSipHash, with all input sizes. It has similar pseudo-cryptographic security properties with 320 auxiliary bits against preimage attacks.

It's faster than XXHash32 on average with input sizes segmented from 1 byte to 32 bytes, 1 byte to 64 bytes and greater than 64 bytes.

Furthermore, XXHash32 fails multiple SMHasher collision tests.