Thiago P.

Phone Hashing

Posted in Computer Science, Programming, Snips

← back to the blog


Premise

After learning a bit of hashing and experimenting with polynomial hashing of strings and their collisions, I decided to make a simple hashing solution for phone numbers, i.e. use the phone number as a key.

I decided only to use he last 7 digits of the phone number as the key, ignoring the area code. One of my goals was to make a hash table that could handle the full input range of 7-digit numbers (10 million possible values).
Note that the phone numbers are fed in as string.

To generate a hash code from these 7 digits I simply mushed them together into a single number. For example, the input phone number string "2419515" would result in the int 2419515. This approach ensures that given 2 different keys/phone numbers, no collisions will ever occur (before compression)

To handle hashes outside of the table, I used simple modulo compression against the size of the table. The table itself uses separate chaining to resolve collisions and has the ability to resize and rehash when its load factor reaches a certain amount

 

Observations

To test the table, I inputted ever possible 7 digit number string into the hash table.
"0000000", "0000001", "00000002"..."1000201" etc... all 10 million of them

The most expensive operation was rehashing the table. A low maximum load factor would result in frequent rehashing, which was expensive, and a high maximum load factor would result in less frequent rehashing but when the table did have to rehash, it took some time (1-3 seconds).

Given the constraints of the problem

It was simply cheaper to just pre-allocate an array of size 10 million and never have to rehash.

Code

Hash Table Module

/**
* Hash table to store data using phone number strings as a key, where the hash/key is last 7 digets
* of the phone number.
* Generates a hash from the phone numbers by merging the last 7 digits into a single int.
*
* @example "4075451632" will result in the int hash 5451632
* TODO: Handle duplicate insertions
*/
module.exports = function phoneTable(initialSize) {
	const initialTableSize = initialSize || 50000;
	const maxTableSize = 10000000;
	const rehashScaleFactor = 2;
	const maxLoadFactor = 0.75;

	if(initialTableSize > maxTableSize) {
		initialTableSize = maxTableSize;
	}
	const table = new Array(initialTableSize);
	let entries = 0;

	function phoneHashCode(phone) {
		if(typeof phone != 'string') {
			return -1;
		}
		else if(phone.length < 7) {
			return -1;
		}

		let hash = 0;
		let currentPower = 1;

		for(let i = phone.length-1; i >= phone.length - 7; i--) {
			let currentNumber = parseInt(phone[i]);
			if(isNaN(currentNumber)) {
				return -1;
			}

			hash += currentNumber * currentPower;
			currentPower *= 10;
		}

		if(hash >= table.length) {
			hash = hash % table.length;
		}
		return hash;
	}

	function getLoadFactor() {
		return entries/table.length;
	}
	function rehashTable() {
		if(table.length == maxTableSize) {
			return;
		}

		const oldTableSize = table.length;
		let newTableSize = table.length * rehashScaleFactor;
		if(newTableSize > maxTableSize) {
			newTableSize = maxTableSize;
		}
		table.length = newTableSize;

		for(let i = 0; i < oldTableSize; i++) {
			let bucket = table[i];
			if(bucket == undefined) {
				continue;
			}

			for(let x = bucket.length-1; x >= 0; x--) {
				const dataObject = bucket[x];
				const phoneHash = phoneHashCode(dataObject.phone);
				if(phoneHash == i) {
					// this phone does not need to be rehashed
					continue;
				}

				insert(dataObject.phone, dataObject.data, phoneHash, true);
				bucket.splice(x, 1);
			}

			if(bucket.length == 0) {
				table[i] = undefined;
			}
		}

	}

	/**
	* Insert an entry into the table given the phone as a string and the data to insert
	*
	* @param {string} phone - The phone number to insert. Must be a strin of at least
	* length 7 with all chars as numbers Only the last 7 digits of the number will be used.
	*
	* @param {number} usePhoneHash - An int. If set, this hash/int value will be used instead of
	* calculating one based on the phone arg. The phone arg must still be provided.
	*
	* @param {bool} reinsert - Set to true if an entry is being moved from one place in the table
	* to another. If true, the entries counter will not be incramented.
	*/
	function insert(phone, data, usePhoneHash, reinsert) {
		let phoneHash = usePhoneHash || phoneHashCode(phone);
		if(phoneHash === -1) {
			throw new Error(`Unable to generate a hash int from the phone number: ${phone} Make sure its a valid number and a string and at least 7 digits long.`);
		}

		let dataObject = {
			phone: phone,
			data: data
		};

		let collision = false;
		if(table[phoneHash] == null) {
			table[phoneHash] = [dataObject]
		}
		else {
			table[phoneHash].push(dataObject);
			collision = true;
		}

		if(reinsert !== true) {
			entries ++;
		}
		if(getLoadFactor() >= maxLoadFactor && table.length < maxTableSize) {
			console.log(`Must rehash on phone: ${phone}`);
			rehashTable();
		}

		return collision;

	}
	function get(phone) {
		const phoneHash = phoneHashCode(phone);
		if(phoneHash === -1) {
			throw new Error(`Unable to generate a hash int from the phone number: ${phone} Make sure its a valid number and a string and at least 7 digits long.`);
		}

		if(table[phoneHash] == null) {
			return undefined;
		}
		for(dataObject of table[phoneHash]) {
			if(dataObject.phone === phone) {
				return dataObject;
			}
		}
		return undefined
	}

	function getTable() {
		return table;
	}


	return {
		insert,
		get
	}
}

Testing File

const phoneTable = require('./phoneTable.js');

const table = phoneTable(10000000);
const tablePopulation = 10000000;

function numberToPhone(num) {
	let phone = num.toString();
	while(phone.length < 7) {
		phone = '0' + phone;
	}
	return phone;
}
function populateTable(startNum, endNum) {
	for(let i = startNum; i < endNum; i++) {
		const phoneNumber = numberToPhone(i);
		const collision = table.insert(phoneNumber, i);
		if(collision === true) {
			console.log(`Collision on inserting phone ${phoneNumber}`);
		}
	}
}

console.log(`Populating the table with numbers ${0} through ${tablePopulation}`);
populateTable(0, tablePopulation);

console.log(table.get('0500000'));