해시
hashing
은 데이터를 저장할 위치 = 인덱스를 간단한 연산으로 구하는 것을 말한다.
원소의 검색뿐 아니라 추가 삭제도 효율적으로 수행할 수 있다.
해시 충돌
- 체인법 : 해시값이 같은 원소를 연결 리스트로 관리한다.
- 오픈 주소법 : 빈 버킷(해시 테이블에서 만들어진 원소)을 찾을 때까지 해시를 반복한다.
체인법
from __future__ import annotations
from typing import Any, Type
import hashlib
class Node:
def __init__(self, key: Any, value: Any, next: Node)->None:
self.key = key
self.value = value
self.next = next
class ChainedHash:
def __init__(self, capacity: int) -> None:
self.capacity = capacity
self.table= [None] * self.capacity
def hash_value(self, key: Any) -> int:
if isinstance(key, int):
return key % self.capacity
return(int(hashlib.sha256(str(key).encode()).hexdigest(), 16) % self.capacity)
def search(self, key: Any) -> Any:
hash = self.hash_value(key)
p = self.table[hash]
while p is not None :
if p.key == key:
return p.value
p = p.next
return None
def add(self, key: Any, value: Any)-> bool :
hash = self.hash_value(key)
p = self.table[hash]
while p is not None:
if p.key == key:
return False
p = p.next
temp = Node(key, value, self.table[hash])
self.table[hash] = temp
return True
def remove(self, key: Any) -> bool:
hash = self.hash_value(key)
p = self.table[hash]
pp = None
while p is not None:
if p.key == key:
if pp is None:
self.table[hash] = p.next
else:
pp.next = p.next
return True
pp = p
p = p.next
return False
def dump(self) -> None:
for i in range(self.capacity):
p = self.table[i]
print(i, end='')
while p is not None :
print(f' -> {p.key} ({p.value})', end='')
p = p.next
print()
오픈 주소법
from __future__ import annotations
from typing import Any, Type
from enum import Enum
import hashlib
class Status(Enum):
OCCUPIED = 0
EMPTY = 1
DELETED = 2
class Bucket:
def __init__(self, key: Any = None, value: Any = None,
stat: Status = Status.EMPTY) -> None:
self.key = key
self.value = value
self.stat = stat
def set(self, key:Any, value: Any, stat: Status) -> None:
self.key = key
self.value = value
self.stat = stat
def set_status(self, stat: Status) -> None:
self.stat = stat
class OpenHash:
def __init__(self, capacity: int) -> None:
self.capacity = capacity
self.table = [Bucket()] * self.capacity
def hash_value(self, key: Any) -> int:
if isinstance(key, int):
return key % self.capacity
return(int(hashlib.md5(str(key).encode()).hexdigest(), 16) % self.capacity)
def rehash_value(self, key: Any) -> int:
return (self.hash_value(key) + 1) % self.capacity
def search_node(self,key: Any) -> Any:
hash = self.hash_value(key)
p = self.table[hash]
for i in range(self.capacity):
if p.stat == Status.EMPTY:
break
elif p.stat == Status.OCCUPIED and p.key == key:
return p
hash = self.rehash_value(hash)
p = self.table[hash]
return None
def search(self, key: Any) -> Any:
p = self.search_node(key)
if p is not None:
return p.value
else:
return None
def add(self,key: Any, value: Any) -> bool:
if self.search(key) is not None:
return False
hash = self.hash_value(key)
p = self.table[hash]
for i in range(self.capacity):
if p.stat == Status.EMPTY or p.stat == Status.DELETED:
self.table[hash] = Bucket(key, value, Status.OCCUPIED)
return True
hash = self.rehash_value(hash)
p = self.table[hash]
return False
def remove(self, key:Any) -> int:
p = self.search_node(key)
if p is None:
return False
p.set_status(Status.DELETED)
return True
def dump(self) -> None:
for i in range(self.capacity):
print(f'{i:2} ', end='')
if self.table[i].stat == Status.OCCUPIED:
print(f'{self.table[i].key} ({self.talbe[i].value})')
elif self.table[i].stat == Status.EMPTY:
print('-- 미등록 --')
elif self.table[i].stat == Status.DELETED:
print('-- 삭제 완료 --')