The Weisfeiler-Lehman graph kernel 구현

Sngmng·2023년 1월 18일
1

https://github.com/sngmng6506/WL-graph-kernel

Task

binary graph classification

Dataset

Cora dataset을 통해 graph classification task를 진행하려고 했지만, 해당 데이터는 multi-class 를 갖기 때문에 구현하고 평가하는데 문제가 생겨 Binary-class를 갖는 MUTAG dataset을 통해 평가하는 것으로 방향을 바꾼다.

Weisfeiler-Lehman Graph Kernels 에서도 MUTAG dataset에 대해 실험을 진행했다.

MUTAG는 188개의 화합물의 원자간 결합관계에 대한 데이터다.

Data preparing


#load
df_A = pd.read_csv('./data/MUTAG/MUTAG_A.txt',header=None)
df_B = pd.read_csv('./data/MUTAG/MUTAG_graph_indicator.txt',header=None)
df_C = pd.read_csv('./data/MUTAG/MUTAG_node_labels.txt',header=None)
df_D = pd.read_csv('./data/MUTAG/MUTAG_graph_labels.txt',header=None)


node_num = len(df_C)
adj_total = np.zeros((node_num+1,node_num+1),dtype=int)
A = df_A.to_numpy(dtype=np.int32)
for row,col in A:
    adj_total[row-1][col-1] = 1


B = df_B.to_numpy(dtype=np.int32)
sub_graphs_class ={} # (graph_id : idx )
for idx,graph_id in enumerate(B):
    if graph_id[0] not in sub_graphs_class:
        sub_graphs_class[graph_id[0]] = [idx]

    else :
        sub_graphs_class[graph_id[0]] += [idx]


# adjacency matrix per class
class_num = len(sub_graphs_class)
sub_graphs_adj = {}
for i in range(1,class_num+1):
    start = sub_graphs_class[i][0]
    end = sub_graphs_class[i][-1]+1
    sub_graphs_adj[i] = adj_total[start:end,start:end]
    
def adj_to_degree(T):
    D = np.zeros((T.shape),dtype=np.int32)
    k = 0
    for i in T:
        D[k][k] = sum(i)
        k += 1
    return D

feature map

labeling

def initial_labeling(D): #input = degree matrix
    graph_label = {}
    for i in range(len(D)):
        graph_label[i] = str(D[i][i])
    return graph_label

def multi_labeling(G1,adj1):
    G = G1.copy()

    #labeling
    for x, j in enumerate(adj1):
        for y, k in enumerate(j):
            if k != 0:
                link = (x, y)

                if x == y:
                    continue
                else:
                    G[x] += G1[y]
    #sorting
    for i in G:
        G[i] = ''.join(sorted(G[i]))
    return G

label-compression

def label_compression(G1,G0,H1,H0):
    G1 = G1.copy()
    H1 = H1.copy()
    J = list(G0.values()) + list(H0.values())
    criterion_scalar = max(map(int, J))
    hash_function = {'minima': criterion_scalar}

    domain = J
    for i in domain:
        if i in hash_function:
            continue
        else:
            hash_function[i] = max(hash_function.values()) + 1

    for i in G1:
        G1[i] = str(hash_function[G0[i]])
    for j in H1:
        H1[j] = str(hash_function[H0[j]])

    return G1,H1

The Weisfeiler-Lehman Subtree Kernel

def subtree_kernel(G):
    feature_vector = {}
    for i in range(len(G)):
        if G[i] in feature_vector:
            feature_vector[G[i]] += 1
        else:
            feature_vector[G[i]] = 1
    return feature_vector

Calculate similarity

def kernel_product(feature_G,feature_H):
    ans = 0
    for key in feature_G:
        if key in feature_H:
            ans += feature_G[key] * feature_H[key]
        else:
            ans += 0


    return ans
    
    
#실행 
sim_matrix = np.zeros((len(sub_graphs_class),len(sub_graphs_class)))
for p in tq.tqdm(range(1,len(sub_graphs_class)+1)):
    for q in range(p,len(sub_graphs_class)+1):

        adj1 = sub_graphs_adj[p]
        adj2 = sub_graphs_adj[q]

        graph1_ = adj_to_degree(adj1)
        graph2_ = adj_to_degree(adj2)

        graph1 = initial_labeling(graph1_)
        graph2 = initial_labeling(graph2_)

        similiarity = 0
        height = 3
        for i in range(height):
            a = subtree_kernel(graph1)
            b = subtree_kernel(graph2)
            similiarity += kernel_product(a,b)

            graph11 = multi_labeling(graph1,adj1)
            graph22 = multi_labeling(graph2,adj2)
            graph11,graph22 = label_compression(graph11,graph1,graph22,graph2)

            graph1 = graph11
            graph2 = graph22


        sim_matrix[p - 1][q - 1] = similiarity
        print('subgraph {}와 {}의 similiarity = {}'.format(p, q, similiarity))
profile
개인 공부 기록용

0개의 댓글