# Nonparametric CI Test_MI

papamoon0113·2023년 9월 11일
0

## 🌎 Causal Discovery Python Implementation(EN)

목록 보기
5/9

## Note: MI, Mutual Information

### What is Mutual Information?

Mutual information is a measure of the amount of information that is shared between two random variables. It quantifies the degree of dependence between the two variables by measuring how much knowing one variable reduces the uncertainty about the other variable. See Wikipedia

The definition of Mutual Information is as follows:

$\operatorname{I}(X;Y)=\sum_{y\in\mathcal{Y}}\sum_{x\in\mathcal{X}} P_{(X,Y)}(x,y)\log\left(\frac{P_{(X,Y)}(x,y)}{P_X(x)\,P_Y(y)}\right) \quad (X, Y \text{: discrete})$

or

$\operatorname{I}(X;Y)=\int_{\mathcal{Y}}\int_{\mathcal{X}} P_{(X,Y)}(x,y)\log\left(\frac{P_{(X,Y)}(x,y)}{P_X(x)\,P_Y(y)}\right)\,dx\,dy \quad (X, Y \text{: continuous})$

The definition of Conditional Mutual Information is as follows:

$I(X;Y|Z)=\sum_{z\in\mathcal{Z}}\sum_{y\in\mathcal{Y}}\sum_{x\in\mathcal{X}} p_{X,Y,Z}(x,y,z)\log\frac{p_Z(z)\,p_{X,Y,Z}(x,y,z)}{p_{X,Z}(x,z)\,p_{Y,Z}(y,z)} \quad (X, Y, Z \text{: discrete})$

or

$I(X;Y|Z)=\int_{\mathcal{Z}}\int_{\mathcal{Y}}\int_{\mathcal{X}} p_{X,Y,Z}(x,y,z)\log\left(\frac{p_Z(z)\,p_{X,Y,Z}(x,y,z)}{p_{X,Z}(x,z)\,p_{Y,Z}(y,z)}\right)\,dx\,dy\,dz \quad (X, Y, Z \text{: continuous})$

#### Why we use it for conditional independence test?

That is because (conditional) mutual information is useful in checking the independence of given random variables, using the following two properties :

$\operatorname{I}(X;Y)=0$ if and only if $X$ and $Y$ are independent

$\operatorname{I}(X;Y|Z)=0$ if and only if $X$ and $Y$ are conditionally independent given $Z$

#### WARNING: do NOT use CMI on continuous random variables!

(Conditional) mutual information is recommended only for discrete random variables. There are practical issues in using conditional mutual information to check the independence of continuous random variables.

One of the main issues is that the calculation of conditional mutual information requires estimating conditional probability distributions, which can be difficult for continuous variables.

Another issue is the curse of dimensionality: the number of possible value combinations grows exponentially with the number of variables. As a result, the calculation of conditional mutual information can become computationally expensive, especially for high-dimensional continuous variables.

## Implementation

### def mutual_information

import pandas as pd
import numpy as np

def mutual_information(data, X: set, Y: set, delta=1):
    """Estimate the mutual information I(X; Y) in bits from discrete data.

    Uses the plug-in estimator: relative frequencies stand in for the
    true probabilities in sum p(x,y) * log2( p(x,y) / (p(x) p(y)) ).

    Parameters
    ----------
    data : pd.DataFrame
        Sample of the joint distribution; columns must cover X and Y.
    X, Y : set
        Disjoint, non-empty sets of column names for the two variable groups.
    delta : float, default 1
        Weight applied to every summand (cell width when the data were
        discretized from continuous values).

    Returns
    -------
    float
        Estimated mutual information in bits; 0 exactly when the empirical
        joint factorizes into the empirical marginals.
    """
    X = list(X)
    Y = list(Y)

    # Empirical marginal and joint distributions as relative frequencies.
    n = len(data)
    P_X = data.groupby(X).size() / n
    P_Y = data.groupby(Y).size() / n
    P_XY = data.groupby(X + Y).size() / n

    mi = 0.0
    # X and Y are both non-empty, so the joint groupby has >= 2 keys and
    # P_XY carries a MultiIndex: each `ind` is a tuple we can slice.
    for ind, p_xy in P_XY.items():
        x_key = ind[:len(X)]
        y_key = ind[len(X):]
        # A single-column marginal has a flat Index -> look up by scalar,
        # a multi-column marginal has a MultiIndex -> look up by tuple.
        p_x = P_X[x_key[0] if len(X) == 1 else x_key]
        p_y = P_Y[y_key[0] if len(Y) == 1 else y_key]
        # Only observed cells are iterated, so p_xy > 0 and the log is finite.
        mi += delta * p_xy * np.log2(p_xy / (p_x * p_y))

    return mi

### def conditional_mutual_information

def conditional_mutual_information(data, X: set, Y: set, Z: set, delta=1):
    """Estimate the conditional mutual information I(X; Y | Z) in bits.

    Plug-in estimator: sum over observed cells of
    p(x,y,z) * log2( p(z) p(x,y,z) / (p(x,z) p(y,z)) ).

    Parameters
    ----------
    data : pd.DataFrame
        Sample of the joint distribution; columns must cover X, Y and Z.
    X, Y, Z : set
        Pairwise-disjoint, non-empty sets of column names.
    delta : float, default 1
        Weight applied to every summand (cell width when the data were
        discretized from continuous values).

    Returns
    -------
    float
        Estimated conditional mutual information in bits.
    """
    X = list(X)
    Y = list(Y)
    Z = list(Z)

    # Empirical distributions as relative frequencies.
    n = len(data)
    P_Z = data.groupby(Z).size() / n
    P_XZ = data.groupby(X + Z).size() / n
    P_YZ = data.groupby(Y + Z).size() / n
    P_XYZ = data.groupby(X + Y + Z).size() / n

    cmi = 0.0
    # The joint groupby has >= 3 keys, so each `ind` is a tuple ordered
    # as (X columns, Y columns, Z columns).
    for ind, p_xyz in P_XYZ.items():
        x_key = ind[:len(X)]
        y_key = ind[len(X):len(X) + len(Y)]
        z_key = ind[len(X) + len(Y):]

        # Flat Index for a single conditioning column, MultiIndex otherwise.
        p_z = P_Z[z_key[0] if len(Z) == 1 else z_key]
        # X+Z and Y+Z always have >= 2 columns -> tuple lookup on a MultiIndex.
        p_xz = P_XZ[x_key + z_key]
        p_yz = P_YZ[y_key + z_key]

        # Only observed cells are iterated, so every probability is > 0.
        cmi += delta * p_xyz * np.log2(p_z * p_xyz / (p_xz * p_yz))

    return cmi

### def conditional_mutual_information_test

Return

• True if (conditional) mutual information is sufficiently close to 0, else False
import math

def conditional_mutual_information_test(data: pd.DataFrame, X: set, Y: set, Z: set = None,
                                        alpha: float = 0.05, delta: float = None) -> bool:
    """(Conditional) independence test via mutual information thresholding.

    Parameters
    ----------
    data : pd.DataFrame
        Sample; columns must cover X, Y and (if given) Z.
    X, Y : set
        Column-name sets of the two variable groups being tested.
    Z : set, optional
        Conditioning set. When falsy (None or empty) an unconditional
        mutual-information test is performed instead.
    alpha : float, default 0.05
        Decision threshold. NOTE(review): this is a raw cutoff on the
        estimated MI value, not a significance level of a statistical
        test — confirm this is the intended semantics.
    delta : float, optional
        Discretization cell width. When given, the data are rounded to the
        decimal place implied by delta and each MI summand is scaled by
        delta; when None, the data are used as-is with delta = 1.

    Returns
    -------
    bool
        True if X and Y look (conditionally) independent, i.e. the
        estimated (C)MI is below alpha.
    """
    if delta is not None:
        # e.g. delta = 0.01 -> round to 2 decimals. DataFrame.round already
        # returns a new object, so no explicit copy is needed.
        data = data.round(-math.floor(np.log10(delta)))
    else:
        delta = 1

    if Z:
        mi = conditional_mutual_information(data, X, Y, Z, delta=delta)
    else:
        mi = mutual_information(data, X, Y, delta=delta)

    return mi < alpha

## Example

import scipy.stats as stats

size = 100

data = pd.DataFrame()
data['A'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['B'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['C'] =  stats.bernoulli(p = 0.5).rvs(size = size)
# A, B, C are independent

conditional_mutual_information_test(data, {'A'}, {'B'})
True
conditional_mutual_information_test(data, {'A'}, {'B'}, {'C'})
True

data = pd.DataFrame()
data['A'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['B'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['C'] =  data['A'] + data['B'] + stats.bernoulli(p = 0.5).rvs(size = size)
# A -> C <- B : v-structure

conditional_mutual_information_test(data, {'A'}, {'B'})
True
conditional_mutual_information_test(data, {'A'}, {'B'}, {'C'})
False

data = pd.DataFrame()
data['A'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['B'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['C'] =  data['A'] + data['B'] + stats.bernoulli(p = 0.5).rvs(size = size)
data['D'] =  data['A'] + data['B'] + stats.bernoulli(p = 0.5).rvs(size = size)
# A -> C <- B : v-structure
# A -> D <- B : v-structure
# Therefore, C and D are independent only when A and B are observed

conditional_mutual_information_test(data, {'C'}, {'D'})
False
conditional_mutual_information_test(data, {'C'}, {'D'}, {'A'})
False
conditional_mutual_information_test(data, {'C'}, {'D'}, {'A', 'B'})
True