Note: MI = Mutual Information

What is Mutual Information?

Mutual information is a measure of the amount of information that is shared between two random variables. It quantifies the degree of dependence between the two variables by measuring how much knowing one variable reduces the uncertainty about the other variable. See Wikipedia

The definition of Mutual Information is as follows:

$$I(X;Y)=\sum_{y\in\mathcal{Y}}\sum_{x\in\mathcal{X}} P_{(X,Y)}(x,y)\,\log\!\left(\frac{P_{(X,Y)}(x,y)}{P_X(x)\,P_Y(y)}\right) \qquad (X, Y:\text{discrete})$$

or

$$I(X;Y)=\int_{\mathcal{Y}}\int_{\mathcal{X}} P_{(X,Y)}(x,y)\,\log\!\left(\frac{P_{(X,Y)}(x,y)}{P_X(x)\,P_Y(y)}\right)dx\,dy \qquad (X, Y:\text{continuous})$$
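As a quick sanity check of the discrete formula, the following sketch evaluates the double sum directly with NumPy on a toy 2×2 joint table (the table itself is just an illustrative assumption), using log base 2 as the implementation below does:

import numpy as np

# Assumed toy joint distribution P(x, y) over two binary variables
P_XY = np.array([[0.4, 0.1],
                 [0.1, 0.4]])
P_X = P_XY.sum(axis=1, keepdims=True)   # marginal P(x), shape (2, 1)
P_Y = P_XY.sum(axis=0, keepdims=True)   # marginal P(y), shape (1, 2)

# Sum over (x, y) of P(x,y) * log2( P(x,y) / (P(x) P(y)) )
mi = np.sum(P_XY * np.log2(P_XY / (P_X * P_Y)))
print(mi)   # ~0.278 bits: knowing one variable reduces uncertainty about the other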

The definition of Conditional Mutual Information is as follows:

$$I(X;Y\mid Z)=\sum_{z\in\mathcal{Z}}\sum_{y\in\mathcal{Y}}\sum_{x\in\mathcal{X}} p_{X,Y,Z}(x,y,z)\,\log\frac{p_Z(z)\,p_{X,Y,Z}(x,y,z)}{p_{X,Z}(x,z)\,p_{Y,Z}(y,z)} \qquad (X, Y, Z:\text{discrete})$$

or

$$I(X;Y\mid Z)=\int_{\mathcal{Z}}\int_{\mathcal{Y}}\int_{\mathcal{X}} p_{X,Y,Z}(x,y,z)\,\log\!\left(\frac{p_Z(z)\,p_{X,Y,Z}(x,y,z)}{p_{X,Z}(x,z)\,p_{Y,Z}(y,z)}\right)dx\,dy\,dz \qquad (X, Y, Z:\text{continuous})$$

Why do we use it for conditional independence tests?

(Conditional) mutual information is useful for checking the independence of random variables because of the following two properties:

$I(X;Y)=0$ ⇔ $X$ and $Y$ are independent

$I(X;Y\mid Z)=0$ ⇔ $X$ and $Y$ are conditionally independent given $Z$
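One practical caveat: with finite samples, the plug-in estimate of $I(X;Y)$ is almost never exactly 0 even when the variables are truly independent, which is why the test below compares the estimate against a small threshold rather than against 0. A minimal sketch (sample size and seed are arbitrary choices):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
x = rng.integers(0, 2, size=1000)
y = rng.integers(0, 2, size=1000)        # generated independently of x

P_XY = pd.crosstab(x, y, normalize=True).to_numpy()
P_X = P_XY.sum(axis=1, keepdims=True)
P_Y = P_XY.sum(axis=0, keepdims=True)

mi_hat = np.sum(P_XY * np.log2(P_XY / (P_X * P_Y)))
print(mi_hat)   # small but strictly positive despite true independence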

WARNING: do NOT use CMI on continuous random variables!

(Conditional) mutual information is recommended only for discrete random variables; there are practical issues in using it to check the independence of continuous random variables.

One of the main issues is that calculating conditional mutual information requires estimating joint probability distributions (densities, in the continuous case), which is difficult for continuous variables.

Another issue is the curse of dimensionality: the number of possible value combinations grows exponentially with the number of variables, which makes the calculation of conditional mutual information computationally expensive, especially for high-dimensional continuous variables.
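To make the density-estimation issue concrete, here is a hedged sketch (independent Gaussian toy data; the bin widths are arbitrary choices) showing that a naive binned estimate of I(X;Y) swings with the chosen bin width:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
x = rng.normal(size=500)
y = rng.normal(size=500)                 # generated independently of x

def binned_mi(x, y, width):
    # Discretize both variables onto a grid of the given width,
    # then compute the plug-in mutual information in bits
    P_XY = pd.crosstab(np.floor(x / width), np.floor(y / width),
                       normalize=True).to_numpy()
    P_X = P_XY.sum(axis=1, keepdims=True)
    P_Y = P_XY.sum(axis=0, keepdims=True)
    PxPy = P_X * P_Y
    mask = P_XY > 0                      # skip empty cells (0 * log 0 = 0)
    return np.sum(P_XY[mask] * np.log2(P_XY[mask] / PxPy[mask]))

for width in (2.0, 1.0, 0.5, 0.1):
    print(width, binned_mi(x, y, width))
# The estimate inflates as the bins shrink, even though x and y are independent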

Implementation

def mutual_information

import pandas as pd
import numpy as np

def mutual_information(data: pd.DataFrame, X: set, Y: set, delta=1):
    X = list(X); Y = list(Y)
    mi = 0

    # Estimate the marginal and joint distributions as relative frequencies
    P_X = data.groupby(X).size()
    P_X = P_X / P_X.sum()

    P_Y = data.groupby(Y).size()
    P_Y = P_Y / P_Y.sum()

    P_XY = data.groupby(X + Y).size()
    P_XY = P_XY / P_XY.sum()

    # Accumulate P(x,y) * log2( P(x,y) / (P(x) P(y)) ) over the observed pairs
    for ind in P_XY.index:
        x_ind = ind[:len(X)]
        y_ind = ind[len(X):]
        xy_ind = [ind]

        # A single-column groupby yields a flat Index, a multi-column one a
        # MultiIndex, so build the matching lookup key for each distribution
        x_ind = pd.MultiIndex.from_tuples([x_ind], names=X) if len(X) != 1 else pd.Index(x_ind, name=X[0])
        y_ind = pd.MultiIndex.from_tuples([y_ind], names=Y) if len(Y) != 1 else pd.Index(y_ind, name=Y[0])
        xy_ind = pd.MultiIndex.from_tuples(xy_ind, names=X + Y)

        # delta is a bin-width scale factor used when the data were discretized
        mi += delta * P_XY[xy_ind].item() * np.log2(P_XY[xy_ind].item() / (P_X[x_ind].item() * P_Y[y_ind].item()))

    return mi
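A quick usage check, assuming the toy DataFrame below: B is a noisy copy of A, so its estimate should be clearly positive, while C is generated independently of A and should score near 0.

import scipy.stats as stats

df = pd.DataFrame()
df['A'] = stats.bernoulli(p=0.5).rvs(size=1000)
df['B'] = df['A'] ^ stats.bernoulli(p=0.1).rvs(size=1000)   # flips A 10% of the time
df['C'] = stats.bernoulli(p=0.5).rvs(size=1000)             # independent of A

print(mutual_information(df, {'A'}, {'B'}))   # clearly positive
print(mutual_information(df, {'A'}, {'C'}))   # close to 0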

def conditional_mutual_information

def conditional_mutual_information(data: pd.DataFrame, X: set, Y: set, Z: set, delta=1):
    X = list(X); Y = list(Y); Z = list(Z)
    cmi = 0

    # Estimate all required marginal and joint distributions as relative frequencies
    P_Z = data.groupby(Z).size()
    P_Z = P_Z / P_Z.sum()

    P_XZ = data.groupby(X + Z).size()
    P_XZ = P_XZ / P_XZ.sum()

    P_YZ = data.groupby(Y + Z).size()
    P_YZ = P_YZ / P_YZ.sum()

    P_XYZ = data.groupby(X + Y + Z).size()
    P_XYZ = P_XYZ / P_XYZ.sum()

    # Accumulate P(x,y,z) * log2( P(z) P(x,y,z) / (P(x,z) P(y,z)) )
    for ind in P_XYZ.index:
        x_ind = ind[:len(X)]
        y_ind = ind[len(X):len(X + Y)]
        z_ind = ind[len(X + Y):]

        xz_ind = x_ind + z_ind
        yz_ind = y_ind + z_ind
        xyz_ind = ind

        # Build lookup keys matching the (Multi)Index of each distribution
        z_ind = pd.MultiIndex.from_tuples([z_ind], names=Z) if len(Z) != 1 else pd.Index(z_ind, name=Z[0])
        xz_ind = pd.MultiIndex.from_tuples([xz_ind], names=X + Z)
        yz_ind = pd.MultiIndex.from_tuples([yz_ind], names=Y + Z)
        xyz_ind = pd.MultiIndex.from_tuples([xyz_ind], names=X + Y + Z)

        cmi += delta * P_XYZ[xyz_ind].item() * np.log2(P_Z[z_ind].item() * P_XYZ[xyz_ind].item() / (P_XZ[xz_ind].item() * P_YZ[yz_ind].item()))

    return cmi
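Similarly, a quick check on a toy chain A -> B -> C (an illustrative assumption): A and C are marginally dependent, but conditioning on the middle variable B should drive the estimate toward 0.

import scipy.stats as stats

df = pd.DataFrame()
df['A'] = stats.bernoulli(p=0.5).rvs(size=1000)
df['B'] = df['A'] ^ stats.bernoulli(p=0.2).rvs(size=1000)   # noisy copy of A
df['C'] = df['B'] ^ stats.bernoulli(p=0.2).rvs(size=1000)   # noisy copy of B

print(mutual_information(df, {'A'}, {'C'}))                     # clearly positive
print(conditional_mutual_information(df, {'A'}, {'C'}, {'B'}))  # near 0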

def conditional_mutual_information_test

Return

  • True if the (conditional) mutual information is sufficiently close to 0, else False

import math

def conditional_mutual_information_test(data: pd.DataFrame, X: set, Y: set, Z: set = None,
                                        alpha: float = 0.05, delta: float = None) -> bool:
    # If a bin width delta is given, discretize the data by rounding to the
    # corresponding number of decimal places; otherwise use the raw values
    if delta is not None:
        data = data.copy().round(-math.floor(np.log10(delta)))
    else:
        delta = 1

    if Z:
        mi = conditional_mutual_information(data, X, Y, Z, delta=delta)
    else:
        mi = mutual_information(data, X, Y, delta=delta)

    # alpha is a raw threshold on the MI estimate, not a p-value
    return mi < alpha

Example

import scipy.stats as stats

size = 100

data = pd.DataFrame()
data['A'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['B'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['C'] =  stats.bernoulli(p = 0.5).rvs(size = size)
# A, B, C are independent

conditional_mutual_information_test(data, {'A'}, {'B'})
True
conditional_mutual_information_test(data, {'A'}, {'B'}, {'C'})
True

data = pd.DataFrame()
data['A'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['B'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['C'] =  data['A'] + data['B'] + stats.bernoulli(p = 0.5).rvs(size = size)
# A -> C <- B : v-structure

conditional_mutual_information_test(data, {'A'}, {'B'})
True
conditional_mutual_information_test(data, {'A'}, {'B'}, {'C'})
False

data = pd.DataFrame()
data['A'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['B'] =  stats.bernoulli(p = 0.5).rvs(size = size)
data['C'] =  data['A'] + data['B'] + stats.bernoulli(p = 0.5).rvs(size = size)
data['D'] =  data['A'] + data['B'] + stats.bernoulli(p = 0.5).rvs(size = size)
# A -> C <- B : v-structure
# A -> D <- B : v-structure
# Therefore, C and D are independent only when A and B are observed

conditional_mutual_information_test(data, {'C'}, {'D'})
False
conditional_mutual_information_test(data, {'C'}, {'D'}, {'A'})
False
conditional_mutual_information_test(data, {'C'}, {'D'}, {'A', 'B'})
True