뇌 MRI 이미지로 알츠하이머와 경도인지장애를 진단하는 CNN 딥러닝 모델 개발 과정 정리 및 회고.
(1) 서론과 메타데이터 분석
자유주제
(의료/헬스케어) 데이터 직무 포지션에서 풀고자 하는 문제 정의
데이터셋 선정 및 선정 이유
딥러닝 파이프라인 구축, 학습 및 검증
한계점과 추후 발전 방향
(Part1) Intro & Metadata
서론
포지션설정, 기획의도
알츠하이머치매와 경도인지장애
연구의 필요성
목표 및 가설
데이터셋 및 메타데이터
데이터셋 소개
데이터 준비
메타데이터 분석
대시보드
(Part2) Modeling
모델링
개요 및 구조
데이터 로딩 및 분리
전처리 레이어
기본 CNN 모델
모델 성능 개선
전이학습
네거티브 전이
하이퍼파라미터 튜닝
최종 모델 학습 및 검증
(Part3) Prediction & Conclusion
예측 및 비교분석
테스트 데이터셋
예측 및 신뢰도
평가지표 및 혼동행렬
결론
요약
한계와 추후 발전방향
핵심과 소감
포지션 설정
기획 의도
# Load the ADNI baseline metadata CSV into a DataFrame and print its
# column/dtype summary.
adni_csv_path = 'metadata/ADNI-Baseline-dashboard.csv'
adni = pd.read_csv(adni_csv_path)
adni.info()
# The bare string below looks like the pasted output of adni.info() from
# the author's run; as a plain expression statement it does nothing.
'''
RangeIndex: 187 entries, 0 to 186
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 ID 187 non-null object
1 Gender 187 non-null object
2 Group 187 non-null object
3 Age 187 non-null int64
4 Age_Range 187 non-null object
5 Images 187 non-null int64
dtypes: int64(2), object(4)
memory usage: 8.9+ KB
'''
# Subjects per research group: count the 'ID' entries within each 'Group'.
group_count = adni.groupby(['Group'], as_index=False)['ID'].count()
group_count

# Reorder the grouped rows (positions 1, 2, 0 of the grouped result) so the
# charts list the groups in the desired order, then renumber from zero.
group_count_index = [1, 2, 0]
group_count_sort = group_count.reindex(index=group_count_index)
group_count_sort = group_count_sort.reset_index(drop=True)
group_count_sort

# Bar chart: one bar per research group, labelled with its count.
group_count_title = 'Group by "Research Group" (Subjects)'
group_count_bar = plex.bar(
    data_frame=group_count_sort,
    x='Group',
    y='ID',
    color='Group',
    title=group_count_title,
    text_auto=True,
).update_layout(
    xaxis_title='Research Group',
    yaxis_title='Count',
)
group_count_bar.show()

# Donut chart of the same counts (hole=0.3), with percent + label drawn
# inside each slice and a 'Percent' text annotation without an arrow.
group_count_pie = (
    plex.pie(
        data_frame=group_count_sort,
        names='Group',
        values='ID',
        color='Group',
        hole=0.3,
        title=group_count_title,
    )
    .update_traces(textposition='inside', textinfo='percent+label')
    .update_layout(annotations=[{'text': 'Percent', 'showarrow': False}])
)
group_count_pie.show()
# Images per research group: sum the 'Images' column within each 'Group'.
group_images = adni.groupby(['Group'], as_index=False)['Images'].sum()
group_images

# Reorder the grouped rows (positions 1, 2, 0 of the grouped result) and
# renumber from zero so the charts follow that order.
group_images_index = [1, 2, 0]
group_images_sort = group_images.reindex(index=group_images_index)
group_images_sort = group_images_sort.reset_index(drop=True)
group_images_sort

# Bar chart: total image count per research group.
group_images_title = 'Group by "Research Group" (Images)'
group_images_bar = plex.bar(
    data_frame=group_images_sort,
    x='Group',
    y='Images',
    color='Group',
    title=group_images_title,
    text_auto=True,
).update_layout(
    xaxis_title='Research Group',
    yaxis_title='Count',
)
group_images_bar.show()

# Donut chart of the same totals (hole=0.3), with percent + label drawn
# inside each slice and a 'Percent' text annotation without an arrow.
group_images_pie = (
    plex.pie(
        data_frame=group_images_sort,
        names='Group',
        values='Images',
        color='Group',
        hole=0.3,
        title=group_images_title,
    )
    .update_traces(textposition='inside', textinfo='percent+label')
    .update_layout(annotations=[{'text': 'Percent', 'showarrow': False}])
)
group_images_pie.show()
# Subjects per gender: count the 'ID' entries within each 'Gender' code.
group_gender = adni.groupby(['Gender'], as_index=False)['ID'].count()
group_gender

# Spell out the gender codes ('F' -> 'Female', 'M' -> 'Male') on a copy so
# that group_gender itself keeps the raw codes.
fix_gender = group_gender.copy()
fix_gender['Gender'] = fix_gender['Gender'].replace({'F': 'Female', 'M': 'Male'})
fix_gender

# Bar chart: one bar per gender, labelled with its count.
gender_title = 'Group by "Gender"'
gender_bar = plex.bar(
    data_frame=fix_gender,
    x='Gender',
    y='ID',
    color='Gender',
    title=gender_title,
    text_auto=True,
).update_layout(
    xaxis_title='Gender',
    yaxis_title='Count',
)
gender_bar.show()

# Donut chart of the same counts (hole=0.3), with percent + label drawn
# inside each slice and a 'Percent' text annotation without an arrow.
gender_pie = (
    plex.pie(
        data_frame=fix_gender,
        names='Gender',
        values='ID',
        color='Gender',
        hole=0.3,
        title=gender_title,
    )
    .update_traces(textposition='inside', textinfo='percent+label')
    .update_layout(annotations=[{'text': 'Percent', 'showarrow': False}])
)
gender_pie.show()
# Group by "Research Group" & "Gender": count the 'ID' entries for every
# (Group, Gender) combination present in the data.
group_and_gender = adni.groupby(['Group','Gender'], as_index=False)['ID'].count()
group_and_gender

# Reorder the grouped rows (positions 2-5 first, then 0-1) and renumber from
# zero.  NOTE(review): the hard-coded positions assume the grouped result has
# exactly six rows (every group has both genders) - confirm if the data changes.
gg_list = [2,3,4,5,0,1]
sort_gg = group_and_gender.reindex(gg_list, axis=0).reset_index(drop=True)

# Spell out the gender codes on a copy and rename the count column.
fix_gg = sort_gg.copy()
fix_gg.Gender = fix_gg.Gender.replace({'F':'Female', 'M':'Male'})
fix_gg.columns = ['Group','Gender', 'Counts']
fix_gg

# Grouped bar chart: bars per research group, split by gender.
# The title previously misspelled "Research" as "Reserch".
gg_bar = plex.bar(data_frame=fix_gg,
                  x='Group',y='Counts',color='Gender',
                  title='Group by "Research Group" & "Gender"',
                  text_auto=True, barmode='group')
gg_bar.show()
# Check the range of ages in the metadata (youngest, oldest).
adni.Age.min(), adni.Age.max()
# Bare string recording the result the author saw for the expression above.
'''
(55, 91)
'''
# Histogram of age (the original note says the intended bin interval is 5).
# nbins=8 is passed to the plotting call and the x axis is ticked every 5.
age_hist = plex.histogram(
    data_frame=adni,
    x='Age',
    nbins=8,
    text_auto=True,
    title='Histogram of "Age"',
)
age_hist.update_xaxes(dtick=5)
age_hist.show()
# Split the metadata rows by gender code.
age_male = adni.query('Gender == "M"')
age_female = adni.query('Gender == "F"')

# Overlay one age histogram per gender on a single figure; each bar shows
# its count via the "%{y}" text template.
age_overlaid = go.Figure()
for gender_rows, gender_label, bar_colour in (
    (age_male, 'Male', 'purple'),
    (age_female, 'Female', 'pink'),
):
    age_overlaid.add_trace(
        go.Histogram(
            x=gender_rows.Age,
            name=gender_label,
            marker_color=bar_colour,
            texttemplate="%{y}",
        )
    )

# barmode='overlay' draws the two histograms on top of each other, so the
# traces are made semi-transparent to keep both visible.
age_overlaid.update_layout(
    title_text='Histogram of "Age" & "Gender"',
    xaxis_title_text="Age",
    yaxis_title_text="count",
    barmode='overlay',
)
age_overlaid.update_traces(opacity=0.75)
age_overlaid.update_xaxes(dtick=5)  # x-axis tick every 5
age_overlaid.show()
# Group by "Age Range": count the 'ID' entries within each 'Age_Range' value
# and rename the count column to 'Counts'.
group_ar = adni.groupby(['Age_Range'], as_index=False)['ID'].count()
group_ar.columns = ['Age_Range', 'Counts']
group_ar

# Bar chart: one bar per age range, labelled with its count.
ar_title = 'Group by "Age Range"'
ar_bar = plex.bar(data_frame=group_ar, x='Age_Range', y='Counts', color='Age_Range',
                  text_auto=True, title=ar_title)
# The x axis plots Age_Range, so label it accordingly (it was previously
# labelled 'Research Group', apparently copied from the group charts).
ar_bar.update_layout(xaxis_title='Age Range', yaxis_title='Count')
ar_bar.show()

# Donut chart of the same counts (hole=0.3) showing percent + label on each
# slice, with a 'Percent' text annotation without an arrow.
ar_pie = plex.pie(data_frame=group_ar, hole=0.3,
                  values='Counts', names='Age_Range', color='Age_Range',
                  title=ar_title)
ar_pie.update_traces(textinfo='percent+label')
ar_pie.update_layout(annotations=[dict(text='Percent', showarrow=False)])
ar_pie.show()
# Group by "Age Range" & "Research Group": count the 'ID' entries for every
# (Age_Range, Group) combination present in the data.
group_ag = adni.groupby(['Age_Range','Group'], as_index=False)['ID'].count()
group_ag

# Add a zero-count row for ('50s', 'CN') so the bar order in the chart lines
# up across age ranges.  NOTE(review): label 12 assumes the grouped result
# has exactly 12 rows (labels 0-11) - confirm if the data changes.
group_ag.loc[12] = ['50s', 'CN', 0]
group_ag

# Reorder the rows by their labels (the added row 12 comes first), renumber
# from zero and rename the columns.
ag_list = [12,1,0,3,4,2,6,7,5,9,10,8,11]
sort_ag = group_ag.reindex(ag_list, axis=0).reset_index(drop=True)
sort_ag.columns = ['Age_Range', 'Group', 'Counts']
sort_ag

# Grouped bar chart: bars per age range, split by research group.
# The title previously misspelled "Research" as "Reserch".
ag_bar = plex.bar(data_frame=sort_ag,
                  x='Age_Range',y='Counts',color='Group',
                  title='Group by "Age Range" & "Research Group"',
                  text_auto=True,barmode='group')
ag_bar.show()