bulk-RNA sequencing analysis [3] : GSEA in R

JB·2024년 4월 19일
0

Step 1 : Load required packages

	library(clusterProfiler)
	library(msigdbr)
	library(org.Mm.eg.db)
	library(magrittr)

Step 2 : Import DESeq2 results

	deseq2.dds <- DESeq(dds)										
	deseq2.res <- results(deseq2.dds)	
	res <- deseq.res
	dge_mapped_df <- data.frame(res)

Step 3 : Prepare input

  # 사용하는 데이터의 종에 따라 species를 바꿔줌
	Mm_hallmark_sets <- msigdbr(species = "Mus musculus", category = "H")
    
  # DESeq2 결과 데이터의 중복값이 없는지 확인 => 중복있으면 추가적인 처리과정 필요
	any(duplicated(rownames(dge_mapped_df)))				  		
    
  # log2 fold change 값 추출해서 이름붙임
	lfc_vector <- dge_mapped_df$log2FoldChange
	names(lfc_vector) <- rownames(dge_mapped_df)
    
  # 내림차순 정리
	lfc_vector <- sort(lfc_vector, decreasing = TRUE)
	head(lfc_vector)

Step 4 : Running GSEA

  # Set the seed so our results are reproducible:
	set.seed(2020)
    
	gsea_results <- GSEA(geneList = lfc_vector, minGSSize = 25, maxGSSize = 500, pvalueCutoff = 0.05, eps = 0, seed = TRUE, pAdjustMethod = "BH", TERM2GENE = dplyr::select(Mm_hallmark_sets, gs_name, gene_symbol))

Step 5 : Making graphs

 ## gseaplot
	gsea_result_df <- data.frame(gsea_results@result)
    
  # This returns the 3 rows with the largest NES values
	gsea_result_df %>% dplyr::slice_max(NES, n = 3)
	most_positive_nes_plot <- enrichplot::gseaplot(gsea_results, geneSetID = "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION", title = "EPITHELIAL_MESENCHYMAL_TRANSITION", color.line = "#FFCE30")
	most_positive_nes_plot
    
  # This returns the 3 rows with the lowest NES values
	gsea_result_df %>%dplyr::slice_min(NES, n = 3)
	most_negative_nes_plot <- enrichplot::gseaplot(gsea_results, geneSetID = "HALLMARK_UV_RESPONSE_DN", title = "UV_RESPONSE_DN", color.line = "#288BA8")
	most_negative_nes_plot

 ## Ridgeline plot
	ridgeplot(gsea_results)

 ## gseaplot2 
	gseaplot2(gsea_results, geneSetID = "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION", title = "EPITHELIAL_MESENCHYMAL_TRANSITION")
	gseaplot2(gsea_results, geneSetID = 1:3)
	gseaplot2(gsea_results, geneSetID = 1:3, pvalue_table = TRUE, color = c("#E495A5", "#86B875", "#7DB0DD"), ES_geom = "dot")

0개의 댓글