TCGA-GBM Differential Gene Expression Analysis and Pathway Enrichment
library('TCGAbiolinks') #for the differential cancer the project name should be change (TCGA-LUAD,TCGA-gbm,TCGA-GBM) query<- GDCquery(project ='TCGA-GBM', data.category = 'Transcriptome Profiling', data.type = 'Gene Expression Quantification' , workflow.type = 'STAR - Counts') #summar<- TCGAbiolinks:::getProjectSummary('TCGA-GBM') #TCGAbiolinks:::getGDCprojects()$project_id GDCdownload(query) query <- GDCprepare(query)
library(SummarizedExperiment) library(data.table) querydataset <- assay(query,'unstranded',nrow='querydataset colume number') #nrow should be change based on the querydataset colume number metadT<-data.frame(matrix(unlist(query@colData@listData), nrow=175, byrow =F),stringsAsFactors=FALSE) nomormetadT <- grepl(pattern = 'Solid Tissue Normal', metadT$X5) #ѡ??tumor-meta???? TUMORmetadT <- metadT[nomormetadT == FALSE, ] #ѡ??normal-meta???? nomormetadT <- metadT[nomormetadT == TRUE, ] #ѡ??normal-expression data???? nomordT <- querydataset[,nomormetadT$X1] #ѡ??tumor-expression data???? TUMORdT <- querydataset[,TUMORmetadT$X1] GBMdata <- cbind(nomordT,TUMORdT)
ensmbextr<- sub('..*', '',rownames(GBMdata))
rownames(GBMdata) <- ensmbextr
library('DESeq2') #form GBMdata class(GBMdata)
condition <- factor(c( rep('Normal',35),rep('Tumor',140)))
coldata <- data.frame(row.names=colnames(GBMdata), condition) #?趨Ϊ????
dds <- DESeqDataSetFromMatrix(round(GBMdata), colData=coldata, formula(~ condition)) dds <- DESeq(dds) #ǰ???в???ǰ???ԱȺ??? deseq2.res = results(dds, contrast=c('condition', 'Tumor','Normal')) deseq2.res = deseq2.res[order( deseq2.res$pvalue),] deseq2.res <- as.data.frame(deseq2.res)
library('clusterProfiler') symbol <- bitr(rownames(deseq2.res), fromType='ENSEMBL', toType=c('SYMBOL','ENTREZID'), OrgDb='org.Hs.eg.db')
deseq2.res2 <- deseq2.res[symbol$ENSEMBL,]
deseq2.res2 <- subset(deseq2.res2,abs(deseq2.res2$log2FoldChange)>1&deseq2.res2$pvalue<0.01&deseq2.res2$baseMean>100)
library('clusterProfiler') symbol2 <- bitr(rownames(deseq2.res2), fromType='ENSEMBL', toType=c('SYMBOL','ENTREZID'), OrgDb='org.Hs.eg.db') deseq2.res3 <- deseq2.res[symbol2$ENSEMBL,] rownames(deseq2.res3) <- symbol2$SYMBOL
changegene <- GBMdata[symbol2$ENSEMBL,]
rownames(changegene) <- symbol2$SYMBOL
library(EnhancedVolcano) pdf('f1.pdf',height = 10,width = 15,onefile = F) library('EnhancedVolcano') EnhancedVolcano(deseq2.res3, lab = rownames(deseq2.res3), x = 'log2FoldChange', y = 'pvalue', xlim = c(-8, 8), title = 'GBM', pCutoff = 0.01, FCcutoff = 1) dev.off()
#GO GISTansEA <- TCGAanalyze_EAcomplete(TFname='GBM', RegulonList =row.names(deseq2.res3))
if (!require('BiocManager', quietly = TRUE)) install.packages('BiocManager')
BiocManager::install('EDASeq') TCGAvisualize_EAbarplot(tf = rownames(GISTansEA$ResBP), GOBPTab = GISTansEA$ResBP, GOCCTab = GISTansEA$ResCC, GOMFTab = GISTansEA$ResMF, PathTab = GISTansEA$ResPat, nRGTab = row.names(deseq2.res3), filename='GBM2.pdf', nBar = 20) install('pathview') library(pathview) install('gage') library(gage) install('gageData') library(gageData) data(kegg.sets.hs) library(tidyverse) foldchanges <- deseq2.res3$log2FoldChange names(foldchanges) =symbol2$ENTREZID head(foldchanges) foldchanges <- sort(foldchanges,decreasing = TRUE) heatgene <- changegene install.packages('ClusterGVis')
Note: please update your ComplexHeatmap to the latest version!
install.packages('devtools')
develops::install_github('junjunlab/ClusterGVis') getOption('repos') install('leidenbase') library('ClusterGVis') install('heatgene') cm<-clusterData(exp=heatgene,cluster.method='kmeans',cluster.num=8) pdf('culster1.pdf',height = 10,width = 15,onefile = F) visCluster(object = cm, plot.type = 'line')
dev.off() library(org.Hs.eg.db) enrich <- enrichCluster(object = cm,OrgDb = org.Hs.eg.db,type = 'KEGG',pvalueCutoff = 0.05,topn = 3,seed = 5201314,fromType=,organism='hsa') markGenes = rownames(heatgene)[sample(1:nrow(heatgene),50,replace = F)] install.packages('heatgene') pdf('culster2.pdf',height = 10,width = 15,onefile = F) visCluster(object = cm,plot.type = 'both',column_names_rot = 45,ncol=100,markGenes = NULL,annoTerm.data = enrich,add.sampleanno=FALSE) install.packages('visCluster') dev.off()
edo <- enrichKEGG(foldchanges, organism = 'hsa', pvalueCutoff = 0.05, qvalueCutoff = 0.05) edox <- setReadable(edox, 'org.Hs.eg.db', 'ENTREZID') geneset <- cbind(edox@result[['Description']],edox@result[['geneID']])
p1 <- cnetplot(edox, foldChange=foldchanges,showCategory = 10) pdf('cnetplot kegg GBM.pdf',height = 15,width = 15,onefile = T) p1 dev.off()
Get the results
keggres = gage(foldchanges, gsets=kegg.sets.hs, same.dir=TRUE)
Look at both up (greater), down (less), and statatistics.
lapply(keggres, head)
Get the pathways-stats
keggrespathways = data.frame(id=rownames(keggres$stats), keggres$stats) %>% tbl_df() %>% filter(row_number()<=10) %>% .$id %>% as.character() keggrespathways
Get the IDs.
keggresids = substr(keggrespathways, start=1, stop=8) keggresids
Define plotting function for applying later
plot_pathway = function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species='hsa', new.signature=FALSE)
plot multiple pathways (plots saved to disk and returns a throwaway list object)
tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species='hsa'))
Get the results
keggres = gage(foldchanges, gsets=kegg.sets.hs, same.dir=TRUE)
Look at both up (greater), down (less), and statatistics.
lapply(keggres, head)
Get the pathways-stats
keggrespathways = data.frame(id=rownames(keggres$less), keggres$less) %>% tbl_df() %>% filter(row_number()<=10) %>% .$id %>% as.character() keggrespathways
Get the IDs.
keggresids = substr(keggrespathways, start=1, stop=8) keggresids
Define plotting function for applying later
plot_pathway = function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species='hsa', new.signature=FALSE)
plot multiple pathways (plots saved to disk and returns a throwaway list object)
tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species='hsa'))
Get the pathways-stats
keggrespathways = data.frame(id=rownames(keggres$greater), keggres$greater) %>% tbl_df() %>% filter(row_number()<=10) %>% .$id %>% as.character() keggrespathways
Get the IDs.
keggresids = substr(keggrespathways, start=1, stop=8) keggresids
Define plotting function for applying later
plot_pathway = function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species='hsa', new.signature=TRUE)
plot multiple pathways (plots saved to disk and returns a throwaway list object)
tmp = sapply(keggresids, function(pid) pathview(gene.data=foldchanges, pathway.id=pid, species='hsa'))
kk2 <- gseKEGG(geneList = foldchanges, organism = 'hsa', minGSSize = 10, pvalueCutoff = 0.05, verbose = FALSE)
edo2 <- gseDO(foldchanges) pdf('kegg_gsea gbm.pdf',height = 15,width = 15,onefile = T) dotplot(kk2, showCategory=30) + ggtitle('dotplot for kegg') dotplot(edo2, showCategory=30) + ggtitle('dotplot for GSEA') dev.off() pdf('ridgeplot kegg Nano.pdf',height = 15,width = 15,onefile = T) ridgeplot(edo2, showCategory = 5, fill = 'p.adjust', core_enrichment = TRUE, label_format = 30, orderBy = 'NES') dev.off()'dev.off()` is a function in R that is used to close the current graphics device. It is typically used after creating a plot or visualizing data to save the plot and close the graphics device.
原文地址: https://www.cveoy.top/t/topic/lays 著作权归作者所有。请勿转载和采集!