最近发现 MSigDB 的 KEGG 通路 Regulation of actin cytoskeleton 中没有 CYRIB 这个基因,但在 KEGG 官网上是有的;
可能是 MSigDB 更新不及时吧:
https://www.genome.jp/entry/hsa:51571
下面自己制作最新的 KEGG 通路基因列表:
# Fetch the "hsa" KEGG tables through clusterProfiler.
hsa_kegg <- clusterProfiler::download_KEGG("hsa")

# Inspection only: list the returned elements and peek at the two tables.
names(hsa_kegg)
head(hsa_kegg[["KEGGPATHID2NAME"]])
head(hsa_kegg[["KEGGPATHID2EXTID"]])

# Keep the two tables under short names for the join below.
PATH2ID <- hsa_kegg[["KEGGPATHID2EXTID"]]
PATH2NAME <- hsa_kegg[["KEGGPATHID2NAME"]]

# Join both tables on their shared "from" column, then rename the three
# resulting columns positionally (join key, value from PATH2ID, value from
# PATH2NAME).
PATH_ID_NAME <- merge(x = PATH2ID, y = PATH2NAME, by.x = "from", by.y = "from")
names(PATH_ID_NAME) <- c("KEGGID", "ENTREZID", "DESCRPTION")
# write.table(PATH_ID_NAME, "HSA_KEGG.txt", sep="\t")
library(biomaRt)
# Connect to the Ensembl BioMart "hsapiens_gene_ensembl" dataset.
mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))

# PATH_ID_NAME has one row per pathway/gene pair, so a gene that belongs to
# several pathways repeats. Collapse the IDs to unique, non-missing values
# before querying: the same mappings come back while fewer values are sent to
# the server, and no NA (from a failed integer conversion) is used as a filter.
entrezgene <- as.integer(PATH_ID_NAME$ENTREZID)
entrezgene <- unique(entrezgene[!is.na(entrezgene)])

# This step needs some time (remote BioMart query).
ensembl_gene_id <- getBM(
  attributes = c("ensembl_gene_id", "entrezgene_id", "hgnc_symbol"),
  filters = "entrezgene_id",
  values = entrezgene,
  mart = mart
)

# Attach the BioMart columns to every pathway/gene row; rows whose ENTREZID
# has no match in the BioMart result are dropped by this inner join.
PATH_ID_NAME <- merge(
  PATH_ID_NAME, ensembl_gene_id,
  by.x = "ENTREZID", by.y = "entrezgene_id"
)

# Sanity check: distinct symbols now listed for the pathway of interest.
aa <- unique(
  PATH_ID_NAME$hgnc_symbol[
    PATH_ID_NAME$DESCRPTION == "Regulation of actin cytoskeleton"
  ]
)
# Build the KEGG gene-set table used for scoring from PATH_ID_NAME.
# Keep only the two columns hgnc_symbol and DESCRPTION, without duplicate rows.
kegg_score <- unique(PATH_ID_NAME[, c("hgnc_symbol", "DESCRPTION")])

# Sort by DESCRPTION (done before the names are rewritten below).
kegg_score <- kegg_score[order(kegg_score$DESCRPTION), ]

# Normalise DESCRPTION: upper-case, " - " becomes "_", single quotes are
# removed, remaining spaces become "_". The patterns are literal text, hence
# fixed = TRUE.
kegg_score$DESCRPTION <- toupper(kegg_score$DESCRPTION)
kegg_score$DESCRPTION <- gsub(" - ", "_", kegg_score$DESCRPTION, fixed = TRUE)
kegg_score$DESCRPTION <- gsub("'", "", kegg_score$DESCRPTION, fixed = TRUE)
kegg_score$DESCRPTION <- gsub(" ", "_", kegg_score$DESCRPTION, fixed = TRUE)

# Drop rows without a gene symbol. Missing values are tested explicitly:
# comparing NA with "" yields NA, and an NA in a logical row index inserts an
# all-NA row instead of removing the row.
kegg_score <- kegg_score[
  !is.na(kegg_score$hgnc_symbol) & kegg_score$hgnc_symbol != "",
]

# Save as a tab-separated file, without row names or quoting.
write.table(
  kegg_score,
  file = "KEGG_pathway_gene_list.tsv",
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)
如果觉得我的文章对您有用,请随意打赏。你的支持将鼓励我继续创作!