突变signature分析R包“YAPSA”

今天给大家带来的是signature分析的R包“YAPSA”,让大家在分析signature的时候多一个选择,增加绘图展示的多样性,最重要的是让你的老板知道你有多优秀。

大家知道前文分享的“deconstructSigs”也是突变signature的绘图软件,好奇的小伙伴就有疑问了,这俩要是一样就没必要再学了吧?不要偷懒,这两个软件目标是一样的,但是还是不一样的。第一,”与“deconstructSigs”的相比,“YAPSA”用的是Linear Combination Decomposition (LCD)线性组合分解的方法来预测signature,而“deconstructSigs”是利用非负矩阵分解(Nonnegative Matrix Factor)的方法预测;另外,“YAPSA”方法展示可通过阈值和颜色进行调整,较为灵活;再者,“YAPSA”运行更快和所需的资源更少(个人感觉,在突变不是很多的情况下,速度没显著差别)。这两款R包均可选择signature.nature2013 或 signature.COSMIC作为已知signature进行相关性计算。

下边全是干货,请认真阅读,广泛传播!

1、加载包

library(YAPSA)library(knitr)opts_chunk$set(echo=TRUE)opts_chunk$set(fig.show='asis')library(BSgenome.Hsapiens.UCSC.hg19)#注意基因组版本

2、准备数据

3、运行R代码

#读取突变文件data<-read.table(file="C:/Users/snp_mutation.txt",header=T,check.names=F)#提取SNV序列长度word_length <- 3data1 <-  create_mutation_catalogue_from_df(         data,         this_seqnames.field = "CHROM", this_start.field = "POS",         this_end.field = "POS", this_PID.field = "PID",         this_subgroup.field = "SUBGROUP",         this_refGenome = BSgenome.Hsapiens.UCSC.hg19,#注意基因组版本要与SNV calling 的基因组保持一致。         this_wordLength = word_length)names(data1)data_df <- as.data.frame(data1$matrix)
#读入COSMIC_signature(也可选择另外一个cosmic_nature_2013)Alex_COSMIC_signatures_path <-   paste0("http://cancer.sanger.ac.uk/cancergenome/",         "assets/signatures_probabilities.txt")AlexCosmicValid_sig_df <- read.csv(Alex_COSMIC_signatures_path,                                      header=TRUE,sep="t")Alex_COSMIC_rownames <- paste(AlexCosmicValid_sig_df[,1],                              AlexCosmicValid_sig_df[,2],sep=" ")COSMIC_select_ind <- grep("Signature",names(AlexCosmicValid_sig_df))AlexCosmicValid_sig_df <- AlexCosmicValid_sig_df[,COSMIC_select_ind]number_of_Alex_COSMIC_sigs <- dim(AlexCosmicValid_sig_df)[2]names(AlexCosmicValid_sig_df) <- gsub("Signature\.","AC",                                         names(AlexCosmicValid_sig_df))rownames(AlexCosmicValid_sig_df) <- Alex_COSMIC_rownames
#对30个signature进行颜色设置COSMIC_signature_colour_vector <- c("green","pink","goldenrod",    "lightblue","blue","orangered","yellow",    "orange","brown","purple","red",    "darkblue","magenta","maroon",    "yellowgreen","violet","lightgreen",    "sienna4","deeppink","darkorchid",    "seagreen","grey","darkgrey",    "black","yellow4","coral2","chocolate2",    "navyblue","plum","springgreen")#30个signature的生物学意义COSMIC_bio_process_vector <- c("spontaneous deamination","APOBEC",  "defect DNA DSB repair hom. recomb.",  "tobacco mutatgens, benzo(a)pyrene",  "unknown",  "defect DNA MMR, found in MSI tumors",  "UV light exposure","unknown","POL eta and SHM",  "altered POL E",  "alkylating agents, temozolomide",  "unknown","APOBEC","unknown",  "defect DNA MMR","unknown","unknown",  "unknown","unknown",  "associated w. small indels at repeats",  "unknown","aristocholic acid","unknown",  "aflatoxin","unknown","defect DNA MMR",   "unknown","unknown","tobacco chewing","unknown")AlexCosmicValid_sigInd_df <- data.frame(sig=colnames(AlexCosmicValid_sig_df))AlexCosmicValid_sigInd_df$index <- seq_len(dim(AlexCosmicValid_sigInd_df)[1])AlexCosmicValid_sigInd_df$colour <- COSMIC_signature_colour_vectorAlexCosmicValid_sigInd_df$process <- COSMIC_bio_process_vector
current_sig_df <- AlexCosmicValid_sig_dfcurrent_sigInd_df <- AlexCosmicValid_sigInd_dfCOSMICExposures_df <-  LCD(data_df,current_sig_df)
COSMIC_subgroups_df <-      make_subgroups_df(data,     COSMICExposures_df)#绘图exposures_barplot( in_exposures_df = COSMICExposures_df, in_subgroups_df = COSMIC_subgroups_df)

绘图展示如下

 

#换颜色绘图 exposures_barplot(  in_exposures_df = COSMICExposures_df,  in_signatures_ind_df = current_sigInd_df,  in_subgroups_df = COSMIC_subgroups_df)

                                        绘图展示如下

#设置阈值绘图,低于阈值的将过滤掉,不会展示在图上。my_cutoff <- 0.06general_cutoff_vector <- rep(my_cutoff,dim(current_sig_df)[2])CosmicValid_cutoffGen_LCDlist <- LCD_complex_cutoff(  in_mutation_catalogue_df = data_df,  in_signatures_df = current_sig_df,  in_cutoff_vector = general_cutoff_vector,  in_sig_ind_df = current_sigInd_df)   exposures_barplot(    in_exposures_df = CosmicValid_cutoffGen_LCDlist$exposures,    in_signatures_ind_df = CosmicValid_cutoffGen_LCDlist$out_sig_ind_df,    in_subgroups_df = COSMIC_subgroups_df)

   过滤阈值后结果如下

#绘图标准化exposures_barplot(  in_exposures_df = CosmicValid_cutoffGen_LCDlist$norm_exposures,  in_signatures_ind_df = CosmicValid_cutoffGen_LCDlist$out_sig_ind_df,  in_subgroups_df = COSMIC_subgroups_df)

        过滤阈值标准化结果如下

这个R包绘图到此就介绍完了,小伙伴们是不是很想试一试呢?那么小编再次在提醒一下注意事项:1、注意使用的signature选择;2注意基因组版本的使用。

小伙伴们赶紧测试起来吧,如果觉得小编分享的还不错的,请用你的发高分paper的小手转发转发,让更多的小伙伴使用这个包。

生物信息学

网络可视化R包-geomnet

2020-8-28 4:21:24

生物信息学

识别无监督类的工具包ConsensusClusterPlus

2020-8-28 4:27:16

加入Q群
0 条回复 A文章作者 M管理员
    暂无讨论,说说你的看法吧
个人中心
今日签到
有新私信 私信列表
搜索