Introduction
ngstk is an R package that facilitates the analysis of NGS data, including visualization, conversion of data formats for web service input, and other purposes.
You can learn some usage of ngstk through this tutorial.
Usage
Data format conversion
We defined a set of rules to facilitate data format conversion. It is good practice to save the meta information of all output columns. In addition, each input data format should be connected to the output columns as required, using the following fields:
- alias, connects the two tables by column names
- extract_pattern, extracts a defined pattern from a column
- raw and new, replace the raw value with the new value
- na_replace, replaces NA values with a defined value
The following example configuration file can be used to convert the output of iseq, a pipeline for analyzing genetic variants from NGS data, into the input format of the web service ProteinPaint, a tool for visualizing mutation data.
Title = "Proteinpaint configuration file"
# muts2pp is a function that can convert mutation data to Proteinpaint input format
[muts2pp.meta.defined_cols]
colnames = ["gene", "refseq", "chromosome", "start", "aachange", "class", "disease", "sample"]
handler_lib = "default_handlers"
mhandler_lib = "default_mhandlers"
[muts2pp.meta.defined_cols.description]
gene = "Gene symbol, e.g TP53, PTEN"
refseq = "Transcript of refSeq or Ensemble, e.g NM_000546, ENST00000635293"
chromosome = "Chromosome, e.g. chr1, chr2"
start = "Chromosome start location of a mutation site, e.g. 153249385"
aachange = "Amino acid level change of gene mutation, e.g. p.R347C, p.L615delinsDL"
class = "Mutation type, e.g. nonsense, proteinIns, proteinDel, frameshift"
disease = "Disease name or sample group name, e.g. B-ALL, T-ALL, G1, G2"
sample = "Sample name"
[muts2pp.format.iseq.gene]
alias = ["gene", "symbol"]
[muts2pp.format.iseq.refseq]
alias = ["refseq", "transcription.id"]
[muts2pp.format.iseq.chromosome]
alias = ["chromosome"]
[muts2pp.format.iseq.start]
alias = ["start"]
[muts2pp.format.iseq.aachange]
alias = ["aachange", "amino.acid.change"]
extract_pattern = "p[.]+.*$"
[muts2pp.format.iseq.class]
alias = ["mutation_type", "mutation.type"]
raw = ["nonframeshift ins", "nonframeshift del",
"frameshift ins", "frameshift del", "stoploss", "nonsense",
"splice"]
new = ["proteinIns", "proteinDel", "frameshift", "frameshift",
"nonsense", "nonsense", "splice"]
na_replace = "splice"
muts2pp, muts2mutation_mapper, muts2oncoprinter and fusions2pp are the example functions based on the established rules.
demo_file <- system.file("extdata", "demo/proteinpaint/muts2pp_iseq.txt", package = "ngstk")
input_data <- read.table(demo_file, sep = "\t", header = TRUE, stringsAsFactors = FALSE)
disease <- "T-ALL"
input_data <- data.frame(input_data, disease)
input_data$disease <- as.character(input_data$disease)
# Convert mutations data to proteinpaint input
result <- muts2pp(input_data, input_type = "iseq")
head(result)
#> gene refseq chromosome start aachange class disease
#> 1 IDH1 NM_001282386 chr2 209113113 p.R132S missense T-ALL
#> 2 PTPRC NM_080921 chr1 198711362 p.A694T missense T-ALL
#> 3 PTPRC NM_080921 chr1 198685843 p.I281fs frameshift T-ALL
#> 4 ASPM NM_001206846 chr1 197093449 p.L1061F missense T-ALL
#> 5 ASPM NM_018136 chr1 197070987 p.Q2465R missense T-ALL
#> 6 FAT1 NM_005245 chr4 187630354 p.L210F missense T-ALL
#> sample
#> 1 A1
#> 2 A2
#> 3 A3
#> 4 A4
#> 5 A5
#> 6 A6
# Convert mutations data to cbioportal input
result <- muts2mutation_mapper(input_data, input_type = "iseq")
#> Warning in handler(handler_data, config_input, defined_cols, input_data, :
#> Validation_Status were not exists or not be recognize correctly in input
#> data!
#> Warning in handler(handler_data, config_input, defined_cols, input_data, :
#> Mutation_Status were not exists or not be recognize correctly in input
#> data!
#> Warning in handler(handler_data, config_input, defined_cols, input_data, :
#> Center were not exists or not be recognize correctly in input data!
head(result)
#> Hugo_Symbol Sample_ID Protein_Change Mutation_Type Chromosome
#> 1 IDH1 A1 R132S Missense_Mutation chr2
#> 2 PTPRC A2 A694T Missense_Mutation chr1
#> 3 PTPRC A3 I281fs Frame_Shift_Ins chr1
#> 4 ASPM A4 L1061F Missense_Mutation chr1
#> 5 ASPM A5 Q2465R Missense_Mutation chr1
#> 6 FAT1 A6 L210F Missense_Mutation chr4
#> Start_Position End_Position Reference_Allele Variant_Allele
#> 1 209113113 209113113 G T
#> 2 198711362 198711362 G A
#> 3 198685843 198685843 - T
#> 4 197093449 197093449 G A
#> 5 197070987 197070987 T C
#> 6 187630354 187630354 G A
#> Validation_Status Mutation_Status Center
#> 1 NA NA NA
#> 2 NA NA NA
#> 3 NA NA NA
#> 4 NA NA NA
#> 5 NA NA NA
#> 6 NA NA NA
result <- muts2oncoprinter(input_data, input_type = "iseq")
head(result)
#> Sample Gene Alteration Type
#> 1 A1 IDH1 R132S MISSENSE
#> 2 A2 PTPRC A694T MISSENSE
#> 3 A3 PTPRC I281fs TRUNC
#> 4 A4 ASPM L1061F MISSENSE
#> 5 A5 ASPM Q2465R MISSENSE
#> 6 A6 FAT1 L210F MISSENSE
demo_file <- system.file('extdata', 'demo/proteinpaint/fusions2pp_fusioncatcher.txt', package = 'ngstk')
input_data <- read.table(demo_file, sep = '\t', header = TRUE, stringsAsFactors = FALSE)
disease <- 'B-ALL'
sampletype <- 'diagnose'
input_data <- data.frame(input_data, disease, sampletype)
input_data$disease <- as.character(input_data$disease)
# Convert fusions data to proteinpaint input
result <- fusions2pp(input_data, input_type = 'fusioncatcher')
head(result)
#> disease sampletype gene_a refseq_a chr_a position_a gene_b
#> 1 B-ALL diagnose TCF3 ENSG00000071564 chr19 1619110 PBX1
#> 2 B-ALL diagnose TCF3 ENSG00000071564 chr19 1619110 PBX1
#> 3 B-ALL diagnose TCF3 ENSG00000071564 chr19 1619110 PBX1
#> 4 B-ALL diagnose TCF3 ENSG00000071564 chr19 1619110 PBX1
#> 5 B-ALL diagnose TCF3 ENSG00000071564 chr19 1619110 PBX1
#> 6 B-ALL diagnose GYPE ENSG00000197465 chr4 144801564 GYPA
#> refseq_b chr_b position_b strand_a strand_b patient
#> 1 ENSG00000185630 chr1 164761731 - + A1
#> 2 ENSG00000185630 chr1 164761731 - + A2
#> 3 ENSG00000185630 chr1 164761731 - + A3
#> 4 ENSG00000185630 chr1 164704095 - + A4
#> 5 ENSG00000185630 chr1 164704095 - + A5
#> 6 ENSG00000170180 chr4 145040934 - - A6
merge_table_files is another utility function, used to merge multiple table files.
a <- data.frame(col1=1:6, col2=2:7)
b <- data.frame(col1=6:11, col2=1:6)
file_a <- paste0(tempfile(), '_abcd')
file_b <- paste0(tempfile(), '_abcd')
write.table(a, file_a, sep = '\t', row.names = FALSE)
write.table(b, file_b, sep = '\t', row.names = FALSE)
input_files <- c(file_a, file_b)
x1 <- merge_table_files(input_files = input_files)
head(x1)
#> filename
#> 1 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 2 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 3 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 4 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 5 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 6 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> col1 col2
#> 1 1 2
#> 2 2 3
#> 3 3 4
#> 4 4 5
#> 5 5 6
#> 6 6 7
x2 <- merge_table_files(files_dir = tempdir(), pattern = '.*_abcd$')
head(x2)
#> filename
#> 1 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 2 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 3 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 4 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 5 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 6 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> col1 col2
#> 1 1 2
#> 2 2 3
#> 3 3 4
#> 4 4 5
#> 5 5 6
#> 6 6 7
outfn = tempfile()
x3 <- merge_table_files(files_dir = tempdir(), pattern = ".*_abcd$", outfn = outfn)
head(read.table(outfn, sep = "\t", header = TRUE))
#> filename
#> 1 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 2 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 3 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 4 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 5 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> 6 /var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef872a5dfd6c_abcd
#> col1 col2
#> 1 1 2
#> 2 2 3
#> 3 3 4
#> 4 4 5
#> 5 5 6
#> 6 6 7
Data filtration
Data filtration (subsetting) is an important step for cleaning data or running a specific analysis. A series of data filtration functions will be established and stabilized so that they can be reused in the future.
demo_file <- system.file("extdata", "demo/proteinpaint/fusions2pp_fusioncatcher.txt", package = "ngstk")
input_data <- read.table(demo_file, sep = "\t", header = TRUE, stringsAsFactors = FALSE)
# Get data subset according to the defined rule
mhandler_extra_params = list(gene_5 = 1, gene_3 = 2, any_gene = "TCF3", fusions_any_match_flag = TRUE)
result_1 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_1)
#> gene5 gene3 fusion_type
#> 1 TCF3 PBX1 in-frame
#> 2 TCF3 PBX1 in-frame
#> 3 TCF3 PBX1 in-frame
#> 4 TCF3 PBX1 CDS(truncated)/intronic
#> 5 TCF3 PBX1 CDS(truncated)/intronic
mhandler_extra_params = list(gene_3 = 2, right_gene = "GYPA", fusions_right_match_flag = TRUE)
result_2 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_2)
#> gene5 gene3 fusion_type
#> 6 GYPE GYPA in-frame
#> 7 GYPE GYPA in-frame
mhandler_extra_params = list(gene_5 = 1, left_gene = "GYPA", fusions_left_match_flag = TRUE)
result_3 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_3)
#> [1] gene5 gene3 fusion_type
#> <0 rows> (or 0-length row.names)
mhandler_extra_params = list(gene_5 = 1, gene_3 = 2, left_gene = "GYPE", right_gene = "GYPA", fusions_full_match_flag = TRUE)
result_4 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_4)
#> gene5 gene3 fusion_type
#> 6 GYPE GYPA in-frame
#> 7 GYPE GYPA in-frame
mhandler_extra_params = list(gene_5 = 1, gene_3 = 2, left_gene = "GYPE", right_gene = "GYPA", fusions_anyfull_match_flag = TRUE)
result_5 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_5)
#> gene5 gene3 fusion_type
#> 6 GYPE GYPA in-frame
#> 7 GYPE GYPA in-frame
Split data
Splitting data is an optional step if you want to process the data stream in parallel. ngstk provides split_row_data
and split_col_data
to split data.frame and data.table objects.
x1 <- data.frame(col1 = 1:39, col2 = 1:39)
x1
#> col1 col2
#> 1 1 1
#> 2 2 2
#> 3 3 3
#> 4 4 4
#> 5 5 5
#> 6 6 6
#> 7 7 7
#> 8 8 8
#> 9 9 9
#> 10 10 10
#> 11 11 11
#> 12 12 12
#> 13 13 13
#> 14 14 14
#> 15 15 15
#> 16 16 16
#> 17 17 17
#> 18 18 18
#> 19 19 19
#> 20 20 20
#> 21 21 21
#> 22 22 22
#> 23 23 23
#> 24 24 24
#> 25 25 25
#> 26 26 26
#> 27 27 27
#> 28 28 28
#> 29 29 29
#> 30 30 30
#> 31 31 31
#> 32 32 32
#> 33 33 33
#> 34 34 34
#> 35 35 35
#> 36 36 36
#> 37 37 37
#> 38 38 38
#> 39 39 39
x <- split_row_data(x1, sections = 2)
x
#> [[1]]
#> col1 col2
#> 1 1 1
#> 2 2 2
#> 3 3 3
#> 4 4 4
#> 5 5 5
#> 6 6 6
#> 7 7 7
#> 8 8 8
#> 9 9 9
#> 10 10 10
#> 11 11 11
#> 12 12 12
#> 13 13 13
#> 14 14 14
#> 15 15 15
#> 16 16 16
#> 17 17 17
#> 18 18 18
#> 19 19 19
#>
#> [[2]]
#> col1 col2
#> 20 20 20
#> 21 21 21
#> 22 22 22
#> 23 23 23
#> 24 24 24
#> 25 25 25
#> 26 26 26
#> 27 27 27
#> 28 28 28
#> 29 29 29
#> 30 30 30
#> 31 31 31
#> 32 32 32
#> 33 33 33
#> 34 34 34
#> 35 35 35
#> 36 36 36
#> 37 37 37
#> 38 38 38
#> 39 39 39
x <- split_row_data(x1, sections = 3)
x
#> [[1]]
#> col1 col2
#> 1 1 1
#> 2 2 2
#> 3 3 3
#> 4 4 4
#> 5 5 5
#> 6 6 6
#> 7 7 7
#> 8 8 8
#> 9 9 9
#> 10 10 10
#> 11 11 11
#> 12 12 12
#> 13 13 13
#>
#> [[2]]
#> col1 col2
#> 14 14 14
#> 15 15 15
#> 16 16 16
#> 17 17 17
#> 18 18 18
#> 19 19 19
#> 20 20 20
#> 21 21 21
#> 22 22 22
#> 23 23 23
#> 24 24 24
#> 25 25 25
#> 26 26 26
#>
#> [[3]]
#> col1 col2
#> 27 27 27
#> 28 28 28
#> 29 29 29
#> 30 30 30
#> 31 31 31
#> 32 32 32
#> 33 33 33
#> 34 34 34
#> 35 35 35
#> 36 36 36
#> 37 37 37
#> 38 38 38
#> 39 39 39
x1 <- data.frame(col1 = 1:10, col2 = 11:20)
x1.t <- t(x1)
x <- split_col_data(x1.t, sections = 3)
x
#> [[1]]
#> [,1] [,2] [,3]
#> col1 1 2 3
#> col2 11 12 13
#>
#> [[2]]
#> [,1] [,2] [,3]
#> col1 4 5 6
#> col2 14 15 16
#>
#> [[3]]
#> [,1] [,2] [,3] [,4]
#> col1 7 8 9 10
#> col2 17 18 19 20
# split file
dat <- data.frame(col1 = 1:10000)
outfn <- tempfile()
write.table(dat, outfn, sep = "\t")
split_row_file(outfn)
#> $`1`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_1"
#>
#> $`2`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_2"
#>
#> $`3`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_3"
#>
#> $`4`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_4"
#>
#> $`5`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_5"
#>
#> $`6`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_6"
#>
#> $`7`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_7"
#>
#> $`8`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_8"
#>
#> $`9`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_9"
#>
#> $`10`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_10"
#>
#> $`11`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_11"
#>
#> $`12`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_12"
#>
#> $`13`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_13"
#>
#> $`14`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_14"
#>
#> $`15`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_15"
#>
#> $`16`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_16"
#>
#> $`17`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_17"
#>
#> $`18`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_18"
#>
#> $`19`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_19"
#>
#> $`20`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_20"
#>
#> $`21`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_21"
#>
#> $`22`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_22"
#>
#> $`23`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_23"
#>
#> $`24`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_24"
#>
#> $`25`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_25"
#>
#> $`26`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_26"
#>
#> $`27`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_27"
#>
#> $`28`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_28"
#>
#> $`29`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_29"
#>
#> $`30`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_30"
#>
#> $`31`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_31"
#>
#> $`32`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_32"
#>
#> $`33`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_33"
#>
#> $`34`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_34"
#>
#> $`35`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_35"
#>
#> $`36`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_36"
#>
#> $`37`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_37"
#>
#> $`38`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_38"
#>
#> $`39`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_39"
#>
#> $`40`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_40"
#>
#> $`41`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_41"
#>
#> $`42`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_42"
#>
#> $`43`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_43"
#>
#> $`44`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_44"
#>
#> $`45`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_45"
#>
#> $`46`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_46"
#>
#> $`47`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_47"
#>
#> $`48`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_48"
#>
#> $`49`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_49"
#>
#> $`50`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_50"
#>
#> $`51`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_51"
#>
#> $`52`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_52"
#>
#> $`53`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_53"
#>
#> $`54`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_54"
#>
#> $`55`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_55"
#>
#> $`56`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_56"
#>
#> $`57`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_57"
#>
#> $`58`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_58"
#>
#> $`59`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_59"
#>
#> $`60`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_60"
#>
#> $`61`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_61"
#>
#> $`62`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_62"
#>
#> $`63`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_63"
#>
#> $`64`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_64"
#>
#> $`65`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_65"
#>
#> $`66`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_66"
#>
#> $`67`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_67"
#>
#> $`68`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_68"
#>
#> $`69`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_69"
#>
#> $`70`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_70"
#>
#> $`71`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_71"
#>
#> $`72`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_72"
#>
#> $`73`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_73"
#>
#> $`74`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_74"
#>
#> $`75`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_75"
#>
#> $`76`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_76"
#>
#> $`77`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_77"
#>
#> $`78`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_78"
#>
#> $`79`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_79"
#>
#> $`80`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_80"
#>
#> $`81`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_81"
#>
#> $`82`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_82"
#>
#> $`83`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_83"
#>
#> $`84`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_84"
#>
#> $`85`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_85"
#>
#> $`86`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_86"
#>
#> $`87`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_87"
#>
#> $`88`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_88"
#>
#> $`89`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_89"
#>
#> $`90`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_90"
#>
#> $`91`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_91"
#>
#> $`92`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_92"
#>
#> $`93`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_93"
#>
#> $`94`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_94"
#>
#> $`95`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_95"
#>
#> $`96`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_96"
#>
#> $`97`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_97"
#>
#> $`98`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_98"
#>
#> $`99`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_99"
#>
#> $`100`
#> [1] "/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T//RtmphjjBTA/filef876d8932e2_split_100"
Filename Process
files_dir <- system.file('extdata', 'demo/format', package = 'ngstk')
pattern <- '*.txt'
list.files(files_dir, pattern)
#> [1] "-cancer-circrna.txt" "common-circrna.txt"
#> [3] "hg38-cancer-circrna.txt" "hg38-common-circrna.txt"
#> [5] "hg38_cancer_circrna_mre.txt"
x <- format_filenames(files_dir = files_dir, pattern = pattern, prefix = 'hg38_')
x
#> [1] "/private/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T/RtmpgzhqT1/Rbuildf764a54524c/ngstk/vignettes/hg38_cancer_circrna.txt"
#> [2] "/private/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T/RtmpgzhqT1/Rbuildf764a54524c/ngstk/vignettes/hg38_common_circrna.txt"
#> [3] "/private/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T/RtmpgzhqT1/Rbuildf764a54524c/ngstk/vignettes/hg38_cancer_circrna.txt"
#> [4] "/private/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T/RtmpgzhqT1/Rbuildf764a54524c/ngstk/vignettes/hg38_common_circrna.txt"
#> [5] "/private/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T/RtmpgzhqT1/Rbuildf764a54524c/ngstk/vignettes/hg38_cancer_circrna_mre.txt"
Command line utils functions
# Collect command line bins files in R package
rbin('ngstk', tempdir())
#> Copying ngstk bin/ demo_bin.sh to /private/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T/RtmphjjBTA
#> Please set /private/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T/RtmphjjBTA in your PATH to use the bin files.
#> Linux/Mac OS X: echo 'export PATH=$PATH:/private/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T/RtmphjjBTA\n' >> ~/.bashrc
#> R users: echo 'Sys.setenv(PATH="/Library/Frameworks/R.framework/Resources/bin:/usr/local/Cellar/hugo/0.48/bin:/Users/ljf/Bioinfo/miniconda3/bin:/usr/local/Cellar/gnu-sed/4.4/bin:/Library/Frameworks/R.framework/Versions/3.5/Resources/library/ngstk/extdata/tools/rbash:/Users/ljf/Bioinfo/spack/bin:/Users/ljf/Bioinfo/miniconda3/bin:/usr/local/Cellar/gnu-sed/4.4/bin:/Library/Frameworks/R.framework/Versions/3.5/Resources/library/ngstk/extdata/tools/rbash:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin:/Library/TeX/texbin:/usr/local/go/bin:/opt/X11/bin:/private/var/folders/nc/yl5qhkkn6vxf_m7s_yz2kzvh0000gn/T/RtmphjjBTA")\n' >> ~/.Rprofile
#> ngstk.demo_bin.sh
#> TRUE
# Print sub commands
option_list <- list(
make_option(c('-l', '--list-all-subcmds'), action = 'store_true',
default = FALSE, help = 'Print all supported subcmds of ngsjs.')
)
subcmds_list <- list(subcmd1 = 'Use method 1 to plot boxplot',
subcmd2 = 'Use method 2 to plot boxplot')
description <- 'Method to plot boxplot'
usage <- 'usage: %prog [options] [params]'
opt_parser_obj <- opt_parser(subcmds_list = subcmds_list,
option_list = option_list,
description = description,
usage = usage)
# Print the command line message
# You can define the message order using the
# parameter help_order = c("description", "usage", "options", "subcmds", "epilogue")
print_help(opt_parser_obj)
#> Description:
#> Method to plot boxplot
#>
#> Usage: %prog [options] [params]
#>
#> Options:
#> -l, --list-all-subcmds
#> Print all supported subcmds of ngsjs.
#> -h, --help
#> Show this help message and exit
#>
#> Commands:
#> subcmd1 Use method 1 to plot boxplot
#> subcmd2 Use method 2 to plot boxplot
Download functions
# Use the future package to download URLs in parallel with logs
urls <- c(paste0('https://raw.githubusercontent.com/',
'Miachol/ftp/master/files/images/bioinstaller/maftools3.png'),
paste0('https://raw.githubusercontent.com/',
'Miachol/ftp/master/files/images/bioinstaller/maftools4.png'))
par_download(urls, sprintf('%s/%s', tempdir(), basename(urls)))
Colors
set_colors('default')
#> [1] "#0073c3" "#efc000" "#696969" "#ce534c" "#7ba6db" "#035892" "#052135"
#> [8] "#666633" "#660000" "#990000"
set_colors('proteinpaint_mutations')
#> [1] "#3987cc" "#ff7f0e" "#db3d3d" "#6633ff" "#bbbbbb" "#9467bd" "#998199"
#> [8] "#8c564b" "#819981" "#5781ff"
set_colors('proteinpaint_chromHMM_state')
#> [1] "#c0222c" "#f12424" "#ff00c7" "#d192fb" "#f9982f" "#fcc88e" "#fbf876"
#> [8] "#a6d67b" "#1fb855" "#007d37" "#00a99e" "#11aaec" "#186db9" "#3800f8"
#> [15] "#961a8b" "#47005f"