Extract information from IBD data.

Usage

# S3 method for class 'gwid'
extract(obj = "object of class gwid", snp_start, snp_end, ...)

Arguments

obj: object of class gwid (output of function build_gwid)
snp_start: starting position of the SNP range that we want to aggregate.
snp_end: ending position of the SNP range that we want to aggregate.
...: other objects

Value

The output is a result_snps (data.table) object with three columns: “snp_pos”, “case_control”, and “value”.

Examples

# \donttest{
# Fetch the files attached to release v0.0.1 of soroushmdg/gwid into the
# session temp directory.
piggyback::pb_download(
  repo = "soroushmdg/gwid",
  tag = "v0.0.1",
  dest = tempdir()
)
#> ℹ All local files already up-to-date!
# Paths of the input files inside the session temp directory.
# file.path() joins the components with a single, portable separator.
ibd_data_file <- file.path(tempdir(), "chr3.ibd")
genome_data_file <- file.path(tempdir(), "chr3.gds")
phase_data_file <- file.path(tempdir(), "chr3.vcf")
case_control_data_file <- file.path(tempdir(), "case-cont-RA.withmap.Rda")
# Case-control data: load the subject groups from the .Rda file
case_control <- gwid::case_control(
  case_control_rda = case_control_data_file
)
# names of the case and control groups
names(case_control)
#> [1] "cases" "case1" "case2" "cont1" "cont2" "cont3"
# only the cases, cont1, cont2 and cont3 groups are considered in this study
summary(case_control)
#>       Length Class  Mode     
#> cases 478    -none- character
#> case1 178    -none- character
#> case2 300    -none- character
#> cont1 477    -none- character
#> cont2 478    -none- character
#> cont3 478    -none- character
# first three subject names of the cases group
case_control$cases[seq_len(3)]
#> [1] "MC.154405@1075678440" "MC.154595@1075642175" "MC.154701@1076254706"
# Read the SNP data (use SNPRelate to convert it to gds) and count the number
# of minor alleles
snp_data_gds <- gwid::build_gwas(
  gds_data = genome_data_file,
  caco = case_control,
  gwas_generator = TRUE
)
class(snp_data_gds)
#> [1] "gwas"
names(snp_data_gds)
#> [1] "smp.id"   "snp.id"   "snp.pos"  "smp.indx" "smp.snp"  "caco"     "snps"    
# snps holds the counts of minor alleles at each location
head(snp_data_gds$snps)
#> Key: <snp_pos>
#>    snp_pos case_control value
#>      <int>       <fctr> <int>
#> 1:   66894        cases   627
#> 2:   66894        case1   240
#> 3:   66894        case2   387
#> 4:   66894        cont1   639
#> 5:   66894        cont2   647
#> 6:   66894        cont3   646
# Read the haplotype data (output of Beagle)
haplotype_data <- gwid::build_phase(
  phased_vcf = phase_data_file,
  caco = case_control
)
class(haplotype_data)
#> [1] "phase"
names(haplotype_data)
#> [1] "Hap.1" "Hap.2"
# 22302 SNPs and 1911 subjects
dim(haplotype_data$Hap.1)
#> [1] 22302  1911
# Read the IBD data (output of Refined-IBD)
ibd_data <- gwid::build_gwid(
  ibd_data = ibd_data_file,
  gwas = snp_data_gds
)
class(ibd_data)
#> [1] "gwid"
# Refined-IBD output
ibd_data$ibd
#>                              V1    V2                      V3    V4    V5
#>                          <char> <int>                  <char> <int> <int>
#>      1: MC.AMD127769@0123889787     2    MC.160821@1075679055     1     3
#>      2: MC.AMD127769@0123889787     1 MC.AMD107154@0123908746     1     3
#>      3: MC.AMD127769@0123889787     2    9474283-1-0238040187     1     3
#>      4: MC.AMD127769@0123889787     1    MC.159487@1075679208     2     3
#>      5:    MC.163045@1082086165     2    MC.160470@1075679095     1     3
#>     ---                                                                  
#> 377560:    1492602-1-0238095971     2    2235472-1-0238095471     2     3
#> 377561:    4618455-1-0238095900     2    3848034-1-0238094219     1     3
#> 377562:    MC.160332@1075641581     2    9630188-1-0238038787     2     3
#> 377563: MC.AMD122238@0124011436     2    MC.159900@1076254946     1     3
#> 377564: MC.AMD105910@0123907456     1    7542312-1-0238039298     1     3
#>                V6        V7    V8    V9
#>             <int>     <int> <num> <num>
#>      1:  32933295  34817627  3.26 1.884
#>      2:  29995340  31752607  4.35 1.757
#>      3:  34165785  35898774  6.36 1.733
#>      4:  21526766  23162240  8.71 1.635
#>      5:  11822616  13523010  5.29 1.700
#>     ---                                
#> 377560: 194785443 196328849  4.92 1.543
#> 377561: 190235788 192423862  7.77 2.188
#> 377562: 184005719 186184328  5.95 2.179
#> 377563: 181482803 184801115  3.58 3.318
#> 377564: 182440135 183972729  3.03 1.533
# number of IBD segments counted at each SNP location
ibd_data$res
#>           snp_pos case_control value
#>             <num>       <fctr> <num>
#>      1:     66894        cases    27
#>      2:     82010        cases    28
#>      3:     89511        cases    29
#>      4:    104972        cases    29
#>      5:    107776        cases    29
#>     ---                             
#> 133808: 197687252        cont3    44
#> 133809: 197701913        cont3    44
#> 133810: 197744198        cont3    44
#> 133811: 197762623        cont3    44
#> 133812: 197833758        cont3    44
# Plot the IBD counts along chromosome 3
plot(ibd_data, y = c("cases", "cont1"), ly = FALSE)

# Look more closely at the region between 117M and 122M, where the cases group
# shows a significant number of IBDs compared to cont1, cont2 and cont3.
# The plot below is restricted to positions 119026294-120613594.
plot(
  ibd_data,
  y = c("cases", "cont1"),
  snp_start = 119026294,
  snp_end = 120613594,
  ly = FALSE
)

# Fisher test on the same window, with the cases group as reference
model_fisher <- gwid::fisher_test(
  ibd_data,
  case_control,
  reference = "cases",
  snp_start = 119026294,
  snp_end = 120613594
)
class(model_fisher)
#> [1] "test_snps"  "data.table" "data.frame"
plot(model_fisher, y = c("cases", "cont1"), ly = FALSE)

# Haplotype structure on the same window (w = 10), then its frequencies
hap_str <- gwid::haplotype_structure(
  ibd_data,
  phase = haplotype_data,
  w = 10,
  snp_start = 119026294,
  snp_end = 120613594
)
haplo_freq <- gwid::haplotype_frequency(hap_str)
plot(
  haplo_freq,
  y = c("cases", "cont1"),
  plot_type = "haplotype_structure_frequency",
  nwin = 1,
  type = "version1",
  ly = FALSE
)

# }