library(data.table)
library(tidyverse)
library(GenomicRanges)
library(regioneR)
library(Rsamtools)
source("global.R")
whatshap vcf output which include ps field
path_to_denovo <- "./denovo.bed"
path_to_gVCF <- "ped_phased-ont-pac.vcf.gz"
# make sure tabix index file exit for the vcf.gz file
#Rsamtools::indexTabix(path_to_gVCF,format = "vcf")
denovo <- fread(path_to_denovo)
setnames(denovo,c("V1","V2","V3","V4","V5"),c("chrom","start","end","ref","alt"))
head(denovo)
## chrom start end ref alt
## 1: chr18 46882281 46882281 TAATGTTCAAGAATTTAAGGATAAAAAG T
## 2: chr20 41633569 41633569 C CATT
## 3: chr14 91072663 91072663 AAC A
## 4: chr15 59062051 59062051 CTTTTTTT C
## 5: chr8 102862030 102862030 C CA
## 6: chr9 23756559 23756559 AAC A
ref_genome = "GRCh38"
denovo.gr = denovo[9,] %>% regioneR::toGRanges()
# get 1Mb upstream and downstream flanking region
PhaseDenovo(path_to_gVCF,ref_genome,denovo.gr)%>%as.data.frame()
## [1] "**Step1:read vcf file**"
## [1] "**Step2:assign haplotype based on majority vote**"
## [1] 1
## [1] 2
## [1] 3
## seqnames start end width strand ref alt ps.V1 ps.V2 ps.V3
## 1 chr1 164465932 164465932 1 * C T 163844148 NA NA
## gt.BAB9637 gt.BAB9638 gt.BAB9639 hap
## 1 0|1 0/0 0/0 0|1:BAB9639