Required library

library(data.table)
library(tidyverse)
library(GenomicRanges)
library(regioneR)
library(Rsamtools)
source("global.R")

whatshap vcf output which include ps field

path_to_denovo <- "./denovo.bed"
path_to_gVCF <- "ped_phased-ont-pac.vcf.gz"
# make sure tabix index file exit for the vcf.gz file
#Rsamtools::indexTabix(path_to_gVCF,format = "vcf")
denovo <- fread(path_to_denovo)
setnames(denovo,c("V1","V2","V3","V4","V5"),c("chrom","start","end","ref","alt"))
head(denovo)
##    chrom     start       end                          ref  alt
## 1: chr18  46882281  46882281 TAATGTTCAAGAATTTAAGGATAAAAAG    T
## 2: chr20  41633569  41633569                            C CATT
## 3: chr14  91072663  91072663                          AAC    A
## 4: chr15  59062051  59062051                     CTTTTTTT    C
## 5:  chr8 102862030 102862030                            C   CA
## 6:  chr9  23756559  23756559                          AAC    A

Extract long read phased block on single de novo position

ref_genome = "GRCh38"
denovo.gr = denovo[9,] %>% regioneR::toGRanges()
 # get 1Mb upstream and downstream flanking region
PhaseDenovo(path_to_gVCF,ref_genome,denovo.gr)%>%as.data.frame()
## [1] "**Step1:read vcf file**"
## [1] "**Step2:assign haplotype based on majority vote**"
## [1] 1
## [1] 2
## [1] 3
##   seqnames     start       end width strand ref alt     ps.V1 ps.V2 ps.V3
## 1     chr1 164465932 164465932     1      *   C   T 163844148    NA    NA
##   gt.BAB9637 gt.BAB9638 gt.BAB9639         hap
## 1        0|1        0/0        0/0 0|1:BAB9639