Adapted from https://johnbhorne.wordpress.com/2016/09/15/still-making-haplotype-networks-the-old-way-how-to-do-it-in-r/
library("ape")
library("pegas")
Data in fasta format
data<-read.dna("/Users/martafarrebelmonte/Documents/WORK/Langurs/SequencesAll_wildZooLabels.fasta", format="fasta")
Check the alignment graph – does it have gaps? Where are they? If they are at the ends, you can easily trimmed the aligment by subsetting the positions (columns) you want to keep
dataAli<-clustal(data)
checkAlignment(dataAli)
##
## Number of sequences: 48
## Number of sites: 456
##
## Some gap lengths are not multiple of 3: 1 2 5 7 8 10 11 13 14 16 55
##
## Frequencies of gap lengths:
## 1 2 3 5 6 7 8 9 10 11 12 13 14 16 55 63
## 340 21 22 2 7 6 4 4 2 1 1 2 2 1 25 6
## => length of gaps on the left border of the alignment: 55 63 12 13 2 7 16 14 10 9 11 0 8
## => length of gaps on the right border of the alignment: 10 9 8 8 8 7 7 7 7 6 6 6 6 6 5 5 3 3 3 3 3 3
##
## Number of unique contiguous base segments defined by gaps: 61
## Number of segment lengths not multiple of 3: 39
## => on the left border of the alignement: 1
## => on the right border : 1
## => positions of these segments inside the alignment: 56..78 80..257 383..387 389..410 422..453 13..35 37..40 42..45 47..78 412..413 415..418 421..430 433..437 444..448 14..35 37..41 433..433 439..440 3..45 412..430 433..440 8..35 444..450 17..35 416..420 426..430 11..35 444..447 415..430 444..451 10..35 37..37 39..45 444..444 383..410 421..424 446..450
##
## Number of segregating sites (including gaps): 155
## Number of sites with at least one substitution: NA
## Number of sites with 1, 2, 3 or 4 observed bases:
## 1 2 3 4
## 324 71 14 0
trimmedAli<-dataAli[,55:452]
dataHaplo<-haplotype(trimmedAli)
dataHaplo
##
## Haplotypes extracted from: trimmedAli
##
## Number of haplotypes: 41
## Sequence length: 398
##
## Haplotype labels and frequencies:
##
## I II III IV V VI VII VIII IX
## 1 1 1 1 1 1 1 1 1
## X XI XII XIII XIV XV XVI XVII XVIII
## 1 1 1 1 1 1 1 1 1
## XIX XX XXI XXII XXIII XXIV XXV XXVI XXVII
## 1 1 1 1 1 1 1 2 2
## XXVIII XXIX XXX XXXI XXXII XXXIII XXXIV XXXV XXXVI
## 2 1 2 1 2 1 1 2 1
## XXXVII XXXVIII XXXIX XL XLI
## 2 1 1 1 1
Here we generate the networks and create a table with sampleID and haplotypeID
It creates a txt file with the information of sampleID and haplotypeID
dataHaplo<-sort(dataHaplo, what = "labels")
dataNet<-haploNet(dataHaplo)
countHap <- function(hap = h, dna = x){
with(
stack(setNames(attr(hap, "index"), rownames(hap))),
table(hap = ind, pop = attr(dna, "dimnames")[[1]][values])
)
}
write.table(countHap (dataHaplo, trimmedAli),file="FILE_NAME.txt", sep="\t", quote=FALSE)
It prints the plot in an external file
pdf(file="/Users/martafarrebelmonte/Documents/WORK/Langurs/haploNetRenamedAllSeqTrimmed.pdf", width = 8, height = 15, pointsize = 10)
plot(dataNet, size=attr(dataNet, "freq"), scale.ratio=0.2, pie=countHap(dataHaplo, trimmedAli), show.mutation=3)
legend("bottomleft", colnames(countHap(dataHaplo, trimmedAli)), col=rainbow(ncol(countHap(dataHaplo, trimmedAli))), pch=19, ncol=2)
dev.off()
## quartz_off_screen
## 2
pdf(file="/Users/martafarrebelmonte/Documents/WORK/Langurs/haploNetReplot3.pdf", width = 8, height = 15, pointsize = 10)
mydata <- as.data.frame(countHap(dataHaplo, trimmedAli))
good <- mydata[mydata$Freq == 1,]
IDs <- strsplit(as.character(good$pop), "_")
IDs <- sapply(IDs, "[[", 3)
new.hap <- table(good$hap, IDs)
plot(dataNet, size=attr(dataNet, "freq"), scale.ratio=0.2, pie=new.hap, show.mutation=3)
legend("bottomright", colnames(new.hap), col=rainbow(ncol(new.hap)), pch=19, ncol=2)
dev.off()
## quartz_off_screen
## 2
hapDiv<-hap.div(trimmedAli, variance=TRUE)
pi<-nuc.div(trimmedAli, variance = TRUE)
tajima<-tajima.test(trimmedAli)
hapDiv
## [1] 0.9937943262 0.0000190795
pi
## [1] 0.0336523807 0.0002920905
tajima
## $D
## [1] -1.651642
##
## $Pval.normal
## [1] 0.09860757
##
## $Pval.beta
## [1] 0.07985941
wild<-trimmedAli[1:25,]
zoo<-trimmedAli[26:48,]
print("hapDiv,nucDiv and Tajima for Wild")
## [1] "hapDiv,nucDiv and Tajima for Wild"
hap.div(wild, variance=TRUE)
## [1] 1.0e+00 6.4e-05
nuc.div(wild, variance = TRUE)
## [1] 0.0371440750 0.0003673801
tajima.test(wild)
## $D
## [1] 0.2105861
##
## $Pval.normal
## [1] 0.8332102
##
## $Pval.beta
## [1] 0.8186734
print("hapDiv,nucDiv and Tajima for Zoo")
## [1] "hapDiv,nucDiv and Tajima for Zoo"
hap.div(zoo, variance=TRUE)
## [1] 0.9723320158 0.0002068651
nuc.div(zoo, variance = TRUE)
## [1] 0.0272262931 0.0002055543
tajima.test(zoo)
## $D
## [1] -2.087174
##
## $Pval.normal
## [1] 0.0368724
##
## $Pval.beta
## [1] 0.01497876