Intro

We investigate how phylogeny drives the clustering of mammals species in the Wallacea region.

library(ape)
library(phytools)
## Loading required package: maps
library(methClust)
library(rasterVis)
## Loading required package: raster
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following objects are masked from 'package:ape':
## 
##     rotate, zoom
## Loading required package: lattice
## Loading required package: latticeExtra
## Loading required package: RColorBrewer
library(gtools)
library(sp)
library(rgdal)
## rgdal: version: 1.2-20, (SVN revision 725)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
##  Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/gdal
##  GDAL binary built with GEOS: FALSE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/proj
##  Linking to sp version: 1.2-7
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:latticeExtra':
## 
##     layer
library(maps)
library(mapdata)
library(mapplots)
library(scales)
library(ggthemes)

data

mamms <- get(load("../data/mammals_with_bats.rda"))
latlong_chars <- rownames(mamms)
latlong <- cbind.data.frame(as.numeric(sapply(latlong_chars, 
                              function(x) return(strsplit(x, "_")[[1]][1]))),
         as.numeric(sapply(latlong_chars, 
                           function(x) return(strsplit(x, "_")[[1]][2]))))
phylo.counts <- function(counts, tree, collapse_at){
  root_node <- length(tree$tip.label) + 1
  root_age <- ape::branching.times(tree)[names(ape::branching.times(tree)) == root_node]
  trees_at_slice <- phytools::treeSlice(tree, root_age - collapse_at)
  counts_at_slice <- as.data.frame(counts)
  for( i in 1:length(trees_at_slice)){
    new.column <- as.data.frame(rowSums(counts_at_slice[,trees_at_slice[[i]]$tip.label]))
    colnames(new.column) <- trees_at_slice[[i]]$tip.label[1]
    drops <- trees_at_slice[[i]]$tip.label
    counts_at_slice <- counts_at_slice[,!(names(counts_at_slice) %in% drops)]
    counts_at_slice <- cbind(counts_at_slice, new.column)
  }
  counts_at_slice <- as.matrix(counts_at_slice)
  return(counts_at_slice)
}

Mammals

Create phylogenic counts data

mammals_tree <- ape::read.nexus("../data/mammals.tre")
tree_file <- mammals_tree$mammalST_MSW05_bestDates
mammals_data <- get(load("../data/mammals_with_bats.rda"))
colnames(mammals_data) <- gsub("[.]","_",colnames(mammals_data))
# keep_names <- colnames(mammals_data)[colnames(mammals_data) %in% mammals_tree$mammalST_MSW05_bestDates$tip.label]
# mammals_data_2 <- mammals_data[, keep_names]
tips_to_drop <- setdiff(tree_file$tip.label, colnames(mammals_data))
new_tree <- drop.tip(tree_file, tips_to_drop, trim.internal = TRUE, subtree = FALSE, root.edge = 0,
                     rooted = is.rooted(tree_file),
                     collapse.singles = TRUE, interactive = FALSE)
seq2 <- seq(5, 50, length.out = 10)

Model

phylo_gom <- list()
for(num in 1:length(seq2)){
  mammals_phylo <- phylo.counts(mammals_data, new_tree, collapse_at = seq2[num])
  cat("Processing over", dim(mammals_phylo)[2], "species \n")
  mammals_phylo[mammals_phylo > 0] <- 1
  M <- 1
  fits_list <- list()
  L_array <- c()
  for(m in 1:M){
    counter = 0
    while(counter != 1){
      tmp <- try(meth_topics(mammals_phylo, 1 - mammals_phylo,
                             K=10, tol = 10,
                             use_squarem = FALSE), TRUE)
      if(!inherits(tmp, "try-error")){
        counter = 1
      }else{
        counter = 0
      }
    }
    fits_list[[m]] <- tmp
    cat("We are at iteration", m, "\n")
  }
  
  loglik <- unlist(lapply(fits_list, function(x) return(x$L)))
  ids <- which.max(loglik)
  phylo_gom[[num]] <- fits_list[[ids]]
}
save(phylo_gom, file = "../output/phylo_gom_mammals/gom_10.rda")

Phylogenet cluster plots

phylo_gom <- get(load("../output/phylo_gom_mammals/gom_10.rda"))
color = c("red", "cornflowerblue", "cyan", "brown4", "burlywood", "darkgoldenrod1",
          "azure4", "green","deepskyblue","yellow", "azure1")
intensity <- 0.8
for(num in 1:length(seq2)){
  png(filename=paste0("../docs/phylo_mammals_K_10/geostructure_phylo_", seq2[num], ".png"),width = 1000, height = 800)
map("worldHires",
    ylim=c(-18,20), xlim=c(90,160), # Re-defines the latitude and longitude range
    col = "gray", fill=TRUE, mar=c(0.1,0.1,0.1,0.1))
lapply(1:dim(phylo_gom[[num]]$omega)[1], function(r)
  add.pie(z=as.integer(100*phylo_gom[[num]]$omega[r,]),
          x=latlong[r,1], y=latlong[r,2], labels=c("","",""),
          radius = 0.5,
          col=c(alpha(color[1],intensity),alpha(color[2],intensity),
                alpha(color[3], intensity), alpha(color[4], intensity),
                alpha(color[5], intensity), alpha(color[6], intensity),
                alpha(color[7], intensity), alpha(color[8], intensity),
                alpha(color[9], intensity), alpha(color[10], intensity),
                alpha(color[11], intensity))));
dev.off()
}

Mammals without bats

Create phylogenic counts data

mammals_tree <- ape::read.nexus("../data/mammals.tre")
tree_file <- mammals_tree$mammalST_MSW05_bestDates
mammals_data <- get(load("../data/mammals_without_bats.rda"))
colnames(mammals_data) <- gsub("[.]","_",colnames(mammals_data))
# keep_names <- colnames(mammals_data)[colnames(mammals_data) %in% mammals_tree$mammalST_MSW05_bestDates$tip.label]
# mammals_data_2 <- mammals_data[, keep_names]
tips_to_drop <- setdiff(tree_file$tip.label, colnames(mammals_data))
new_tree <- ape::drop.tip(tree_file, tips_to_drop, trim.internal = TRUE, subtree = FALSE, root.edge = 0, rooted = is.rooted(tree_file),
collapse.singles = TRUE, interactive = FALSE)

Model

phylo_gom <- list()
seq2 <- seq(5, 50, length.out = 10)
for(num in 1:length(seq2)){
  mammals_phylo <- phylo.counts(mammals_data, new_tree, collapse_at = seq2[num])
  cat("Processing over", dim(mammals_phylo)[2], "species \n")
  mammals_phylo[mammals_phylo > 0] <- 1
  M <- 1
  fits_list <- list()
  L_array <- c()
  for(m in 1:M){
    counter = 0
    while(counter != 1){
      tmp <- try(meth_topics(mammals_phylo, 1 - mammals_phylo,
                             K=10, tol = 10,
                             use_squarem = FALSE), TRUE)
      if(!inherits(tmp, "try-error")){
        counter = 1
      }else{
        counter = 0
      }
    }
    fits_list[[m]] <- tmp
    cat("We are at iteration", m, "\n")
  }
  
  loglik <- unlist(lapply(fits_list, function(x) return(x$L)))
  ids <- which.max(loglik)
  phylo_gom[[num]] <- fits_list[[ids]]
}
save(phylo_gom, file = "../output/phylo_gom_mammals_no_bats/gom_10.rda")

Phylogenet cluster plots

phylo_gom <- get(load("../output/phylo_gom_mammals_no_bats/gom_10.rda"))
color = c("red", "cornflowerblue", "cyan", "brown4", "burlywood", "darkgoldenrod1",
          "azure4", "green","deepskyblue","yellow", "azure1")
intensity <- 0.8
for(num in 1:length(seq2)){
  png(filename=paste0("../docs/phylo_mammals_no_bats_K_10/geostructure_phylo_", seq2[num], ".png"),width = 1000, height = 800)
map("worldHires",
    ylim=c(-18,20), xlim=c(90,160), # Re-defines the latitude and longitude range
    col = "gray", fill=TRUE, mar=c(0.1,0.1,0.1,0.1))
lapply(1:dim(phylo_gom[[num]]$omega)[1], function(r)
  add.pie(z=as.integer(100*phylo_gom[[num]]$omega[r,]),
          x=latlong[r,1], y=latlong[r,2], labels=c("","",""),
          radius = 0.5,
          col=c(alpha(color[1],intensity),alpha(color[2],intensity),
                alpha(color[3], intensity), alpha(color[4], intensity),
                alpha(color[5], intensity), alpha(color[6], intensity),
                alpha(color[7], intensity), alpha(color[8], intensity),
                alpha(color[9], intensity), alpha(color[10], intensity),
                alpha(color[11], intensity))));
dev.off()
}

This R Markdown site was created with workflowr