diff --git a/.gitignore b/.gitignore index c155c845e04738f19eb87b15a31adb4e84142224..ee3d72b9ce85ed0673faf083f70c44b1fef0a0b7 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ Stats/.RData Stats/.Rhistory Stats/.Rhistory Stats/.Rhistory +Stats/.Rhistory +Stats/.Rhistory diff --git a/README.md b/README.md index f38fe69da315976e4e6c629b1c7f1cf2cc847074..2a6ec2616f34c24db69703e6f1fdbbcdc2cf391c 100644 --- a/README.md +++ b/README.md @@ -28,4 +28,4 @@ Feel free to contact me if you have questions, tips or anything else to say. I'd Loïc Lehnhoff - <loic.lehnhoff@gmail.com> ## Related to -Results and scripts are presented in details in: Lehnhoff, L.; Glotin, H.; Bernard, S.; Dabin, W.; Le Gall, Y.; Menut, E.; Meheust, E.; Peltier, H.; Pochat, A.; Pochat, K.; Rimaud, T.; Sourget, Q.; Spitz, J.; Van Canneyt, O.; Mérigot, B. *Behavioural response of common *dolphins Delphinus* delphis to a bio-inspired acoustic device for limiting fishery by-catch.* Sustainability, 1, 0 (**IN SUBMISSION PROCESS**) \ No newline at end of file +Results and scripts are presented in details in: Lehnhoff, L.; Glotin, H.; Bernard, S.; Dabin, W.; Le Gall, Y.; Menut, E.; Meheust, E.; Peltier, H.; Pochat, A.; Pochat, K.; Rimaud, T.; Sourget, Q.; Spitz, J.; Van Canneyt, O.; Mérigot, B. Behavioural Responses of Common Dolphins Delphinus delphis to a Bio-Inspired Acoustic Device for Limiting Fishery By-Catch. Sustainability 2022, 14, 13186. https://doi.org/10.3390/su142013186 \ No newline at end of file diff --git a/Stats/.Rhistory b/Stats/.Rhistory index 8f941cb426bd0465bdf87499129a14164d1727b4..2d7f5bdf7fc481d7600c8b044f236b475791a9cb 100644 --- a/Stats/.Rhistory +++ b/Stats/.Rhistory @@ -1,129 +1,3 @@ -### Model for clicks -# Using NB model: -mod.cli <- glm.nb(number_of_clicks ~ acoustic + fishing_net + acoustic:fishing_net + offset(log(number)), -data=acoustic.dta) -car::Anova(mod.cli, type=3) -dwtest(mod.cli) # H0 -> independent if p>0.05 (autocorrelation if p<0.05) -bptest(mod.cli) # H0 -> homoscedasticity if p<0.05 -mod.cli$deviance/mod.cli$df.residual -# slight overdispersion. (ZINB does not clearly improve results so we keep this) -# FYI1: Comparison of combination of explanatory variables between models -# were compared based on BIC criterion. -# The model with the lowest BIC was kept (and is the one shown) -# FYI2: log(number of dolphin per group) does have an effect on data but we have -# no interest in investigating it, that is why we use it as an offset. -##################### Boxplots and comparisons ##################### -### Functions to compute stats -computeLetters <- function(temp, category) { -test <- multcomp::cld(object = temp$emmeans, -Letters = letters) -myletters_df <- data.frame(category=test[,category], -letter=trimws(test$.group)) -colnames(myletters_df)[1] <- category -return(myletters_df) -} -computeStats <- function(data, category, values, two=NULL, three=NULL) { -my_sum <- data %>% -group_by({{category}}, {{two}}, {{three}}) %>% -summarise( -n=n(), -mean=mean({{values}}), -sd=sd({{values}}) -) %>% -mutate( se=sd/sqrt(n)) %>% -mutate( ic=se * qt((1-0.05)/2 + .5, n-1)) -return(my_sum) -} -barPlot <- function(dta, signif, category, old_names, new_names, fill=NULL, size=5, -height, xname="", colours="black", legend_title="", legend_labs="",ytitle=""){ -if (!is.null(signif)){colnames(signif)[1] <- "use"} -dta %>% -mutate(use=fct_relevel({{category}}, old_names)) %>% -ggplot(aes(x=use, y=mean, group={{fill}}, fill={{fill}},color={{fill}})) + -{if(length(colours)==1)geom_point(color=colours, position=position_dodge(.5))}+ -{if(length(colours)==2)geom_point(position=position_dodge(.5), show.legend = FALSE)}+ -{if(length(colours)==2)scale_color_manual(values=colours, name=legend_title, labels=legend_labs)}+ -scale_x_discrete(breaks=old_names, -labels=new_names)+ -ylab(ytitle)+ -xlab(xname)+ -theme_classic()+ theme(text=element_text(size=12))+ -{if(!is.null(signif))geom_text(data=signif, aes(label=letter, y=height), size=size, -colour="black", position=position_dodge(.5))}+ -geom_errorbar(aes(x=use, ymin=mean-ic, ymax=mean+ic), position=position_dodge(.5), width=.1, show.legend = FALSE) -} -####Introducing variables averaged per dolphins #### -# since we introduced an offset, variables can be divided by the number of dolphins -acoustic.dta$whistling_time_per_dolphin <- acoustic.dta$total_whistles_duration/acoustic.dta$number -acoustic.dta$BBPs_per_dolphin <- acoustic.dta$number_of_bbp/acoustic.dta$number -acoustic.dta$clicks_per_dolphin <- acoustic.dta$number_of_clicks/acoustic.dta$number -#### Fishing net #### -# whistles -table <- cld(emmeans(mod.whi, pairwise~fishing_net, adjust="tukey"), Letters = letters) -myletters_df <- data.frame(fishing_net=table$fishing_net, -letter = trimws(table$.group)) -barPlot(computeStats(acoustic.dta, fishing_net, whistling_time_per_dolphin/n_bins), -myletters_df, fishing_net, -old_names = c("SSF","F"), new_names = c("Absent", "Present"), -xname="Presence/Asence of fishing net", height=1, -ytitle="Mean whistling time per dolphin per min (in sec)") -# BBP -table <- cld(emmeans(mod.bbp, pairwise~fishing_net, adjust="tukey"), Letters = letters) -myletters_df <- data.frame(fishing_net=table$fishing_net, -letter = trimws(table$.group)) -barPlot(computeStats(acoustic.dta, fishing_net, BBPs_per_dolphin), -myletters_df, fishing_net, -old_names = c("SSF","F"), new_names = c("Absent", "Present"), -xname="Presence/Asence of fishing net", height=.6, -ytitle="Mean number of BBPs per dolphin per min") -# Clicks -table <- cld(emmeans(mod.cli, pairwise~fishing_net, adjust="tukey"), Letters = letters) -myletters_df <- data.frame(fishing_net=table$fishing_net, -letter = trimws(table$.group)) -barPlot(computeStats(acoustic.dta, fishing_net, clicks_per_dolphin), -myletters_df, fishing_net, -old_names = c("SSF","F"), new_names = c("Absent", "Present"), -xname="Presence/Asence of fishing net", height=100, -ytitle="Mean number of clicks per dolphin per min") -#### Acoustic plots #### -# Whistles -table <- cld(emmeans(mod.whi, pairwise~acoustic, adjust="tukey"), Letters = letters) -myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group)) -barPlot(computeStats(acoustic.dta, acoustic, whistling_time_per_dolphin/n_bins), -myletters_df, acoustic, height=1.3, ytitle="Mean whistling time per dolphin per min (in sec)", -old_names = c("AV","AV+D","D","D+AP","AP"), -new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"), -xname="Activation sequence") -# BBPs -table <- cld(emmeans(mod.bbp, pairwise~acoustic, adjust="tukey"), Letters = letters) -myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group)) -barPlot(computeStats(acoustic.dta, acoustic, BBPs_per_dolphin), -myletters_df, acoustic, height=1.2, ytitle="Mean number of BBPs per dolphin per min", -old_names = c("AV","AV+D","D","D+AP","AP"), -new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"), -xname="Activation sequence") -# Clicks -table <- cld(emmeans(mod.cli, pairwise~acoustic, adjust="tukey"), Letters = letters) -myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group)) -barPlot(computeStats(acoustic.dta, acoustic, clicks_per_dolphin), -myletters_df, acoustic, height=155, ytitle="Mean number of clicks per dolphin per min", -old_names = c("AV","AV+D","D","D+AP","AP"), -new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"), -xname="Activation sequence") -#### Interaction fishing_net:acoustic plots #### -# Whistles -letters_df <- computeLetters(emmeans(mod.whi, pairwise~fishing_net:acoustic, adjust="tukey"), -"fishing_net") -letters_df$acoustic <- computeLetters(emmeans(mod.whi, pairwise~fishing_net:acoustic, adjust="tukey"), -"acoustic")$acoustic -letters_df <- letters_df[, c("acoustic","fishing_net","letter")] -letters_df$letter <- gsub(" ", "", letters_df$letter) -barPlot(computeStats(acoustic.dta, fishing_net, whistling_time_per_dolphin/n_bins, two=acoustic), -NULL, acoustic, fill=fishing_net, -old_names = c("AV","AV+D","D","D+AP","AP"), ytitle="Mean whistling time per dolphin per min (in sec)", -new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"), -xname="Activation sequence", height=c(.95,.95,.95,1,.95,1,.95,1,1,1), -colours=c("#E69F00","#999999"), size=5, legend_title="Fishing net", legend_labs=c("Present", "Absent")) # BBPs letters_df <- computeLetters(emmeans(mod.bbp, pairwise~fishing_net:acoustic, adjust="tukey"), @@ -375,9 +249,9 @@ geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) + theme_classic() + theme(text=element_text(size=12)) + ylab("Mean whistling time per dolphin per min (in sec)")+ xlab("Days of recording") -View(acoustic.dta) -sum(acoustic.dta$date=="09/07/2021") -56/361 +computeStats(acoustic.dta, acoustic, clicks_per_dolphin) +108/43.8 +88.2/43.8 ######################################################################## # STATISTICS # Author : Loic LEHNHOFF @@ -456,57 +330,183 @@ plot(acoustic.dta) # nothing that we can see # What are the H0/ H1 hypotheses ? # H0 : No influence of any of the explanatory variables on a dependant one. # H1 : Influence of an explanatory variable on a dependent one. -View(acoustic.dta) -sum(acoustic.dta$date=="09/07/2021") -sum(acoustic.dta$acoustic=="T") -################################################## -library(pscl) -library(MASS) -library(lmtest) -library(multcomp) -library(emmeans) -library(dplyr) # "%>%" function -library(forcats) # "fct_relevel" function -library(stringr) # "gsub" function -library(rcompanion) # "fullPTable" function -library(multcompView) # "multcompLetters" function -library(ggplot2) -library(pgirmess) -library(postHoc) -#library(tidyquant) # geom_ma() if rolling average needed -n_bins = 187.5 # number of bins per sec for spectrograms (whistles) -################# DATASET IMPORTS ##################################### -folder <- './../' -whistles.dta <-read.table(file=paste0(folder, -'Whistles/Evaluation/whistles_durations.csv'), -sep = ',', header=TRUE) -whistles.dta <- whistles.dta[order(whistles.dta$audio_names),] -bbp.dta <-read.table(file=paste0(folder, -'BBPs/Results/16-06-22_14h00_number_of_BBP.csv'), -sep = ',', header=TRUE) -bbp.dta <- bbp.dta[order(bbp.dta$audio_names),] -clicks.dta <-read.table(file=paste0(folder, -'Clicks/Results/projection_updated_number_of_clicks_02052022.csv'), #number_of_clicks_02052022.csv -sep = ',', header=TRUE) -clicks.dta <- clicks.dta[order(clicks.dta$audio_names),] -# Merge files into 1 dataset -acoustic.dta <- clicks.dta -acoustic.dta$number_of_bbp <- bbp.dta$number_of_BBP -acoustic.dta$total_whistles_duration <- whistles.dta$total_whistles_duration -rm(whistles.dta, bbp.dta, clicks.dta) -# add group IDs -id2020 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2020.csv'), -sep = ',', header=TRUE)[1:396,] -id2021 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2021.csv'), -sep = ',', header=TRUE)[1:96,] -id2021$ID <- id2021$ID+max(id2020$ID) -id2021$Seq <- id2021$Seq+max(id2020$Seq) -id.dta <- rbind(id2020, id2021) -id.dta$Fichier.Audio <-str_sub(id.dta$Fichier.Audio, -27, -5) -acoustic.dta$ID <- rep(-1, 490) -for (name in acoustic.dta$audio_names){ -acoustic.dta$ID[match(name, acoustic.dta$audio_names)] <- id.dta$ID[match(name, id.dta$Fichier.Audio)] +##################### DATA EXPLORATION ################################ +# Y Outlier detection +par(mfrow=c(2,3)) +boxplot(acoustic.dta$total_whistles_duration, col='red', +ylab='total_whistles_duration') +boxplot(acoustic.dta$number_of_bbp, col='red', +ylab='number_of_bbp') +boxplot(acoustic.dta$number_of_clicks, col='red', +ylab='number_of_clicks') +dotchart(acoustic.dta$total_whistles_duration, pch=16, +xlab='total_whistles_duration', col='red') +dotchart(acoustic.dta$number_of_bbp, pch=16, +xlab='number_of_bbp', col='red') +dotchart(acoustic.dta$number_of_clicks, pch=16, +xlab='number_of_clicks', col='red') +# Y distribution +par(mfrow=c(2,3)) +hist(acoustic.dta$total_whistles_duration, col='red', breaks=8, +xlab='total_whistles_duration', ylab='number') +hist(acoustic.dta$number_of_bbp, col='red', breaks=8, +xlab='number_of_bbp', ylab='number') +hist(acoustic.dta$number_of_clicks, col='red', breaks=8, +xlab='number_of_clicks', ylab='number') +qqnorm(acoustic.dta$total_whistles_duration, col='red', pch=16) +qqline(acoustic.dta$total_whistles_duration) +qqnorm(acoustic.dta$number_of_bbp, col='red', pch=16) +qqline(acoustic.dta$number_of_bbp) +qqnorm(acoustic.dta$number_of_clicks, col='red', pch=16) +qqline(acoustic.dta$number_of_clicks) +shapiro.test(acoustic.dta$total_whistles_duration) +shapiro.test(acoustic.dta$number_of_bbp) +shapiro.test(acoustic.dta$number_of_clicks) +# p-values are significant => they do not follow normal distributions +# will need a transformation or the use of a glim model +# X Number of individuals per level +summary(factor(acoustic.dta$acoustic)) +summary(factor(acoustic.dta$fishing_net)) +summary(factor(acoustic.dta$behavior)) +summary(factor(acoustic.dta$beacon)) +summary(factor(acoustic.dta$net)) +table(factor(acoustic.dta$acoustic),factor(acoustic.dta$fishing_net)) +table(factor(acoustic.dta$acoustic),factor(acoustic.dta$behavior)) +table(factor(acoustic.dta$behavior),factor(acoustic.dta$acoustic)) +ftable(factor(acoustic.dta$fishing_net), factor(acoustic.dta$behavior), factor(acoustic.dta$acoustic)) +# => unbalanced, no big deal but will need more work (no orthogonality): +# Effects can depend on the order of the variables +# => Beacon and net have modalities with <10 individuals => analysis impossible +# => They will be treated apart from the rest as they are likely to be biased +##################### STATISTICAL MODELLING ########################### +### Model tested +# GLM: General linear model (residual hypothesis: normality, homoscedasticity, independant) +# GLIM: Generalized linear model (residual hypothesis: uncorrelated residuals) +# NB : Negative Binomial model (residual hypothesis: independantM) +# ZINB: Zero inflated negative binomial model (residual hypothesis: independant) +# We are using number as an offset (more dolphins => more signals) +# beacon and net explanatory variables could not be tested in models +# as they contain information already present in "fishing_net" which is more +# interesting to keep for our study. They will be treated after +# (using kruskall-Wallis non-parametric test) +# fishing_net, behavior and acoustic where tested with their interactions. +# If a variable is it in a model, it is because it had no significant effect. +par(mfrow=c(1,1)) +### Model for whistles +# Residual hypotheses not verified for GLM +# Overdipsersion when using GLIM (negative binomial) +# Using ZINB: +zero.whi <- zeroinfl(total_whistles_duration ~ +acoustic + fishing_net + behavior + offset(log(number)), +data=acoustic.dta, dist='negbin') +nb.whi <- glm.nb(total_whistles_duration ~ +acoustic + fishing_net + behavior + offset(log(number)), +data=acoustic.dta) +# comparison ZINB VS NB model +vuong(zero.whi, nb.whi) #(if p-value<0.05 then first model in comparison is better) +mod.whi <- zero.whi # => zeroinflated model is indeed better suited +car::Anova(mod.whi, type=3) +dwtest(mod.whi) # H0 -> independent if p>0.05 (autocorrelation if p<0.05) +bptest(mod.whi) # H0 -> homoscedasticity if p<0.05 +mod.whi$df.null/mod.whi$df.residual +# no dispersion, perfect +### Model for BBP +# No normality of residuals for GLM +# overdispersion with GLIM quasipoisson +#try with glim NB: +mod.bbp <- glm.nb(number_of_bbp ~ acoustic + fishing_net + behavior ++ offset(log(number)), +data=acoustic.dta) +car::Anova(mod.bbp, type=3) +dwtest(mod.bbp) # H0 -> independent if p>0.05 (autocorrelation if p<0.05) +bptest(mod.bbp) # H0 -> homoscedasticity if p<0.05 +mod.bbp$deviance/mod.bbp$df.residual +# slight underdispersion, not improved with ZINB so we keep this +### Model for clicks +# Using NB model: +mod.cli <- glm.nb(number_of_clicks ~ acoustic + fishing_net + acoustic:fishing_net + offset(log(number)), +data=acoustic.dta) +car::Anova(mod.cli, type=3) +dwtest(mod.cli) # H0 -> independent if p>0.05 (autocorrelation if p<0.05) +bptest(mod.cli) # H0 -> homoscedasticity if p<0.05 +mod.cli$deviance/mod.cli$df.residual +# slight overdispersion. (ZINB does not clearly improve results so we keep this) +# FYI1: Comparison of combination of explanatory variables between models +# were compared based on BIC criterion. +# The model with the lowest BIC was kept (and is the one shown) +# FYI2: log(number of dolphin per group) does have an effect on data but we have +# no interest in investigating it, that is why we use it as an offset. +##################### Boxplots and comparisons ##################### +### Functions to compute stats +computeLetters <- function(temp, category) { +test <- multcomp::cld(object = temp$emmeans, +Letters = letters) +myletters_df <- data.frame(category=test[,category], +letter=trimws(test$.group)) +colnames(myletters_df)[1] <- category +return(myletters_df) } -acoustic.dta$ID <- as.factor(acoustic.dta$ID) -rm(id2020, id2021, id.dta) -unique(acoustic.dta$ID) +computeStats <- function(data, category, values, two=NULL, three=NULL) { +my_sum <- data %>% +group_by({{category}}, {{two}}, {{three}}) %>% +summarise( +n=n(), +mean=mean({{values}}), +sd=sd({{values}}) +) %>% +mutate( se=sd/sqrt(n)) %>% +mutate( ic=se * qt((1-0.05)/2 + .5, n-1)) +return(my_sum) +} +barPlot <- function(dta, signif, category, old_names, new_names, fill=NULL, size=5, +height, xname="", colours="black", legend_title="", legend_labs="",ytitle=""){ +if (!is.null(signif)){colnames(signif)[1] <- "use"} +dta %>% +mutate(use=fct_relevel({{category}}, old_names)) %>% +ggplot(aes(x=use, y=mean, group={{fill}}, fill={{fill}},color={{fill}})) + +{if(length(colours)==1)geom_point(color=colours, position=position_dodge(.5))}+ +{if(length(colours)==2)geom_point(position=position_dodge(.5), show.legend = FALSE)}+ +{if(length(colours)==2)scale_color_manual(values=colours, name=legend_title, labels=legend_labs)}+ +scale_x_discrete(breaks=old_names, +labels=new_names)+ +ylab(ytitle)+ +xlab(xname)+ +theme_classic()+ theme(text=element_text(size=12))+ +{if(!is.null(signif))geom_text(data=signif, aes(label=letter, y=height), size=size, +colour="black", position=position_dodge(.5))}+ +geom_errorbar(aes(x=use, ymin=mean-ic, ymax=mean+ic), position=position_dodge(.5), width=.1, show.legend = FALSE) +} +####Introducing variables averaged per dolphins #### +# since we introduced an offset, variables can be divided by the number of dolphins +acoustic.dta$whistling_time_per_dolphin <- acoustic.dta$total_whistles_duration/acoustic.dta$number +acoustic.dta$BBPs_per_dolphin <- acoustic.dta$number_of_bbp/acoustic.dta$number +acoustic.dta$clicks_per_dolphin <- acoustic.dta$number_of_clicks/acoustic.dta$number +#### Fishing net #### +#### Behaviour plots #### +# Whistles +table <- cld(emmeans(mod.whi, pairwise~behavior, adjust="tukey"), Letters = letters) +myletters_df <- data.frame(behavior=table$behavior,letter = trimws(table$.group)) +barPlot(computeStats(acoustic.dta, behavior, whistling_time_per_dolphin/n_bins), +myletters_df, behavior, height=1.5, ytitle="Mean whistling time per dolphin per min (in sec)", +old_names = c("CHAS", "DEPL", "SOCI"), +new_names = c("Foraging", "Travelling", "Socialising"), +xname="Behaviours of dolphins") +# BBPs +# real effect measured in model +table <- cld(emmeans(mod.bbp, pairwise~behavior, adjust="tukey"), Letters = letters) +myletters_df <- data.frame(acoustic=table$behavior,letter = trimws(table$.group)) +barPlot(computeStats(acoustic.dta, behavior, BBPs_per_dolphin), +myletters_df, behavior, height=1.2, ytitle="Mean number of BBPs per dolphin per min", +old_names = c("CHAS", "DEPL", "SOCI"), +new_names = c("Foraging", "Travelling", "Socialising"), +xname="Behaviours of dolphins") +# Clicks +# no significant effect in click statistical model so all the same letters +myletters_df <- data.frame(behavior=unique(acoustic.dta$behavior), +letter = rep("a",length(unique(acoustic.dta$behavior)))) +barPlot(computeStats(acoustic.dta, behavior, clicks_per_dolphin), +myletters_df, +behavior, old_names = c("CHAS", "DEPL", "SOCI"), +new_names = c("Foraging", "Travelling", "Socialising"), +xname="Behaviours of dolphins", height=150, +ytitle="Mean number of clicks per dolphin per min")