diff --git a/.gitignore b/.gitignore
index c155c845e04738f19eb87b15a31adb4e84142224..ee3d72b9ce85ed0673faf083f70c44b1fef0a0b7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ Stats/.RData
 Stats/.Rhistory
 Stats/.Rhistory
 Stats/.Rhistory
+Stats/.Rhistory
+Stats/.Rhistory
diff --git a/README.md b/README.md
index f38fe69da315976e4e6c629b1c7f1cf2cc847074..2a6ec2616f34c24db69703e6f1fdbbcdc2cf391c 100644
--- a/README.md
+++ b/README.md
@@ -28,4 +28,4 @@ Feel free to contact me if you have questions, tips or anything else to say. I'd
 Loïc Lehnhoff - <loic.lehnhoff@gmail.com>
 
 ## Related to
-Results and scripts are presented in details in: Lehnhoff, L.; Glotin, H.; Bernard, S.; Dabin, W.; Le Gall, Y.; Menut, E.; Meheust, E.; Peltier, H.; Pochat, A.; Pochat, K.; Rimaud, T.; Sourget, Q.; Spitz, J.; Van Canneyt, O.; Mérigot, B. *Behavioural response of common *dolphins Delphinus* delphis to a bio-inspired acoustic device for limiting fishery by-catch.* Sustainability, 1, 0 (**IN SUBMISSION PROCESS**)
\ No newline at end of file
+Results and scripts are presented in details in: Lehnhoff, L.; Glotin, H.; Bernard, S.; Dabin, W.; Le Gall, Y.; Menut, E.; Meheust, E.; Peltier, H.; Pochat, A.; Pochat, K.; Rimaud, T.; Sourget, Q.; Spitz, J.; Van Canneyt, O.; Mérigot, B. Behavioural Responses of Common Dolphins Delphinus delphis to a Bio-Inspired Acoustic Device for Limiting Fishery By-Catch. Sustainability 2022, 14, 13186. https://doi.org/10.3390/su142013186
\ No newline at end of file
diff --git a/Stats/.Rhistory b/Stats/.Rhistory
index 8f941cb426bd0465bdf87499129a14164d1727b4..2d7f5bdf7fc481d7600c8b044f236b475791a9cb 100644
--- a/Stats/.Rhistory
+++ b/Stats/.Rhistory
@@ -1,129 +1,3 @@
-### Model for clicks
-# Using NB model:
-mod.cli <- glm.nb(number_of_clicks ~ acoustic + fishing_net + acoustic:fishing_net + offset(log(number)),
-data=acoustic.dta)
-car::Anova(mod.cli, type=3)
-dwtest(mod.cli) # H0 -> independent if p>0.05 (autocorrelation if p<0.05)
-bptest(mod.cli) # H0 -> homoscedasticity if p<0.05
-mod.cli$deviance/mod.cli$df.residual
-# slight overdispersion. (ZINB does not clearly improve results so we keep this)
-# FYI1: Comparison of combination of explanatory variables between models
-# were compared based on BIC criterion.
-# The model with the lowest BIC was kept (and is the one shown)
-# FYI2: log(number of dolphin per group) does have an effect on data but we have
-# no interest in investigating it, that is why we use it as an offset.
-##################### Boxplots and comparisons #####################
-### Functions to compute stats
-computeLetters <- function(temp, category) {
-test <- multcomp::cld(object = temp$emmeans,
-Letters = letters)
-myletters_df <- data.frame(category=test[,category],
-letter=trimws(test$.group))
-colnames(myletters_df)[1] <- category
-return(myletters_df)
-}
-computeStats <- function(data, category, values, two=NULL, three=NULL) {
-my_sum <- data %>%
-group_by({{category}}, {{two}}, {{three}}) %>%
-summarise(
-n=n(),
-mean=mean({{values}}),
-sd=sd({{values}})
-) %>%
-mutate( se=sd/sqrt(n))  %>%
-mutate( ic=se * qt((1-0.05)/2 + .5, n-1))
-return(my_sum)
-}
-barPlot <- function(dta, signif, category, old_names, new_names, fill=NULL, size=5,
-height, xname="", colours="black", legend_title="", legend_labs="",ytitle=""){
-if (!is.null(signif)){colnames(signif)[1] <- "use"}
-dta %>%
-mutate(use=fct_relevel({{category}}, old_names)) %>%
-ggplot(aes(x=use, y=mean, group={{fill}}, fill={{fill}},color={{fill}})) +
-{if(length(colours)==1)geom_point(color=colours, position=position_dodge(.5))}+
-{if(length(colours)==2)geom_point(position=position_dodge(.5), show.legend = FALSE)}+
-{if(length(colours)==2)scale_color_manual(values=colours, name=legend_title, labels=legend_labs)}+
-scale_x_discrete(breaks=old_names,
-labels=new_names)+
-ylab(ytitle)+
-xlab(xname)+
-theme_classic()+ theme(text=element_text(size=12))+
-{if(!is.null(signif))geom_text(data=signif, aes(label=letter, y=height), size=size,
-colour="black", position=position_dodge(.5))}+
-geom_errorbar(aes(x=use, ymin=mean-ic, ymax=mean+ic), position=position_dodge(.5), width=.1, show.legend = FALSE)
-}
-####Introducing variables averaged per dolphins ####
-# since we introduced an offset, variables can be divided by the number of dolphins
-acoustic.dta$whistling_time_per_dolphin <- acoustic.dta$total_whistles_duration/acoustic.dta$number
-acoustic.dta$BBPs_per_dolphin <- acoustic.dta$number_of_bbp/acoustic.dta$number
-acoustic.dta$clicks_per_dolphin <- acoustic.dta$number_of_clicks/acoustic.dta$number
-#### Fishing net  ####
-# whistles
-table <- cld(emmeans(mod.whi, pairwise~fishing_net, adjust="tukey"), Letters = letters)
-myletters_df <- data.frame(fishing_net=table$fishing_net,
-letter = trimws(table$.group))
-barPlot(computeStats(acoustic.dta, fishing_net, whistling_time_per_dolphin/n_bins),
-myletters_df, fishing_net,
-old_names = c("SSF","F"), new_names = c("Absent", "Present"),
-xname="Presence/Asence of fishing net", height=1,
-ytitle="Mean whistling time per dolphin per min (in sec)")
-# BBP
-table <- cld(emmeans(mod.bbp, pairwise~fishing_net, adjust="tukey"), Letters = letters)
-myletters_df <- data.frame(fishing_net=table$fishing_net,
-letter = trimws(table$.group))
-barPlot(computeStats(acoustic.dta, fishing_net, BBPs_per_dolphin),
-myletters_df, fishing_net,
-old_names = c("SSF","F"), new_names = c("Absent", "Present"),
-xname="Presence/Asence of fishing net", height=.6,
-ytitle="Mean number of BBPs per dolphin per min")
-# Clicks
-table <- cld(emmeans(mod.cli, pairwise~fishing_net, adjust="tukey"), Letters = letters)
-myletters_df <- data.frame(fishing_net=table$fishing_net,
-letter = trimws(table$.group))
-barPlot(computeStats(acoustic.dta, fishing_net, clicks_per_dolphin),
-myletters_df, fishing_net,
-old_names = c("SSF","F"), new_names = c("Absent", "Present"),
-xname="Presence/Asence of fishing net", height=100,
-ytitle="Mean number of clicks per dolphin per min")
-#### Acoustic plots  ####
-# Whistles
-table <- cld(emmeans(mod.whi, pairwise~acoustic, adjust="tukey"), Letters = letters)
-myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group))
-barPlot(computeStats(acoustic.dta, acoustic, whistling_time_per_dolphin/n_bins),
-myletters_df, acoustic, height=1.3, ytitle="Mean whistling time per dolphin per min (in sec)",
-old_names = c("AV","AV+D","D","D+AP","AP"),
-new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"),
-xname="Activation sequence")
-# BBPs
-table <- cld(emmeans(mod.bbp, pairwise~acoustic, adjust="tukey"), Letters = letters)
-myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group))
-barPlot(computeStats(acoustic.dta, acoustic, BBPs_per_dolphin),
-myletters_df, acoustic, height=1.2, ytitle="Mean number of BBPs per dolphin per min",
-old_names = c("AV","AV+D","D","D+AP","AP"),
-new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"),
-xname="Activation sequence")
-# Clicks
-table <- cld(emmeans(mod.cli, pairwise~acoustic, adjust="tukey"), Letters = letters)
-myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group))
-barPlot(computeStats(acoustic.dta, acoustic, clicks_per_dolphin),
-myletters_df, acoustic, height=155, ytitle="Mean number of clicks per dolphin per min",
-old_names = c("AV","AV+D","D","D+AP","AP"),
-new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"),
-xname="Activation sequence")
-#### Interaction fishing_net:acoustic plots  ####
-# Whistles
-letters_df <- computeLetters(emmeans(mod.whi, pairwise~fishing_net:acoustic, adjust="tukey"),
-"fishing_net")
-letters_df$acoustic <- computeLetters(emmeans(mod.whi, pairwise~fishing_net:acoustic, adjust="tukey"),
-"acoustic")$acoustic
-letters_df <- letters_df[, c("acoustic","fishing_net","letter")]
-letters_df$letter <- gsub(" ", "", letters_df$letter)
-barPlot(computeStats(acoustic.dta, fishing_net, whistling_time_per_dolphin/n_bins, two=acoustic),
-NULL, acoustic, fill=fishing_net,
-old_names = c("AV","AV+D","D","D+AP","AP"), ytitle="Mean whistling time per dolphin per min (in sec)",
-new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"),
-xname="Activation sequence", height=c(.95,.95,.95,1,.95,1,.95,1,1,1),
-colours=c("#E69F00","#999999"), size=5,
 legend_title="Fishing net", legend_labs=c("Present", "Absent"))
 # BBPs
 letters_df <- computeLetters(emmeans(mod.bbp, pairwise~fishing_net:acoustic, adjust="tukey"),
@@ -375,9 +249,9 @@ geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) +
 theme_classic() + theme(text=element_text(size=12)) +
 ylab("Mean whistling time per dolphin per min (in sec)")+
 xlab("Days of recording")
-View(acoustic.dta)
-sum(acoustic.dta$date=="09/07/2021")
-56/361
+computeStats(acoustic.dta, acoustic, clicks_per_dolphin)
+108/43.8
+88.2/43.8
 ########################################################################
 #    STATISTICS
 #    Author : Loic LEHNHOFF
@@ -456,57 +330,183 @@ plot(acoustic.dta) # nothing that we can see
 # What are the H0/ H1 hypotheses ?
 # H0 : No influence of any of the explanatory variables on a dependant one.
 # H1 : Influence of an explanatory variable on a dependent one.
-View(acoustic.dta)
-sum(acoustic.dta$date=="09/07/2021")
-sum(acoustic.dta$acoustic=="T")
-##################################################
-library(pscl)
-library(MASS)
-library(lmtest)
-library(multcomp)
-library(emmeans)
-library(dplyr)        # "%>%" function
-library(forcats)      # "fct_relevel" function
-library(stringr)      # "gsub" function
-library(rcompanion)   # "fullPTable" function
-library(multcompView) # "multcompLetters" function
-library(ggplot2)
-library(pgirmess)
-library(postHoc)
-#library(tidyquant)    # geom_ma() if rolling average needed
-n_bins = 187.5 # number of bins per sec for spectrograms (whistles)
-################# DATASET IMPORTS #####################################
-folder <- './../'
-whistles.dta <-read.table(file=paste0(folder,
-'Whistles/Evaluation/whistles_durations.csv'),
-sep = ',', header=TRUE)
-whistles.dta <- whistles.dta[order(whistles.dta$audio_names),]
-bbp.dta <-read.table(file=paste0(folder,
-'BBPs/Results/16-06-22_14h00_number_of_BBP.csv'),
-sep = ',', header=TRUE)
-bbp.dta <- bbp.dta[order(bbp.dta$audio_names),]
-clicks.dta <-read.table(file=paste0(folder,
-'Clicks/Results/projection_updated_number_of_clicks_02052022.csv'), #number_of_clicks_02052022.csv
-sep = ',', header=TRUE)
-clicks.dta <- clicks.dta[order(clicks.dta$audio_names),]
-# Merge files into 1 dataset
-acoustic.dta <- clicks.dta
-acoustic.dta$number_of_bbp <- bbp.dta$number_of_BBP
-acoustic.dta$total_whistles_duration <- whistles.dta$total_whistles_duration
-rm(whistles.dta, bbp.dta, clicks.dta)
-# add group IDs
-id2020 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2020.csv'),
-sep = ',', header=TRUE)[1:396,]
-id2021 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2021.csv'),
-sep = ',', header=TRUE)[1:96,]
-id2021$ID <- id2021$ID+max(id2020$ID)
-id2021$Seq <- id2021$Seq+max(id2020$Seq)
-id.dta <- rbind(id2020, id2021)
-id.dta$Fichier.Audio <-str_sub(id.dta$Fichier.Audio, -27, -5)
-acoustic.dta$ID <- rep(-1, 490)
-for (name in acoustic.dta$audio_names){
-acoustic.dta$ID[match(name, acoustic.dta$audio_names)] <- id.dta$ID[match(name, id.dta$Fichier.Audio)]
+##################### DATA EXPLORATION ################################
+# Y Outlier detection
+par(mfrow=c(2,3))
+boxplot(acoustic.dta$total_whistles_duration, col='red',
+ylab='total_whistles_duration')
+boxplot(acoustic.dta$number_of_bbp, col='red',
+ylab='number_of_bbp')
+boxplot(acoustic.dta$number_of_clicks, col='red',
+ylab='number_of_clicks')
+dotchart(acoustic.dta$total_whistles_duration, pch=16,
+xlab='total_whistles_duration', col='red')
+dotchart(acoustic.dta$number_of_bbp, pch=16,
+xlab='number_of_bbp', col='red')
+dotchart(acoustic.dta$number_of_clicks, pch=16,
+xlab='number_of_clicks', col='red')
+# Y distribution
+par(mfrow=c(2,3))
+hist(acoustic.dta$total_whistles_duration, col='red', breaks=8,
+xlab='total_whistles_duration', ylab='number')
+hist(acoustic.dta$number_of_bbp, col='red', breaks=8,
+xlab='number_of_bbp', ylab='number')
+hist(acoustic.dta$number_of_clicks, col='red', breaks=8,
+xlab='number_of_clicks', ylab='number')
+qqnorm(acoustic.dta$total_whistles_duration, col='red', pch=16)
+qqline(acoustic.dta$total_whistles_duration)
+qqnorm(acoustic.dta$number_of_bbp, col='red', pch=16)
+qqline(acoustic.dta$number_of_bbp)
+qqnorm(acoustic.dta$number_of_clicks, col='red', pch=16)
+qqline(acoustic.dta$number_of_clicks)
+shapiro.test(acoustic.dta$total_whistles_duration)
+shapiro.test(acoustic.dta$number_of_bbp)
+shapiro.test(acoustic.dta$number_of_clicks)
+# p-values are significant => they do not follow normal distributions
+# will need a transformation or the use of a glim model
+# X Number of individuals per level
+summary(factor(acoustic.dta$acoustic))
+summary(factor(acoustic.dta$fishing_net))
+summary(factor(acoustic.dta$behavior))
+summary(factor(acoustic.dta$beacon))
+summary(factor(acoustic.dta$net))
+table(factor(acoustic.dta$acoustic),factor(acoustic.dta$fishing_net))
+table(factor(acoustic.dta$acoustic),factor(acoustic.dta$behavior))
+table(factor(acoustic.dta$behavior),factor(acoustic.dta$acoustic))
+ftable(factor(acoustic.dta$fishing_net), factor(acoustic.dta$behavior), factor(acoustic.dta$acoustic))
+# => unbalanced, no big deal but will need more work (no orthogonality):
+# Effects can depend on the order of the variables
+# => Beacon and net have modalities with <10 individuals => analysis impossible
+# => They will be treated apart from the rest as they are likely to be biased
+##################### STATISTICAL MODELLING ###########################
+### Model tested
+# GLM: General linear model (residual hypothesis: normality, homoscedasticity, independant)
+# GLIM: Generalized linear model (residual hypothesis: uncorrelated residuals)
+# NB : Negative Binomial model (residual hypothesis: independantM)
+# ZINB: Zero inflated negative binomial model (residual hypothesis: independant)
+# We are using number as an offset (more dolphins => more signals)
+# beacon and net explanatory variables could not be tested in models
+# as they contain information already present in "fishing_net" which is more
+# interesting to keep for our study. They will be treated after
+# (using kruskall-Wallis non-parametric test)
+# fishing_net, behavior and acoustic where tested with their interactions.
+# If a variable is it in a model, it is because it had no significant effect.
+par(mfrow=c(1,1))
+### Model for whistles
+# Residual hypotheses not verified for GLM
+# Overdipsersion when using GLIM (negative binomial)
+# Using ZINB:
+zero.whi <- zeroinfl(total_whistles_duration ~
+acoustic + fishing_net + behavior + offset(log(number)),
+data=acoustic.dta, dist='negbin')
+nb.whi <- glm.nb(total_whistles_duration ~
+acoustic + fishing_net + behavior + offset(log(number)),
+data=acoustic.dta)
+# comparison ZINB VS NB model
+vuong(zero.whi, nb.whi)  #(if p-value<0.05 then first model in comparison is better)
+mod.whi <- zero.whi # => zeroinflated model is indeed better suited
+car::Anova(mod.whi, type=3)
+dwtest(mod.whi) # H0 -> independent if p>0.05 (autocorrelation if p<0.05)
+bptest(mod.whi) # H0 -> homoscedasticity if p<0.05
+mod.whi$df.null/mod.whi$df.residual
+# no dispersion, perfect
+### Model for BBP
+# No normality of residuals for GLM
+# overdispersion with GLIM quasipoisson
+#try with glim NB:
+mod.bbp <- glm.nb(number_of_bbp ~ acoustic + fishing_net + behavior
++ offset(log(number)),
+data=acoustic.dta)
+car::Anova(mod.bbp, type=3)
+dwtest(mod.bbp) # H0 -> independent if p>0.05 (autocorrelation if p<0.05)
+bptest(mod.bbp) # H0 -> homoscedasticity if p<0.05
+mod.bbp$deviance/mod.bbp$df.residual
+# slight underdispersion, not improved with ZINB so we keep this
+### Model for clicks
+# Using NB model:
+mod.cli <- glm.nb(number_of_clicks ~ acoustic + fishing_net + acoustic:fishing_net + offset(log(number)),
+data=acoustic.dta)
+car::Anova(mod.cli, type=3)
+dwtest(mod.cli) # H0 -> independent if p>0.05 (autocorrelation if p<0.05)
+bptest(mod.cli) # H0 -> homoscedasticity if p<0.05
+mod.cli$deviance/mod.cli$df.residual
+# slight overdispersion. (ZINB does not clearly improve results so we keep this)
+# FYI1: Comparison of combination of explanatory variables between models
+# were compared based on BIC criterion.
+# The model with the lowest BIC was kept (and is the one shown)
+# FYI2: log(number of dolphin per group) does have an effect on data but we have
+# no interest in investigating it, that is why we use it as an offset.
+##################### Boxplots and comparisons #####################
+### Functions to compute stats
+computeLetters <- function(temp, category) {
+test <- multcomp::cld(object = temp$emmeans,
+Letters = letters)
+myletters_df <- data.frame(category=test[,category],
+letter=trimws(test$.group))
+colnames(myletters_df)[1] <- category
+return(myletters_df)
 }
-acoustic.dta$ID <- as.factor(acoustic.dta$ID)
-rm(id2020, id2021, id.dta)
-unique(acoustic.dta$ID)
+computeStats <- function(data, category, values, two=NULL, three=NULL) {
+my_sum <- data %>%
+group_by({{category}}, {{two}}, {{three}}) %>%
+summarise(
+n=n(),
+mean=mean({{values}}),
+sd=sd({{values}})
+) %>%
+mutate( se=sd/sqrt(n))  %>%
+mutate( ic=se * qt((1-0.05)/2 + .5, n-1))
+return(my_sum)
+}
+barPlot <- function(dta, signif, category, old_names, new_names, fill=NULL, size=5,
+height, xname="", colours="black", legend_title="", legend_labs="",ytitle=""){
+if (!is.null(signif)){colnames(signif)[1] <- "use"}
+dta %>%
+mutate(use=fct_relevel({{category}}, old_names)) %>%
+ggplot(aes(x=use, y=mean, group={{fill}}, fill={{fill}},color={{fill}})) +
+{if(length(colours)==1)geom_point(color=colours, position=position_dodge(.5))}+
+{if(length(colours)==2)geom_point(position=position_dodge(.5), show.legend = FALSE)}+
+{if(length(colours)==2)scale_color_manual(values=colours, name=legend_title, labels=legend_labs)}+
+scale_x_discrete(breaks=old_names,
+labels=new_names)+
+ylab(ytitle)+
+xlab(xname)+
+theme_classic()+ theme(text=element_text(size=12))+
+{if(!is.null(signif))geom_text(data=signif, aes(label=letter, y=height), size=size,
+colour="black", position=position_dodge(.5))}+
+geom_errorbar(aes(x=use, ymin=mean-ic, ymax=mean+ic), position=position_dodge(.5), width=.1, show.legend = FALSE)
+}
+####Introducing variables averaged per dolphins ####
+# since we introduced an offset, variables can be divided by the number of dolphins
+acoustic.dta$whistling_time_per_dolphin <- acoustic.dta$total_whistles_duration/acoustic.dta$number
+acoustic.dta$BBPs_per_dolphin <- acoustic.dta$number_of_bbp/acoustic.dta$number
+acoustic.dta$clicks_per_dolphin <- acoustic.dta$number_of_clicks/acoustic.dta$number
+#### Fishing net  ####
+#### Behaviour plots ####
+# Whistles
+table <- cld(emmeans(mod.whi, pairwise~behavior, adjust="tukey"), Letters = letters)
+myletters_df <- data.frame(behavior=table$behavior,letter = trimws(table$.group))
+barPlot(computeStats(acoustic.dta, behavior, whistling_time_per_dolphin/n_bins),
+myletters_df, behavior, height=1.5, ytitle="Mean whistling time per dolphin per min (in sec)",
+old_names = c("CHAS", "DEPL", "SOCI"),
+new_names = c("Foraging", "Travelling", "Socialising"),
+xname="Behaviours of dolphins")
+# BBPs
+# real effect measured in model
+table <- cld(emmeans(mod.bbp, pairwise~behavior, adjust="tukey"), Letters = letters)
+myletters_df <- data.frame(acoustic=table$behavior,letter = trimws(table$.group))
+barPlot(computeStats(acoustic.dta, behavior, BBPs_per_dolphin),
+myletters_df, behavior, height=1.2, ytitle="Mean number of BBPs per dolphin per min",
+old_names = c("CHAS", "DEPL", "SOCI"),
+new_names = c("Foraging", "Travelling", "Socialising"),
+xname="Behaviours of dolphins")
+# Clicks
+# no significant effect in click statistical model so all the same letters
+myletters_df <- data.frame(behavior=unique(acoustic.dta$behavior),
+letter = rep("a",length(unique(acoustic.dta$behavior))))
+barPlot(computeStats(acoustic.dta, behavior, clicks_per_dolphin),
+myletters_df,
+behavior, old_names = c("CHAS", "DEPL", "SOCI"),
+new_names = c("Foraging", "Travelling", "Socialising"),
+xname="Behaviours of dolphins", height=150,
+ytitle="Mean number of clicks per dolphin per min")