Update gitignore

6d2efdbc · Loic-Lenof · 13c76a7d · 6d2efdbc · 6d2efdbc
Commit 6d2efdbc authored Oct 21, 2022 by Loic-Lenof
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@ Stats/.Rhistory
 Stats/.RData
 Stats/.Rhistory
 Stats/.Rhistory
+Stats/.Rhistory
--- a/Stats/.Rhistory
+++ b/Stats/.Rhistory
-table(factor(acoustic.dta$acoustic),factor(acoustic.dta$behavior))
-table(factor(acoustic.dta$behavior),factor(acoustic.dta$acoustic))
-ftable(factor(acoustic.dta$fishing_net), factor(acoustic.dta$behavior), factor(acoustic.dta$acoustic))
-# => unbalanced, no big deal but will need more work (no orthogonality):
-# Effects can depend on the order of the variables
-# => Beacon and net have modalities with <10 individuals => analysis impossible
-# => They will be treated apart from the rest as they are likely to be biased
-##################### STATISTICAL MODELLING ###########################
-### Model tested
-# GLM: General linear model (residual hypothesis: normality, homoscedasticity, independence)
-# GLIM: Generalized linear model (residual hypothesis: uncorrelated residuals)
-# NB : Negative Binomial model (residual hypothesis: independence)
-# ZINB: Zero inflated negative binomial model (residual hypothesis: independence)
-# We are using number as an offset (more dolphins => more signals)
-# beacon and net explanatory variables could not be tested in models
-# as they contain information already present in "fishing_net" which is more
-# interesting to keep for our study. They will be treated after
-# (using Kruskal-Wallis non-parametric test)
-# fishing_net, behavior and acoustic were tested with their interactions.
-# If a variable is not in a model, it is because it had no significant effect.
-par(mfrow=c(1,1)) # back to a single plotting panel
-### Model for whistles
-# Residual hypotheses not verified for GLM
-# Overdispersion when using GLIM (negative binomial)
-# Using ZINB (zero-inflated negative binomial), with log(number) as offset:
-zero.whi <- zeroinfl(total_whistles_duration ~
-acoustic + fishing_net + behavior + offset(log(number)),
-data=acoustic.dta, dist='negbin')
-nb.whi <- glm.nb(total_whistles_duration ~
-acoustic + fishing_net + behavior + offset(log(number)),
-data=acoustic.dta)
-# comparison ZINB VS NB model (same formula and data for both fits)
-vuong(zero.whi, nb.whi)  #(if p-value<0.05 then first model in comparison is better)
-mod.whi <- zero.whi # => zeroinflated model is indeed better suited
-car::Anova(mod.whi, type=3)
-dwtest(mod.whi) # H0 -> independent if p>0.05 (autocorrelation if p<0.05)
-bptest(mod.whi) # H0 -> homoscedasticity if p>0.05 (heteroscedasticity if p<0.05)
-mod.whi$df.null/mod.whi$df.residual # NOTE(review): ratio of two df values, not deviance/df as used for mod.bbp - confirm this is the intended dispersion check
-# no dispersion, perfect
-### Model for BBP
-# No normality of residuals for GLM
-# Overdispersion with GLIM quasipoisson
-# Try with GLIM NB (negative binomial), with log(number) as offset:
-mod.bbp <- glm.nb(number_of_bbp ~ acoustic + fishing_net + behavior
-+ offset(log(number)),
-data=acoustic.dta)
-car::Anova(mod.bbp, type=3)
-dwtest(mod.bbp) # H0 -> independent if p>0.05 (autocorrelation if p<0.05)
-bptest(mod.bbp) # H0 -> homoscedasticity if p>0.05 (heteroscedasticity if p<0.05)
-mod.bbp$deviance/mod.bbp$df.residual # residual deviance per residual degree of freedom
-# slight underdispersion, not improved with ZINB so we keep this
 ### Model for clicks
 # Using NB model:
 mod.cli <- glm.nb(number_of_clicks ~ acoustic + fishing_net + acoustic:fishing_net + offset(log(number)),
@@ -510,3 +459,54 @@ plot(acoustic.dta) # nothing that we can see
 View(acoustic.dta)
 sum(acoustic.dta$date=="09/07/2021")
 sum(acoustic.dta$acoustic=="T")
+##################################################
+library(pscl)
+library(MASS)
+library(lmtest)
+library(multcomp)
+library(emmeans)
+library(dplyr)        # "%>%" function
+library(forcats)      # "fct_relevel" function
+library(stringr)      # "str_sub" function
+library(rcompanion)   # "fullPTable" function
+library(multcompView) # "multcompLetters" function
+library(ggplot2)
+library(pgirmess)
+library(postHoc)
+#library(tidyquant)    # geom_ma() if rolling average needed
+n_bins = 187.5 # number of bins per sec for spectrograms (whistles)
+################# DATASET IMPORTS #####################################
+folder <- './../' # data root: parent of the current working directory
+whistles.dta <-read.table(file=paste0(folder,
+'Whistles/Evaluation/whistles_durations.csv'),
+sep = ',', header=TRUE)
+whistles.dta <- whistles.dta[order(whistles.dta$audio_names),] # sort rows by recording name
+bbp.dta <-read.table(file=paste0(folder,
+'BBPs/Results/16-06-22_14h00_number_of_BBP.csv'),
+sep = ',', header=TRUE)
+bbp.dta <- bbp.dta[order(bbp.dta$audio_names),] # same ordering as the whistles table
+clicks.dta <-read.table(file=paste0(folder,
+'Clicks/Results/projection_updated_number_of_clicks_02052022.csv'), #number_of_clicks_02052022.csv
+sep = ',', header=TRUE)
+clicks.dta <- clicks.dta[order(clicks.dta$audio_names),] # same ordering as the other two tables
+# Merge files into 1 dataset: the clicks table is the base, the other two add one column each
+acoustic.dta <- clicks.dta
+acoustic.dta$number_of_bbp <- bbp.dta$number_of_BBP # positional copy - NOTE(review): assumes all three files list the same audio_names; not checked here
+acoustic.dta$total_whistles_duration <- whistles.dta$total_whistles_duration # positional copy, same assumption
+rm(whistles.dta, bbp.dta, clicks.dta) # drop the per-signal tables once merged
+# Add group IDs: look up each recording's ID in the 2020/2021 audio metadata.
+id2020 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2020.csv'),
+sep = ',', header=TRUE)[1:396,]
+id2021 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2021.csv'),
+sep = ',', header=TRUE)[1:96,]
+# Shift the 2021 ID/Seq values past the 2020 maxima so the two years never collide.
+id2021$ID <- id2021$ID+max(id2020$ID)
+id2021$Seq <- id2021$Seq+max(id2020$Seq)
+id.dta <- rbind(id2020, id2021)
+id.dta$Fichier.Audio <-str_sub(id.dta$Fichier.Audio, -27, -5)
+# Vectorised lookup: works for any row count (no hard-coded 490) and for repeated
+# audio_names; recordings absent from id.dta get NA, as the original loop gave.
+acoustic.dta$ID <- id.dta$ID[match(acoustic.dta$audio_names, id.dta$Fichier.Audio)]
+acoustic.dta$ID <- as.factor(acoustic.dta$ID)
+rm(id2020, id2021, id.dta)
+unique(acoustic.dta$ID)