From 6d2efdbc6bff5ed5284830101a5ba71dff77c36a Mon Sep 17 00:00:00 2001 From: Loic-Lenof <loic.lenof@gmail.com> Date: Fri, 21 Oct 2022 15:07:54 +0200 Subject: [PATCH] Update gitignore --- .gitignore | 1 + Stats/.Rhistory | 102 ++++++++++++++++++++++++------------------------ 2 files changed, 52 insertions(+), 51 deletions(-) diff --git a/.gitignore b/.gitignore index b424459..c155c84 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ Stats/.Rhistory Stats/.RData Stats/.Rhistory Stats/.Rhistory +Stats/.Rhistory diff --git a/Stats/.Rhistory b/Stats/.Rhistory index 3cef1a1..8f941cb 100644 --- a/Stats/.Rhistory +++ b/Stats/.Rhistory @@ -1,54 +1,3 @@ -table(factor(acoustic.dta$acoustic),factor(acoustic.dta$behavior)) -table(factor(acoustic.dta$behavior),factor(acoustic.dta$acoustic)) -ftable(factor(acoustic.dta$fishing_net), factor(acoustic.dta$behavior), factor(acoustic.dta$acoustic)) -# => unbalanced, no big deal but will need more work (no orthogonality): -# Effects can depend on the order of the variables -# => Beacon and net have modalities with <10 individuals => analysis impossible -# => They will be treated apart from the rest as they are likely to be biased -##################### STATISTICAL MODELLING ########################### -### Model tested -# GLM: General linear model (residual hypothesis: normality, homoscedasticity, independant) -# GLIM: Generalized linear model (residual hypothesis: uncorrelated residuals) -# NB : Negative Binomial model (residual hypothesis: independantM) -# ZINB: Zero inflated negative binomial model (residual hypothesis: independant) -# We are using number as an offset (more dolphins => more signals) -# beacon and net explanatory variables could not be tested in models -# as they contain information already present in "fishing_net" which is more -# interesting to keep for our study. They will be treated after -# (using kruskall-Wallis non-parametric test) -# fishing_net, behavior and acoustic where tested with their interactions. -# If a variable is it in a model, it is because it had no significant effect. -par(mfrow=c(1,1)) -### Model for whistles -# Residual hypotheses not verified for GLM -# Overdipsersion when using GLIM (negative binomial) -# Using ZINB: -zero.whi <- zeroinfl(total_whistles_duration ~ -acoustic + fishing_net + behavior + offset(log(number)), -data=acoustic.dta, dist='negbin') -nb.whi <- glm.nb(total_whistles_duration ~ -acoustic + fishing_net + behavior + offset(log(number)), -data=acoustic.dta) -# comparison ZINB VS NB model -vuong(zero.whi, nb.whi) #(if p-value<0.05 then first model in comparison is better) -mod.whi <- zero.whi # => zeroinflated model is indeed better suited -car::Anova(mod.whi, type=3) -dwtest(mod.whi) # H0 -> independent if p>0.05 (autocorrelation if p<0.05) -bptest(mod.whi) # H0 -> homoscedasticity if p<0.05 -mod.whi$df.null/mod.whi$df.residual -# no dispersion, perfect -### Model for BBP -# No normality of residuals for GLM -# overdispersion with GLIM quasipoisson -#try with glim NB: -mod.bbp <- glm.nb(number_of_bbp ~ acoustic + fishing_net + behavior -+ offset(log(number)), -data=acoustic.dta) -car::Anova(mod.bbp, type=3) -dwtest(mod.bbp) # H0 -> independent if p>0.05 (autocorrelation if p<0.05) -bptest(mod.bbp) # H0 -> homoscedasticity if p<0.05 -mod.bbp$deviance/mod.bbp$df.residual -# slight underdispersion, not improved with ZINB so we keep this ### Model for clicks # Using NB model: mod.cli <- glm.nb(number_of_clicks ~ acoustic + fishing_net + acoustic:fishing_net + offset(log(number)), @@ -510,3 +459,54 @@ plot(acoustic.dta) # nothing that we can see View(acoustic.dta) sum(acoustic.dta$date=="09/07/2021") sum(acoustic.dta$acoustic=="T") +################################################## +library(pscl) +library(MASS) +library(lmtest) +library(multcomp) +library(emmeans) +library(dplyr) # "%>%" function +library(forcats) # "fct_relevel" function +library(stringr) # "gsub" function +library(rcompanion) # "fullPTable" function +library(multcompView) # "multcompLetters" function +library(ggplot2) +library(pgirmess) +library(postHoc) +#library(tidyquant) # geom_ma() if rolling average needed +n_bins = 187.5 # number of bins per sec for spectrograms (whistles) +################# DATASET IMPORTS ##################################### +folder <- './../' +whistles.dta <-read.table(file=paste0(folder, +'Whistles/Evaluation/whistles_durations.csv'), +sep = ',', header=TRUE) +whistles.dta <- whistles.dta[order(whistles.dta$audio_names),] +bbp.dta <-read.table(file=paste0(folder, +'BBPs/Results/16-06-22_14h00_number_of_BBP.csv'), +sep = ',', header=TRUE) +bbp.dta <- bbp.dta[order(bbp.dta$audio_names),] +clicks.dta <-read.table(file=paste0(folder, +'Clicks/Results/projection_updated_number_of_clicks_02052022.csv'), #number_of_clicks_02052022.csv +sep = ',', header=TRUE) +clicks.dta <- clicks.dta[order(clicks.dta$audio_names),] +# Merge files into 1 dataset +acoustic.dta <- clicks.dta +acoustic.dta$number_of_bbp <- bbp.dta$number_of_BBP +acoustic.dta$total_whistles_duration <- whistles.dta$total_whistles_duration +rm(whistles.dta, bbp.dta, clicks.dta) +# add group IDs +id2020 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2020.csv'), +sep = ',', header=TRUE)[1:396,] +id2021 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2021.csv'), +sep = ',', header=TRUE)[1:96,] +id2021$ID <- id2021$ID+max(id2020$ID) +id2021$Seq <- id2021$Seq+max(id2020$Seq) +id.dta <- rbind(id2020, id2021) +id.dta$Fichier.Audio <-str_sub(id.dta$Fichier.Audio, -27, -5) +acoustic.dta$ID <- rep(-1, 490) +for (name in acoustic.dta$audio_names){ +acoustic.dta$ID[match(name, acoustic.dta$audio_names)] <- id.dta$ID[match(name, id.dta$Fichier.Audio)] +} +acoustic.dta$ID <- as.factor(acoustic.dta$ID) +rm(id2020, id2021, id.dta) +unique(acoustic.dta$ID) -- GitLab