From d19a6ecb8ccaa80ddcbfdcf70076e453b243722b Mon Sep 17 00:00:00 2001
From: Loic-Lenof <loic.lenof@gmail.com>
Date: Mon, 10 Oct 2022 15:08:00 +0200
Subject: [PATCH] Adding plots

+ adding plots by date
---
 Stats/.Rhistory                    | 240 ++++++++++++++---------------
 Stats/BBP-click-whistles_3models.R |  45 +++++-
 2 files changed, 161 insertions(+), 124 deletions(-)

diff --git a/Stats/.Rhistory b/Stats/.Rhistory
index eaeec58..3cef1a1 100644
--- a/Stats/.Rhistory
+++ b/Stats/.Rhistory
@@ -1,80 +1,3 @@
-sep = ',', header=TRUE)[1:396,]
-id2021 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2021.csv'),
-sep = ',', header=TRUE)[1:96,]
-id2021$ID <- id2021$ID+max(id2020$ID)
-id2021$Seq <- id2021$Seq+max(id2020$Seq)
-id.dta <- rbind(id2020, id2021)
-id.dta$Fichier.Audio <-str_sub(id.dta$Fichier.Audio, -27, -5)
-acoustic.dta$ID <- rep(-1, 490)
-for (name in acoustic.dta$audio_names){
-acoustic.dta$ID[match(name, acoustic.dta$audio_names)] <- id.dta$ID[match(name, id.dta$Fichier.Audio)]
-}
-acoustic.dta$ID <- as.factor(acoustic.dta$ID)
-rm(id2020, id2021, id.dta)
-# suppress "T" acoustic data (other groups not tested on our variables)
-acoustic.dta <- acoustic.dta[acoustic.dta$acoustic!="T",]
-# shuffle dataframe
-acoustic.dta <- acoustic.dta[sample(1:nrow(acoustic.dta)), ]
-acoustic.dta$acoustic <- factor(acoustic.dta$acoustic)
-#################### DATA INSPECTION  #################################
-# Data description
-names(acoustic.dta)
-# self explenatory except acoustic : correspond to the activation sequence.
-# Look for obvious correlations
-plot(acoustic.dta) # nothing that we can see
-# Look for zero-inflation
-100*sum(acoustic.dta$number_of_clicks == 0)/nrow(acoustic.dta)
-100*sum(acoustic.dta$number_of_bbp == 0)/nrow(acoustic.dta)
-100*sum(acoustic.dta$total_whistles_duration == 0)/nrow(acoustic.dta)
-# 5.8%, 53.7% & 27.1% of our data are zeros. Will have to be dealt with.
-# QUESTION: This study is aimed at understanding if dolphin's acoustic activity
-# is influenced by their behavior, the emission of a pinger or a fishing net.
-# Dependent variables (Y): number_of_clicks, number_of_bbp, total_whistles_duration.
-# Explanatory variables (X): acoustic, fishing_net, behavior, beacon, net, number.
-# What are the H0/ H1 hypotheses ?
-# H0 : No influence of any of the explanatory variables on a dependant one.
-# H1 : Influence of an explanatory variable on a dependent one.
-##################### DATA EXPLORATION ################################
-# Y Outlier detection
-par(mfrow=c(2,3))
-boxplot(acoustic.dta$total_whistles_duration, col='red',
-ylab='total_whistles_duration')
-boxplot(acoustic.dta$number_of_bbp, col='red',
-ylab='number_of_bbp')
-boxplot(acoustic.dta$number_of_clicks, col='red',
-ylab='number_of_clicks')
-dotchart(acoustic.dta$total_whistles_duration, pch=16,
-xlab='total_whistles_duration', col='red')
-dotchart(acoustic.dta$number_of_bbp, pch=16,
-xlab='number_of_bbp', col='red')
-dotchart(acoustic.dta$number_of_clicks, pch=16,
-xlab='number_of_clicks', col='red')
-# Y distribution
-par(mfrow=c(2,3))
-hist(acoustic.dta$total_whistles_duration, col='red', breaks=8,
-xlab='total_whistles_duration', ylab='number')
-hist(acoustic.dta$number_of_bbp, col='red', breaks=8,
-xlab='number_of_bbp', ylab='number')
-hist(acoustic.dta$number_of_clicks, col='red', breaks=8,
-xlab='number_of_clicks', ylab='number')
-qqnorm(acoustic.dta$total_whistles_duration, col='red', pch=16)
-qqline(acoustic.dta$total_whistles_duration)
-qqnorm(acoustic.dta$number_of_bbp, col='red', pch=16)
-qqline(acoustic.dta$number_of_bbp)
-qqnorm(acoustic.dta$number_of_clicks, col='red', pch=16)
-qqline(acoustic.dta$number_of_clicks)
-shapiro.test(acoustic.dta$total_whistles_duration)
-shapiro.test(acoustic.dta$number_of_bbp)
-shapiro.test(acoustic.dta$number_of_clicks)
-# p-values are significant => they do not follow normal distributions
-# will need a transformation or the use of a glim model
-# X Number of individuals per level
-summary(factor(acoustic.dta$acoustic))
-summary(factor(acoustic.dta$fishing_net))
-summary(factor(acoustic.dta$behavior))
-summary(factor(acoustic.dta$beacon))
-summary(factor(acoustic.dta$net))
-table(factor(acoustic.dta$acoustic),factor(acoustic.dta$fishing_net))
 table(factor(acoustic.dta$acoustic),factor(acoustic.dta$behavior))
 table(factor(acoustic.dta$behavior),factor(acoustic.dta$acoustic))
 ftable(factor(acoustic.dta$fishing_net), factor(acoustic.dta$behavior), factor(acoustic.dta$acoustic))
@@ -193,7 +116,7 @@ letter = trimws(table$.group))
 barPlot(computeStats(acoustic.dta, fishing_net, whistling_time_per_dolphin/n_bins),
 myletters_df, fishing_net,
 old_names = c("SSF","F"), new_names = c("Absent", "Present"),
-xname="Presence/Asence of fishing net", height=.5,
+xname="Presence/Asence of fishing net", height=1,
 ytitle="Mean whistling time per dolphin per min (in sec)")
 # BBP
 table <- cld(emmeans(mod.bbp, pairwise~fishing_net, adjust="tukey"), Letters = letters)
@@ -218,7 +141,7 @@ ytitle="Mean number of clicks per dolphin per min")
 table <- cld(emmeans(mod.whi, pairwise~acoustic, adjust="tukey"), Letters = letters)
 myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group))
 barPlot(computeStats(acoustic.dta, acoustic, whistling_time_per_dolphin/n_bins),
-myletters_df, acoustic, height=0.65, ytitle="Mean whistling time per dolphin per min (in sec)",
+myletters_df, acoustic, height=1.3, ytitle="Mean whistling time per dolphin per min (in sec)",
 old_names = c("AV","AV+D","D","D+AP","AP"),
 new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"),
 xname="Activation sequence")
@@ -286,7 +209,7 @@ legend_title="Fishing net", legend_labs=c("Present", "Absent"))
 table <- cld(emmeans(mod.whi, pairwise~behavior, adjust="tukey"), Letters = letters)
 myletters_df <- data.frame(behavior=table$behavior,letter = trimws(table$.group))
 barPlot(computeStats(acoustic.dta, behavior, whistling_time_per_dolphin/n_bins),
-myletters_df, behavior, height=0.75, ytitle="Mean whistling time per dolphin per min (in sec)",
+myletters_df, behavior, height=1.5, ytitle="Mean whistling time per dolphin per min (in sec)",
 old_names = c("CHAS", "DEPL", "SOCI"),
 new_names = c("Foraging", "Travelling", "Socialising"),
 xname="Behaviours of dolphins")
@@ -436,7 +359,7 @@ numb_stats_w[is.na(numb_stats_w)] <- 0
 numb_stats_w$ID <- as.factor(numb_stats_w$ID)
 numb_stats_w %>%
 ggplot(aes(x=ID, y=mean, group=1)) +
-geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd),
+geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci),
 color="red", width=.1, show.legend = FALSE)+
 geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+
 theme_light() + theme(text=element_text(size=12)) +
@@ -448,7 +371,7 @@ numb_stats_b[is.na(numb_stats_b)] <- 0
 numb_stats_b$ID <- as.factor(numb_stats_b$ID)
 numb_stats_b %>%
 ggplot(aes(x=ID, y=mean, group=1)) +
-geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd),
+geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci),
 color="red", width=.1, show.legend = FALSE)+
 geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+
 theme_light() + theme(text=element_text(size=12)) +
@@ -460,7 +383,7 @@ numb_stats_c[is.na(numb_stats_c)] <- 0
 numb_stats_c$ID <- as.factor(numb_stats_c$ID)
 numb_stats_c %>%
 ggplot(aes(x=ID, y=mean, group=1)) +
-geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd),
+geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci),
 color="red", width=.1)+
 geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+
 theme_light() + theme(text=element_text(size=12)) +
@@ -472,41 +395,118 @@ data_test <- acoustic.dta[acoustic.dta$ID!="2",]
 print( posthocKW(data_test$whistling_time_per_dolphin, data_test$ID))
 print( posthocKW(data_test$BBPs_per_dolphin, data_test$ID))
 print( posthocKW(data_test$clicks_per_dolphin, data_test$ID))
-#### Fishing net  ####
-# whistles
-table <- cld(emmeans(mod.whi, pairwise~fishing_net, adjust="tukey"), Letters = letters)
-myletters_df <- data.frame(fishing_net=table$fishing_net,
-letter = trimws(table$.group))
-barPlot(computeStats(acoustic.dta, fishing_net, whistling_time_per_dolphin/n_bins),
-myletters_df, fishing_net,
-old_names = c("SSF","F"), new_names = c("Absent", "Present"),
-xname="Presence/Asence of fishing net", height=1,
-ytitle="Mean whistling time per dolphin per min (in sec)")
-#### Acoustic plots  ####
-# Whistles
-table <- cld(emmeans(mod.whi, pairwise~acoustic, adjust="tukey"), Letters = letters)
-myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group))
-barPlot(computeStats(acoustic.dta, acoustic, whistling_time_per_dolphin/n_bins),
-myletters_df, acoustic, height=1.2, ytitle="Mean whistling time per dolphin per min (in sec)",
-old_names = c("AV","AV+D","D","D+AP","AP"),
-new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"),
-xname="Activation sequence")
-barPlot(computeStats(acoustic.dta, acoustic, whistling_time_per_dolphin/n_bins),
-myletters_df, acoustic, height=1.3, ytitle="Mean whistling time per dolphin per min (in sec)",
-old_names = c("AV","AV+D","D","D+AP","AP"),
-new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"),
-xname="Activation sequence")
-#### Behaviour plots ####
+#### Plots by days ####
+# clicks
+numb_stats_c <- computeStats(acoustic.dta, date, clicks_per_dolphin)
+numb_stats_c %>%
+ggplot(aes(x=date, y=mean, group=1)) +
+geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci),
+color="black", width=.1)+
+geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) +
+theme_classic() + theme(text=element_text(size=12)) +
+ylab("Mean number of clicks per dolphin per min")+
+xlab("Days of recording")
+# BBPs
+numb_stats_c <- computeStats(acoustic.dta, date, BBPs_per_dolphin)
+numb_stats_c %>%
+ggplot(aes(x=date, y=mean, group=1)) +
+geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci),
+color="black", width=.1)+
+geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) +
+theme_classic() + theme(text=element_text(size=12)) +
+ylab("Number of BBPs per dolphin per min")+
+xlab("Days of recording")
 # Whistles
-table <- cld(emmeans(mod.whi, pairwise~behavior, adjust="tukey"), Letters = letters)
-myletters_df <- data.frame(behavior=table$behavior,letter = trimws(table$.group))
-barPlot(computeStats(acoustic.dta, behavior, whistling_time_per_dolphin/n_bins),
-myletters_df, behavior, height=1.5, ytitle="Mean whistling time per dolphin per min (in sec)",
-old_names = c("CHAS", "DEPL", "SOCI"),
-new_names = c("Foraging", "Travelling", "Socialising"),
-xname="Behaviours of dolphins")
-25.8/19.1
-5.65/(25.8/19.1)
-25.15/20.9
-5.65/(25.15/20.9)
-5.65/(25.8/23.4)
+numb_stats_c <- computeStats(acoustic.dta, date, whistling_time_per_dolphin/n_bins)
+numb_stats_c %>%
+ggplot(aes(x=date, y=mean, group=1)) +
+geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci),
+color="black", width=.1)+
+geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) +
+theme_classic() + theme(text=element_text(size=12)) +
+ylab("Mean whistling time per dolphin per min (in sec)")+
+xlab("Days of recording")
+View(acoustic.dta)
+sum(acoustic.dta$date=="09/07/2021")
+56/361
+########################################################################
+#    STATISTICS
+#    Author : Loic LEHNHOFF
+#    Adapted from Yannick OUTREMAN
+#    Agrocampus Ouest - 2020
+#######################################################################
+library(pscl)
+library(MASS)
+library(lmtest)
+library(multcomp)
+library(emmeans)
+library(dplyr)        # "%>%" function
+library(forcats)      # "fct_relevel" function
+library(stringr)      # "gsub" function
+library(rcompanion)   # "fullPTable" function
+library(multcompView) # "multcompLetters" function
+library(ggplot2)
+library(pgirmess)
+library(postHoc)
+#library(tidyquant)    # geom_ma() if rolling average needed
+n_bins = 187.5 # number of bins per sec for spectrograms (whistles)
+################# DATASET IMPORTS #####################################
+folder <- './../'
+whistles.dta <-read.table(file=paste0(folder,
+'Whistles/Evaluation/whistles_durations.csv'),
+sep = ',', header=TRUE)
+whistles.dta <- whistles.dta[order(whistles.dta$audio_names),]
+bbp.dta <-read.table(file=paste0(folder,
+'BBPs/Results/16-06-22_14h00_number_of_BBP.csv'),
+sep = ',', header=TRUE)
+bbp.dta <- bbp.dta[order(bbp.dta$audio_names),]
+clicks.dta <-read.table(file=paste0(folder,
+'Clicks/Results/projection_updated_number_of_clicks_02052022.csv'), #number_of_clicks_02052022.csv
+sep = ',', header=TRUE)
+clicks.dta <- clicks.dta[order(clicks.dta$audio_names),]
+# Merge files into 1 dataset
+acoustic.dta <- clicks.dta
+acoustic.dta$number_of_bbp <- bbp.dta$number_of_BBP
+acoustic.dta$total_whistles_duration <- whistles.dta$total_whistles_duration
+rm(whistles.dta, bbp.dta, clicks.dta)
+# add group IDs
+id2020 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2020.csv'),
+sep = ',', header=TRUE)[1:396,]
+id2021 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2021.csv'),
+sep = ',', header=TRUE)[1:96,]
+id2021$ID <- id2021$ID+max(id2020$ID)
+id2021$Seq <- id2021$Seq+max(id2020$Seq)
+id.dta <- rbind(id2020, id2021)
+id.dta$Fichier.Audio <-str_sub(id.dta$Fichier.Audio, -27, -5)
+acoustic.dta$ID <- rep(-1, 490)
+for (name in acoustic.dta$audio_names){
+acoustic.dta$ID[match(name, acoustic.dta$audio_names)] <- id.dta$ID[match(name, id.dta$Fichier.Audio)]
+}
+acoustic.dta$ID <- as.factor(acoustic.dta$ID)
+rm(id2020, id2021, id.dta)
+# suppress "T" acoustic data (other groups not tested on our variables)
+acoustic.dta <- acoustic.dta[acoustic.dta$acoustic!="T",]
+# shuffle dataframe
+acoustic.dta <- acoustic.dta[sample(1:nrow(acoustic.dta)), ]
+acoustic.dta$acoustic <- factor(acoustic.dta$acoustic)
+#################### DATA INSPECTION  #################################
+# Data description
+names(acoustic.dta)
+# self explenatory except acoustic : correspond to the activation sequence.
+# Look for obvious correlations
+plot(acoustic.dta) # nothing that we can see
+# Look for zero-inflation
+100*sum(acoustic.dta$number_of_clicks == 0)/nrow(acoustic.dta)
+100*sum(acoustic.dta$number_of_bbp == 0)/nrow(acoustic.dta)
+100*sum(acoustic.dta$total_whistles_duration == 0)/nrow(acoustic.dta)
+# 5.8%, 53.7% & 27.1% of our data are zeros. Will have to be dealt with.
+# QUESTION: This study is aimed at understanding if dolphin's acoustic activity
+# is influenced by their behavior, the emission of a pinger or a fishing net.
+# Dependent variables (Y): number_of_clicks, number_of_bbp, total_whistles_duration.
+# Explanatory variables (X): acoustic, fishing_net, behavior, beacon, net, number.
+# What are the H0/ H1 hypotheses ?
+# H0 : No influence of any of the explanatory variables on a dependant one.
+# H1 : Influence of an explanatory variable on a dependent one.
+View(acoustic.dta)
+sum(acoustic.dta$date=="09/07/2021")
+sum(acoustic.dta$acoustic=="T")
diff --git a/Stats/BBP-click-whistles_3models.R b/Stats/BBP-click-whistles_3models.R
index 17d7dcd..0dcbe2f 100644
--- a/Stats/BBP-click-whistles_3models.R
+++ b/Stats/BBP-click-whistles_3models.R
@@ -544,7 +544,7 @@ numb_stats_w[is.na(numb_stats_w)] <- 0
 numb_stats_w$ID <- as.factor(numb_stats_w$ID)
 numb_stats_w %>%
   ggplot(aes(x=ID, y=mean, group=1)) +
-  geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd), 
+  geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci), 
                 color="red", width=.1, show.legend = FALSE)+
   geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+
   theme_light() + theme(text=element_text(size=12)) +
@@ -558,7 +558,7 @@ numb_stats_b$ID <- as.factor(numb_stats_b$ID)
 
 numb_stats_b %>%
   ggplot(aes(x=ID, y=mean, group=1)) +
-  geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd), 
+  geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci), 
                 color="red", width=.1, show.legend = FALSE)+
   geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+
   theme_light() + theme(text=element_text(size=12)) +
@@ -572,7 +572,7 @@ numb_stats_c$ID <- as.factor(numb_stats_c$ID)
 
 numb_stats_c %>%
   ggplot(aes(x=ID, y=mean, group=1)) +
-  geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd), 
+  geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci), 
                 color="red", width=.1)+
   geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+
   theme_light() + theme(text=element_text(size=12)) +
@@ -584,4 +584,41 @@ numb_stats_c %>%
 data_test <- acoustic.dta[acoustic.dta$ID!="2",]
 print( posthocKW(data_test$whistling_time_per_dolphin, data_test$ID))
 print( posthocKW(data_test$BBPs_per_dolphin, data_test$ID))
-print( posthocKW(data_test$clicks_per_dolphin, data_test$ID))
\ No newline at end of file
+print( posthocKW(data_test$clicks_per_dolphin, data_test$ID))
+
+#### Plots by days #### 
+# clicks
+numb_stats_c <- computeStats(acoustic.dta, date, clicks_per_dolphin)
+
+numb_stats_c %>%
+  ggplot(aes(x=date, y=mean, group=1)) +
+  geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci), 
+                color="black", width=.1)+
+  geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) +
+  theme_classic() + theme(text=element_text(size=12)) +
+  ylab("Mean number of clicks per dolphin per min")+
+  xlab("Days of recording")
+
+# BBPs
+numb_stats_c <- computeStats(acoustic.dta, date, BBPs_per_dolphin)
+
+numb_stats_c %>%
+  ggplot(aes(x=date, y=mean, group=1)) +
+  geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci), 
+                color="black", width=.1)+
+  geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) +
+  theme_classic() + theme(text=element_text(size=12)) +
+  ylab("Number of BBPs per dolphin per min")+
+  xlab("Days of recording")
+
+# Whistles
+numb_stats_c <- computeStats(acoustic.dta, date, whistling_time_per_dolphin/n_bins)
+
+numb_stats_c %>%
+  ggplot(aes(x=date, y=mean, group=1)) +
+  geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci), 
+                color="black", width=.1)+
+  geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) +
+  theme_classic() + theme(text=element_text(size=12)) +
+  ylab("Mean whistling time per dolphin per min (in sec)")+
+  xlab("Days of recording")
-- 
GitLab