From d19a6ecb8ccaa80ddcbfdcf70076e453b243722b Mon Sep 17 00:00:00 2001 From: Loic-Lenof <loic.lenof@gmail.com> Date: Mon, 10 Oct 2022 15:08:00 +0200 Subject: [PATCH] Adding plots + adding plots by date --- Stats/.Rhistory | 240 ++++++++++++++--------------- Stats/BBP-click-whistles_3models.R | 45 +++++- 2 files changed, 161 insertions(+), 124 deletions(-) diff --git a/Stats/.Rhistory b/Stats/.Rhistory index eaeec58..3cef1a1 100644 --- a/Stats/.Rhistory +++ b/Stats/.Rhistory @@ -1,80 +1,3 @@ -sep = ',', header=TRUE)[1:396,] -id2021 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2021.csv'), -sep = ',', header=TRUE)[1:96,] -id2021$ID <- id2021$ID+max(id2020$ID) -id2021$Seq <- id2021$Seq+max(id2020$Seq) -id.dta <- rbind(id2020, id2021) -id.dta$Fichier.Audio <-str_sub(id.dta$Fichier.Audio, -27, -5) -acoustic.dta$ID <- rep(-1, 490) -for (name in acoustic.dta$audio_names){ -acoustic.dta$ID[match(name, acoustic.dta$audio_names)] <- id.dta$ID[match(name, id.dta$Fichier.Audio)] -} -acoustic.dta$ID <- as.factor(acoustic.dta$ID) -rm(id2020, id2021, id.dta) -# suppress "T" acoustic data (other groups not tested on our variables) -acoustic.dta <- acoustic.dta[acoustic.dta$acoustic!="T",] -# shuffle dataframe -acoustic.dta <- acoustic.dta[sample(1:nrow(acoustic.dta)), ] -acoustic.dta$acoustic <- factor(acoustic.dta$acoustic) -#################### DATA INSPECTION ################################# -# Data description -names(acoustic.dta) -# self explenatory except acoustic : correspond to the activation sequence. -# Look for obvious correlations -plot(acoustic.dta) # nothing that we can see -# Look for zero-inflation -100*sum(acoustic.dta$number_of_clicks == 0)/nrow(acoustic.dta) -100*sum(acoustic.dta$number_of_bbp == 0)/nrow(acoustic.dta) -100*sum(acoustic.dta$total_whistles_duration == 0)/nrow(acoustic.dta) -# 5.8%, 53.7% & 27.1% of our data are zeros. Will have to be dealt with. -# QUESTION: This study is aimed at understanding if dolphin's acoustic activity -# is influenced by their behavior, the emission of a pinger or a fishing net. -# Dependent variables (Y): number_of_clicks, number_of_bbp, total_whistles_duration. -# Explanatory variables (X): acoustic, fishing_net, behavior, beacon, net, number. -# What are the H0/ H1 hypotheses ? -# H0 : No influence of any of the explanatory variables on a dependant one. -# H1 : Influence of an explanatory variable on a dependent one. -##################### DATA EXPLORATION ################################ -# Y Outlier detection -par(mfrow=c(2,3)) -boxplot(acoustic.dta$total_whistles_duration, col='red', -ylab='total_whistles_duration') -boxplot(acoustic.dta$number_of_bbp, col='red', -ylab='number_of_bbp') -boxplot(acoustic.dta$number_of_clicks, col='red', -ylab='number_of_clicks') -dotchart(acoustic.dta$total_whistles_duration, pch=16, -xlab='total_whistles_duration', col='red') -dotchart(acoustic.dta$number_of_bbp, pch=16, -xlab='number_of_bbp', col='red') -dotchart(acoustic.dta$number_of_clicks, pch=16, -xlab='number_of_clicks', col='red') -# Y distribution -par(mfrow=c(2,3)) -hist(acoustic.dta$total_whistles_duration, col='red', breaks=8, -xlab='total_whistles_duration', ylab='number') -hist(acoustic.dta$number_of_bbp, col='red', breaks=8, -xlab='number_of_bbp', ylab='number') -hist(acoustic.dta$number_of_clicks, col='red', breaks=8, -xlab='number_of_clicks', ylab='number') -qqnorm(acoustic.dta$total_whistles_duration, col='red', pch=16) -qqline(acoustic.dta$total_whistles_duration) -qqnorm(acoustic.dta$number_of_bbp, col='red', pch=16) -qqline(acoustic.dta$number_of_bbp) -qqnorm(acoustic.dta$number_of_clicks, col='red', pch=16) -qqline(acoustic.dta$number_of_clicks) -shapiro.test(acoustic.dta$total_whistles_duration) -shapiro.test(acoustic.dta$number_of_bbp) -shapiro.test(acoustic.dta$number_of_clicks) -# p-values are significant => they do not follow normal distributions -# will need a transformation or the use of a glim model -# X Number of individuals per level -summary(factor(acoustic.dta$acoustic)) -summary(factor(acoustic.dta$fishing_net)) -summary(factor(acoustic.dta$behavior)) -summary(factor(acoustic.dta$beacon)) -summary(factor(acoustic.dta$net)) -table(factor(acoustic.dta$acoustic),factor(acoustic.dta$fishing_net)) table(factor(acoustic.dta$acoustic),factor(acoustic.dta$behavior)) table(factor(acoustic.dta$behavior),factor(acoustic.dta$acoustic)) ftable(factor(acoustic.dta$fishing_net), factor(acoustic.dta$behavior), factor(acoustic.dta$acoustic)) @@ -193,7 +116,7 @@ letter = trimws(table$.group)) barPlot(computeStats(acoustic.dta, fishing_net, whistling_time_per_dolphin/n_bins), myletters_df, fishing_net, old_names = c("SSF","F"), new_names = c("Absent", "Present"), -xname="Presence/Asence of fishing net", height=.5, +xname="Presence/Asence of fishing net", height=1, ytitle="Mean whistling time per dolphin per min (in sec)") # BBP table <- cld(emmeans(mod.bbp, pairwise~fishing_net, adjust="tukey"), Letters = letters) @@ -218,7 +141,7 @@ ytitle="Mean number of clicks per dolphin per min") table <- cld(emmeans(mod.whi, pairwise~acoustic, adjust="tukey"), Letters = letters) myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group)) barPlot(computeStats(acoustic.dta, acoustic, whistling_time_per_dolphin/n_bins), -myletters_df, acoustic, height=0.65, ytitle="Mean whistling time per dolphin per min (in sec)", +myletters_df, acoustic, height=1.3, ytitle="Mean whistling time per dolphin per min (in sec)", old_names = c("AV","AV+D","D","D+AP","AP"), new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"), xname="Activation sequence") @@ -286,7 +209,7 @@ legend_title="Fishing net", legend_labs=c("Present", "Absent")) table <- cld(emmeans(mod.whi, pairwise~behavior, adjust="tukey"), Letters = letters) myletters_df <- data.frame(behavior=table$behavior,letter = trimws(table$.group)) barPlot(computeStats(acoustic.dta, behavior, whistling_time_per_dolphin/n_bins), -myletters_df, behavior, height=0.75, ytitle="Mean whistling time per dolphin per min (in sec)", +myletters_df, behavior, height=1.5, ytitle="Mean whistling time per dolphin per min (in sec)", old_names = c("CHAS", "DEPL", "SOCI"), new_names = c("Foraging", "Travelling", "Socialising"), xname="Behaviours of dolphins") @@ -436,7 +359,7 @@ numb_stats_w[is.na(numb_stats_w)] <- 0 numb_stats_w$ID <- as.factor(numb_stats_w$ID) numb_stats_w %>% ggplot(aes(x=ID, y=mean, group=1)) + -geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd), +geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci), color="red", width=.1, show.legend = FALSE)+ geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+ theme_light() + theme(text=element_text(size=12)) + @@ -448,7 +371,7 @@ numb_stats_b[is.na(numb_stats_b)] <- 0 numb_stats_b$ID <- as.factor(numb_stats_b$ID) numb_stats_b %>% ggplot(aes(x=ID, y=mean, group=1)) + -geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd), +geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci), color="red", width=.1, show.legend = FALSE)+ geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+ theme_light() + theme(text=element_text(size=12)) + @@ -460,7 +383,7 @@ numb_stats_c[is.na(numb_stats_c)] <- 0 numb_stats_c$ID <- as.factor(numb_stats_c$ID) numb_stats_c %>% ggplot(aes(x=ID, y=mean, group=1)) + -geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd), +geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci), color="red", width=.1)+ geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+ theme_light() + theme(text=element_text(size=12)) + @@ -472,41 +395,118 @@ data_test <- acoustic.dta[acoustic.dta$ID!="2",] print( posthocKW(data_test$whistling_time_per_dolphin, data_test$ID)) print( posthocKW(data_test$BBPs_per_dolphin, data_test$ID)) print( posthocKW(data_test$clicks_per_dolphin, data_test$ID)) -#### Fishing net #### -# whistles -table <- cld(emmeans(mod.whi, pairwise~fishing_net, adjust="tukey"), Letters = letters) -myletters_df <- data.frame(fishing_net=table$fishing_net, -letter = trimws(table$.group)) -barPlot(computeStats(acoustic.dta, fishing_net, whistling_time_per_dolphin/n_bins), -myletters_df, fishing_net, -old_names = c("SSF","F"), new_names = c("Absent", "Present"), -xname="Presence/Asence of fishing net", height=1, -ytitle="Mean whistling time per dolphin per min (in sec)") -#### Acoustic plots #### -# Whistles -table <- cld(emmeans(mod.whi, pairwise~acoustic, adjust="tukey"), Letters = letters) -myletters_df <- data.frame(acoustic=table$acoustic,letter = trimws(table$.group)) -barPlot(computeStats(acoustic.dta, acoustic, whistling_time_per_dolphin/n_bins), -myletters_df, acoustic, height=1.2, ytitle="Mean whistling time per dolphin per min (in sec)", -old_names = c("AV","AV+D","D","D+AP","AP"), -new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"), -xname="Activation sequence") -barPlot(computeStats(acoustic.dta, acoustic, whistling_time_per_dolphin/n_bins), -myletters_df, acoustic, height=1.3, ytitle="Mean whistling time per dolphin per min (in sec)", -old_names = c("AV","AV+D","D","D+AP","AP"), -new_names = c("BEF","BEF+DUR","DUR", "DUR+AFT", "AFT"), -xname="Activation sequence") -#### Behaviour plots #### +#### Plots by days #### +# clicks +numb_stats_c <- computeStats(acoustic.dta, date, clicks_per_dolphin) +numb_stats_c %>% +ggplot(aes(x=date, y=mean, group=1)) + +geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci), +color="black", width=.1)+ +geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) + +theme_classic() + theme(text=element_text(size=12)) + +ylab("Mean number of clicks per dolphin per min")+ +xlab("Days of recording") +# BBPs +numb_stats_c <- computeStats(acoustic.dta, date, BBPs_per_dolphin) +numb_stats_c %>% +ggplot(aes(x=date, y=mean, group=1)) + +geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci), +color="black", width=.1)+ +geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) + +theme_classic() + theme(text=element_text(size=12)) + +ylab("Number of BBPs per dolphin per min")+ +xlab("Days of recording") # Whistles -table <- cld(emmeans(mod.whi, pairwise~behavior, adjust="tukey"), Letters = letters) -myletters_df <- data.frame(behavior=table$behavior,letter = trimws(table$.group)) -barPlot(computeStats(acoustic.dta, behavior, whistling_time_per_dolphin/n_bins), -myletters_df, behavior, height=1.5, ytitle="Mean whistling time per dolphin per min (in sec)", -old_names = c("CHAS", "DEPL", "SOCI"), -new_names = c("Foraging", "Travelling", "Socialising"), -xname="Behaviours of dolphins") -25.8/19.1 -5.65/(25.8/19.1) -25.15/20.9 -5.65/(25.15/20.9) -5.65/(25.8/23.4) +numb_stats_c <- computeStats(acoustic.dta, date, whistling_time_per_dolphin/n_bins) +numb_stats_c %>% +ggplot(aes(x=date, y=mean, group=1)) + +geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci), +color="black", width=.1)+ +geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) + +theme_classic() + theme(text=element_text(size=12)) + +ylab("Mean whistling time per dolphin per min (in sec)")+ +xlab("Days of recording") +View(acoustic.dta) +sum(acoustic.dta$date=="09/07/2021") +56/361 +######################################################################## +# STATISTICS +# Author : Loic LEHNHOFF +# Adapted from Yannick OUTREMAN +# Agrocampus Ouest - 2020 +####################################################################### +library(pscl) +library(MASS) +library(lmtest) +library(multcomp) +library(emmeans) +library(dplyr) # "%>%" function +library(forcats) # "fct_relevel" function +library(stringr) # "gsub" function +library(rcompanion) # "fullPTable" function +library(multcompView) # "multcompLetters" function +library(ggplot2) +library(pgirmess) +library(postHoc) +#library(tidyquant) # geom_ma() if rolling average needed +n_bins = 187.5 # number of bins per sec for spectrograms (whistles) +################# DATASET IMPORTS ##################################### +folder <- './../' +whistles.dta <-read.table(file=paste0(folder, +'Whistles/Evaluation/whistles_durations.csv'), +sep = ',', header=TRUE) +whistles.dta <- whistles.dta[order(whistles.dta$audio_names),] +bbp.dta <-read.table(file=paste0(folder, +'BBPs/Results/16-06-22_14h00_number_of_BBP.csv'), +sep = ',', header=TRUE) +bbp.dta <- bbp.dta[order(bbp.dta$audio_names),] +clicks.dta <-read.table(file=paste0(folder, +'Clicks/Results/projection_updated_number_of_clicks_02052022.csv'), #number_of_clicks_02052022.csv +sep = ',', header=TRUE) +clicks.dta <- clicks.dta[order(clicks.dta$audio_names),] +# Merge files into 1 dataset +acoustic.dta <- clicks.dta +acoustic.dta$number_of_bbp <- bbp.dta$number_of_BBP +acoustic.dta$total_whistles_duration <- whistles.dta$total_whistles_duration +rm(whistles.dta, bbp.dta, clicks.dta) +# add group IDs +id2020 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2020.csv'), +sep = ',', header=TRUE)[1:396,] +id2021 <- read.table(file=paste0(folder, 'CSV_data/Audio_Data_2021.csv'), +sep = ',', header=TRUE)[1:96,] +id2021$ID <- id2021$ID+max(id2020$ID) +id2021$Seq <- id2021$Seq+max(id2020$Seq) +id.dta <- rbind(id2020, id2021) +id.dta$Fichier.Audio <-str_sub(id.dta$Fichier.Audio, -27, -5) +acoustic.dta$ID <- rep(-1, 490) +for (name in acoustic.dta$audio_names){ +acoustic.dta$ID[match(name, acoustic.dta$audio_names)] <- id.dta$ID[match(name, id.dta$Fichier.Audio)] +} +acoustic.dta$ID <- as.factor(acoustic.dta$ID) +rm(id2020, id2021, id.dta) +# suppress "T" acoustic data (other groups not tested on our variables) +acoustic.dta <- acoustic.dta[acoustic.dta$acoustic!="T",] +# shuffle dataframe +acoustic.dta <- acoustic.dta[sample(1:nrow(acoustic.dta)), ] +acoustic.dta$acoustic <- factor(acoustic.dta$acoustic) +#################### DATA INSPECTION ################################# +# Data description +names(acoustic.dta) +# self explenatory except acoustic : correspond to the activation sequence. +# Look for obvious correlations +plot(acoustic.dta) # nothing that we can see +# Look for zero-inflation +100*sum(acoustic.dta$number_of_clicks == 0)/nrow(acoustic.dta) +100*sum(acoustic.dta$number_of_bbp == 0)/nrow(acoustic.dta) +100*sum(acoustic.dta$total_whistles_duration == 0)/nrow(acoustic.dta) +# 5.8%, 53.7% & 27.1% of our data are zeros. Will have to be dealt with. +# QUESTION: This study is aimed at understanding if dolphin's acoustic activity +# is influenced by their behavior, the emission of a pinger or a fishing net. +# Dependent variables (Y): number_of_clicks, number_of_bbp, total_whistles_duration. +# Explanatory variables (X): acoustic, fishing_net, behavior, beacon, net, number. +# What are the H0/ H1 hypotheses ? +# H0 : No influence of any of the explanatory variables on a dependant one. +# H1 : Influence of an explanatory variable on a dependent one. +View(acoustic.dta) +sum(acoustic.dta$date=="09/07/2021") +sum(acoustic.dta$acoustic=="T") diff --git a/Stats/BBP-click-whistles_3models.R b/Stats/BBP-click-whistles_3models.R index 17d7dcd..0dcbe2f 100644 --- a/Stats/BBP-click-whistles_3models.R +++ b/Stats/BBP-click-whistles_3models.R @@ -544,7 +544,7 @@ numb_stats_w[is.na(numb_stats_w)] <- 0 numb_stats_w$ID <- as.factor(numb_stats_w$ID) numb_stats_w %>% ggplot(aes(x=ID, y=mean, group=1)) + - geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd), + geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci), color="red", width=.1, show.legend = FALSE)+ geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+ theme_light() + theme(text=element_text(size=12)) + @@ -558,7 +558,7 @@ numb_stats_b$ID <- as.factor(numb_stats_b$ID) numb_stats_b %>% ggplot(aes(x=ID, y=mean, group=1)) + - geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd), + geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci), color="red", width=.1, show.legend = FALSE)+ geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+ theme_light() + theme(text=element_text(size=12)) + @@ -572,7 +572,7 @@ numb_stats_c$ID <- as.factor(numb_stats_c$ID) numb_stats_c %>% ggplot(aes(x=ID, y=mean, group=1)) + - geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+sd), + geom_errorbar(aes(x=ID, ymin=mean-sd, ymax=mean+ci), color="red", width=.1)+ geom_point() + scale_x_discrete(guide = guide_axis(n.dodge = 2))+ theme_light() + theme(text=element_text(size=12)) + @@ -584,4 +584,41 @@ numb_stats_c %>% data_test <- acoustic.dta[acoustic.dta$ID!="2",] print( posthocKW(data_test$whistling_time_per_dolphin, data_test$ID)) print( posthocKW(data_test$BBPs_per_dolphin, data_test$ID)) -print( posthocKW(data_test$clicks_per_dolphin, data_test$ID)) \ No newline at end of file +print( posthocKW(data_test$clicks_per_dolphin, data_test$ID)) + +#### Plots by days #### +# clicks +numb_stats_c <- computeStats(acoustic.dta, date, clicks_per_dolphin) + +numb_stats_c %>% + ggplot(aes(x=date, y=mean, group=1)) + + geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci), + color="black", width=.1)+ + geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) + + theme_classic() + theme(text=element_text(size=12)) + + ylab("Mean number of clicks per dolphin per min")+ + xlab("Days of recording") + +# BBPs +numb_stats_c <- computeStats(acoustic.dta, date, BBPs_per_dolphin) + +numb_stats_c %>% + ggplot(aes(x=date, y=mean, group=1)) + + geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci), + color="black", width=.1)+ + geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) + + theme_classic() + theme(text=element_text(size=12)) + + ylab("Number of BBPs per dolphin per min")+ + xlab("Days of recording") + +# Whistles +numb_stats_c <- computeStats(acoustic.dta, date, whistling_time_per_dolphin/n_bins) + +numb_stats_c %>% + ggplot(aes(x=date, y=mean, group=1)) + + geom_errorbar(aes(x=date, ymin=mean-sd, ymax=mean+ci), + color="black", width=.1)+ + geom_point() + scale_x_discrete(guide = guide_axis(n.dodge=2)) + + theme_classic() + theme(text=element_text(size=12)) + + ylab("Mean whistling time per dolphin per min (in sec)")+ + xlab("Days of recording") -- GitLab