Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
RAVEN2YOLO
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Stephane Chavin
RAVEN2YOLO
Commits
74a8dd8d
Commit
74a8dd8d
authored
5 months ago
by
Stephane Chavin
Browse files
Options
Downloads
Patches
Plain Diff
correct split background
parent
c1ab7237
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
get_train_annot.py
+7
-5
7 additions, 5 deletions
get_train_annot.py
get_train_val.py
+25
-14
25 additions, 14 deletions
get_train_val.py
utils.py
+37
-1
37 additions, 1 deletion
utils.py
with
69 additions
and
20 deletions
get_train_annot.py
+
7
−
5
View file @
74a8dd8d
...
...
@@ -14,7 +14,7 @@ from tqdm import tqdm
import
pandas
as
pd
def
process
(
entry
,
arguments
,
species_list
):
def
main
(
entry
,
arguments
,
species_list
):
"""
Precess the annotation to get the .jpg spectrogram and the .txt annotation file
:param x (tuple): Enumerate number, [filename, group] per file
...
...
@@ -213,11 +213,11 @@ if __name__ == '__main__':
if
args
.
cpu
==
1
:
for
i
in
tqdm
(
enumerate
(
df
.
groupby
(
'
Path
'
)),
total
=
len
(
df
.
groupby
(
'
Path
'
)),
desc
=
"
Processing
"
,
ascii
=
'
░▒▓█
'
):
process
(
i
,
args
,
species
)
main
(
i
,
args
,
species
)
else
:
args
=
[
args
for
i
in
range
(
len
(
df
.
groupby
(
'
Path
'
)))]
species
=
[
species
for
i
in
range
(
len
(
df
.
groupby
(
'
Path
'
)))]
p_map
(
process
,
enumerate
(
df
.
groupby
(
'
Path
'
)),
args
,
p_map
(
main
,
enumerate
(
df
.
groupby
(
'
Path
'
)),
args
,
species
,
num_cpus
=
args
[
0
].
cpu
,
total
=
len
(
df
.
groupby
(
'
Path
'
)))
args
=
args
[
0
]
print
(
'
saved to
'
,
args
.
directory
)
...
...
@@ -231,11 +231,13 @@ if __name__ == '__main__':
if
SPLIT
==
'
Y
'
:
print
(
'
The train set will be 70%, val set 15% and test set 15%
'
)
path
=
os
.
getcwd
()
# Get the current path to find the split script
# Get the path of the current script
path
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
))
script
=
os
.
path
.
join
(
path
,
'
get_train_val.py
'
)
data_path
=
os
.
path
.
join
(
path
,
args
.
directory
,
'
labels
'
)
directory_path
=
os
.
path
.
join
(
path
,
args
.
directory
)
# Create the directory path if not exists
utils
.
create_directory
(
directory_path
)
try
:
...
...
This diff is collapsed.
Click to expand it.
get_train_val.py
+
25
−
14
View file @
74a8dd8d
...
...
@@ -67,17 +67,22 @@ def prepare_data(arguments):
"""
Prepare the annotation before getting splited
:param args (args): Argument
:return
detec
tion (DataFrame): DataFrame with all the annotation to split
:return
annota
tion
s
(DataFrame): DataFrame with all the annotation to split
"""
detections
=
pd
.
concat
({
f
:
pd
.
read_csv
(
os
.
path
.
join
(
arguments
.
path_to_data
,
f
),
sep
=
'
'
,
names
=
[
'
species
'
,
'
x
'
,
'
y
'
,
'
w
'
,
'
h
'
])
for
f
in
tqdm
(
os
.
listdir
(
arguments
.
path_to_data
),
desc
=
"
Processing
"
,
ascii
=
'
░▒▓█
'
)},
names
=
[
'
file
'
])
annotations
=
[]
background
=
[]
for
f
in
tqdm
(
os
.
path
.
join
(
arguments
.
path_to_data
,
f
),
desc
=
"
Processing
"
,
ascii
=
'
░▒▓█
'
):
file_annotation
=
pd
.
read_csv
(
f
,
sep
=
'
'
,
names
=
[
'
species
'
,
'
x
'
,
'
y
'
,
'
w
'
,
'
h
'
])
if
len
(
file_annotation
)
==
0
:
background
.
append
(
f
)
else
:
file_annotation
[
'
file
'
]
=
f
annotations
.
extend
(
file_annotation
.
to_dict
(
orient
=
'
records
'
))
detec
tions
=
detections
.
reset_index
(
)
detec
tions
.
species
=
detec
tions
.
species
.
astype
(
float
)
return
detections
annota
tions
=
pd
.
DataFrame
(
annotations
)
annota
tions
.
species
=
annota
tions
.
species
.
astype
(
float
)
return
annotations
,
background
if
__name__
==
'
__main__
'
:
...
...
@@ -96,7 +101,7 @@ if __name__ == '__main__':
'
for test and same for validation
'
,
default
=
None
)
args
=
parser
.
parse_args
()
df
=
prepare_data
(
args
)
df
,
background
=
prepare_data
(
args
)
train
,
val
=
utils
.
split
(
df
,
'
train
'
,
args
.
ratio
)
saved_directory
=
os
.
path
.
join
(
args
.
directory
,
'
set
'
)
...
...
@@ -126,7 +131,13 @@ if __name__ == '__main__':
command
=
f
'
python
{
yolo_path
}
--data
{
data_path
}
--imgsz 640 --epochs 100 --weights
{
weights_path
}
--hyp
{
hyp_path
}
--cache
'
print
(
command
,
'
\n
'
)
if
len
(
background
==
0
):
print
(
'
\u26A0\uFE0F
Be aware that it is recommended to have background images that
'
,
'
represents 10% of your dataset.
To do so, please
use the script
"
get_spectrogram.py
"'
,
'
represents 10% of your dataset.
If you do not have background,
use the script
"
get_spectrogram.py
"'
,
'
with --background arguments. Comptue on recordings that contains multiple type of noise...
'
)
else
:
utils
.
split_background
(
background
,
args
)
print
(
f
'
Your dataset contains
{
len
(
background
)
}
images in background. It represents
'
,
f
'
{
(
len
(
background
)
/
len
(
df
))
*
100
}
% of your dataset set. It is recommended to reach around
'
,
'
10% for a good model.
'
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
utils.py
+
37
−
1
View file @
74a8dd8d
...
...
@@ -9,6 +9,7 @@ import json
from
datetime
import
date
from
pathlib
import
Path
import
librosa
import
random
import
pandas
as
pd
import
numpy
as
np
...
...
@@ -209,6 +210,41 @@ def split(df, method, ratio=0.7):
return
major_df
,
minor_df
def split_background(file_list, arguments):
    """
    Randomly split the background files and dispatch them into the different sets.

    The extension of every entry is stripped, the resulting names are shuffled
    and then cut into equally sized consecutive chunks, one per set
    ('train', 'test', 'val' when ``arguments.test`` is truthy, otherwise
    'train', 'val'). The last set also receives the remainder when the number
    of files is not a multiple of the number of sets.

    :param file_list (list): List with all the filename of the background.
    :param arguments (args): Arguments. The attributes read here are
        ``test``, ``path_to_data`` and ``directory``.
    """
    # os.path.splitext only removes the last extension and keeps names
    # without any dot intact (a manual split/join would return '' for them).
    stems = [os.path.splitext(name)[0] for name in file_list]
    random.shuffle(stems)

    set_names = ['train', 'test', 'val'] if arguments.test else ['train', 'val']

    # Integer division is required: slice bounds must be integers, a float
    # bound (e.g. total / 3) raises TypeError.
    chunk = len(stems) // len(set_names)

    # The sources do not depend on the target set, so build them only once.
    source_txt = arguments.path_to_data
    source_img = os.path.join(arguments.path_to_data, '../images/')

    last_index = len(set_names) - 1
    for index, set_name in enumerate(set_names):
        start = index * chunk
        # An open-ended slice for the last set so no file is left behind.
        stop = None if index == last_index else start + chunk
        subset = stems[start:stop]

        directory_txt = os.path.join(arguments.directory, f'labels/{set_name}')
        directory_img = os.path.join(arguments.directory, f'images/{set_name}')

        # NOTE(review): copy_files_to_directory is defined elsewhere in this
        # module; presumably it copies '<name>.<suffix>' from the source to
        # the target directory - confirm against its definition.
        copy_files_to_directory(subset, source_txt, directory_txt, 'txt')
        copy_files_to_directory(subset, source_img, directory_img, 'jpg')
def
open_file
(
path
):
"""
Open a file with a path without knowing if suffix is .pkl or .csv
...
...
@@ -240,7 +276,7 @@ def open_file(path):
print
(
"
Wav files can
'
t be load...
"
)
return
pd
.
DataFrame
()
else
:
print
(
'
Collect all files
o
n
a
folder...
'
)
print
(
'
Collect all files
i
n
the
folder...
'
)
df
=
pd
.
DataFrame
(
glob
.
glob
(
os
.
path
.
join
(
path
,
'
*
'
),
recursive
=
True
),
columns
=
[
'
Path
'
])
return
df
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment