-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path2. Post-processing.py
131 lines (97 loc) · 4.18 KB
/
2. Post-processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Goat vocalizations
Università degli studi di Milano
@author: Giulia Cuttone
"""
import pandas as pd
# Z-score normalization (Standardization)
from sklearn.preprocessing import StandardScaler
'---------------------------------------------------------------------------------------------------------------'
def map_to_main_class(value, mapping):
    """
    Resolve a subclass label to the main class that lists it.

    The mapping is scanned in its iteration order and the first main class
    whose subclass collection contains ``value`` is returned. A value that
    is not listed under any main class is handed back unchanged.
    ---------------------------------------------------------------------
    Args:
        value (str): The label to resolve.
        mapping (dict): Main class -> collection of its subclass labels.
    ---------------------------------------------------------------------
    Returns:
        (str): The matching main class, or ``value`` itself when no
        collection contains it.
    """
    # Lazy generator: membership tests stop at the first hit, as with an
    # explicit loop and early return.
    matching_classes = (
        parent for parent, members in mapping.items() if value in members
    )
    return next(matching_classes, value)
def safe_assign_label(label):
    """
    Non-raising wrapper around assign_label.

    Delegates to assign_label. When that call raises ValueError, the label is
    appended to the module-level ``invalid_labels`` list and the placeholder
    'Unknown' is returned in place of the error.
    -------------------------------------
    Parameters:
        label (string): Audio file label.
    -------------------------------------
    Returns:
        (string)
        Whatever assign_label returns for a valid label,
        'Unknown' if the label is not valid.
    """
    try:
        emotional_state = assign_label(label)
    except ValueError:
        # Keep track of the rejected label so it can be reported afterwards.
        invalid_labels.append(label)
        emotional_state = 'Unknown'
    return emotional_state
def assign_label(label):
    """
    Classify a label as 'Positive' or 'Negative'.

    Membership is tested against the module-level ``Positive`` collection
    first and then against ``Negative``.
    -----------------------------------------------------------------------------
    Parameters:
        label (string): Audio file label.
    -------------------------------------
    Returns:
        (string)
        'Positive' if the label is in the Positive collection,
        'Negative' if it is in the Negative collection.
    -----------------------------------------------------------
    Raises:
        ValueError: If the label is found in neither collection.
    """
    if label in Positive:
        return 'Positive'
    if label in Negative:
        return 'Negative'
    # Neither collection knows this label: report it to the caller.
    raise ValueError(f"Label '{label}' is invalid: it must be in positive_labels or negative_labels")
'---------------------------------------------------------------------------------------------------------------'
"Main code:"
# --- Label configuration -----------------------------------------------------
# Subclass labels that are folded into one main class before labelling.
class_mapping = {
    'Calori': ['calori artificiali', 'calori naturali'],
    'Distribuzione Cibo': ['Distribuzione fieno', 'distribuzione concentrato', 'distribuzione unifeed'],
    'Fenomeni legati al parto': ['doglie del parto', 'fase espulsiva', 'parto difficile', 'aborto'],
    'Ferita-Morte': ['Ferita', 'Morte capra'],
}

# Main classes labelled 'Positive' / 'Negative' by assign_label.
Positive = ['Calori', 'Distribuzione Cibo', 'Presenza contemporanea di madri e capretti', 'Visita di estranei']
Negative = ['Fenomeni legati al parto', 'Ferita-Morte', 'Isolamento sociale', 'Separazione madre capretto']

# Filled by safe_assign_label with every label found in neither list above.
invalid_labels = []

# --- Load and standardize ----------------------------------------------------
file_path = './Vocapra_dataset.csv'
df = pd.read_csv(file_path)

# Every column except 'Class' is standardized (Z-score); the scaler is
# fitted on the full dataset.
features = df.drop(columns='Class')
scaler = StandardScaler()
normalized_features = scaler.fit_transform(features)

# --- Assemble the post-processed table ---------------------------------------
# Standardized features keep their original column names.
processed_data = pd.DataFrame(normalized_features, columns=features.columns)

# 'Class' is taken from the raw data with subclasses collapsed to main classes.
processed_data['Class'] = df['Class'].apply(map_to_main_class, args=(class_mapping,))

# 'Emotional_state' is derived from the main class; unrecognised classes
# become 'Unknown' and are collected in invalid_labels.
processed_data['Emotional_state'] = processed_data['Class'].apply(safe_assign_label)

# --- Report and save ---------------------------------------------------------
if invalid_labels:
    print(f"\nInvalid labels encountered: {set(invalid_labels)}\n")

# Preview of the first rows of the post-processed dataset.
print(processed_data.head())

# Persist the result without the index column.
processed_data.to_csv('Vocapra_postprocessing.csv', index=False)