-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathee5132_part2(b)_with_experimental_data (1).py
115 lines (78 loc) · 3.98 KB
/
ee5132_part2(b)_with_experimental_data (1).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# -*- coding: utf-8 -*-
"""EE5132 Part2(b) with Experimental data.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1dewvvQXslNlPj0i5P476wtQL4vn-CgxX
**Import libraries**
"""
# Data processing and model training
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
# SVM classifier
from sklearn.svm import SVC
# Decision tree classifier (tree module also used below for plotting)
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
"""**Mount Google Drive to access and import CSV file**"""
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): the CSV is read from /content, not from the mounted Drive path
# — presumably the file was uploaded to the Colab session; confirm.
df = pd.read_csv("/content/preprocessed_dataset.csv")
"""Let's check what this dataset contains!"""
# Notebook-style inspection cells: in a plain script these expressions are
# evaluated and discarded (only useful interactively in Colab).
df.head(10) #Show top 10 rows of the dataset
df.isnull().any() #Check for any null values in dataset
df.dtypes #Check the variable types for our variables
df.describe() #Summary statistics per column
len(df["label"]) #count number of rows in this dataset
# NOTE(review): 0:7 selects SEVEN feature columns, while the comment names
# three quantities — presumably several temperature sensors plus light and
# humidity (see the 7-element Example vectors below); confirm against the CSV.
X = df.iloc[:, 0:7] #Features to be used are only temperature, light intensity and humidity
X
Y = df.iloc[:, 7:] #Actual labels (all remaining columns)
Y
# NOTE(review): CV is defined here but never passed to cross_val_score later
# (cv=5 and cv=None are used instead) — dead variable or an oversight; confirm.
CV = KFold(n_splits = 3, shuffle = True , random_state = 42) ##splits into 3 folds for cross validation later
"""Separated the features and labels from the dataframe. Now we can split the training and test sets."""
# NOTE(review): no random_state here, so the split (and the scores quoted
# below) are not reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.7) #splits the dataset into 70% for training, 30% for testing
clf = SVC(C=1e4, kernel='rbf', random_state=100, gamma='scale', probability=True) #some parameters here were varied to give the best performing score below of 0.9807
clf.fit(X_train, Y_train)
clf.score(X_test, Y_test)
"""The SVC parameters were varied, with the parameters giving the best score eventually being used. The list below shows some other variants tried and the score achieved for the support vector classifier.
1. clf = SVC(C=1e7, kernel='poly', random_state=100, gamma='scale', probability=True) -> Score = 0.9807
2. clf = SVC(C=1e4, kernel='rbf', random_state=100, gamma='scale', probability=True) -> Score = 0.9807
3. clf = SVC(C=1e4, kernel='sigmoid', random_state=100, gamma='scale', probability=True) -> Score = 0.4615
Other than changing the kernel and the regularization parameter (C), the gamma was alternated between 'scale' and 'auto'. However, an 'auto' kernel coefficient gave a poorer score compared to 'scale' being used.
"""
# Second model: a decision tree with default hyperparameters, for comparison.
clf2 = DecisionTreeClassifier()
clf2.fit(X_train, Y_train)
clf2.score(X_test, Y_test)
tree.plot_tree(clf2) #Visualize the fitted tree (renders in the notebook)
# Hand-crafted 7-feature test vectors: presumably five temperature readings,
# light intensity, then humidity — matching the X column order above.
Example = [[38, 38, 37, 37, 37, 63, 45.2]] #Normal but server drawer not pulled out
Example1 = [[32, 32, 32, 32, 31, 608, 57.0]] #Normal but drawer pulled out
Example2 = [[50, 50, 50, 60, 40, 30, 45.0]] #Overheat since higher temps than example 1 but light remains the same
Example3 = [[90, 95, 105, 110, 120, 600, 20.0]] #Fire since temps and light are high, humidity drops due to dryer air
def Classifier_DecisionTree (Data):
    """Predict with the trained decision tree and print the class id and name.

    Data: a 2-D, single-row feature array (shape (1, 7)) matching the columns
    of X — e.g. the Example vectors below.
    Relies on the module-level fitted classifier ``clf2``.
    Prints "<id> <name>" (e.g. "1 Overheat"); returns None.
    """
    Label = clf2.predict(Data)
    Pred_Label = Label[0]  # predict() returns a 1-element array; take the scalar
    Dict = {0: 'Normal', 1: 'Overheat', 2: 'Fire'}
    # BUG FIX: the original did Dict.get(Label), passing the whole prediction
    # array as the key — numpy arrays are unhashable, so that raises TypeError.
    # Look up the scalar class id instead.
    Class = Dict.get(Pred_Label)
    print(Pred_Label, Class)
Classifier_DecisionTree(Example)
Classifier_DecisionTree(Example1)
Classifier_DecisionTree(Example2)
Classifier_DecisionTree(Example3)
def Classifier_SVM (Data):
    """Predict with the trained SVM and print the class id and name.

    Data: a 2-D, single-row feature array (shape (1, 7)) matching the columns
    of X — e.g. the Example vectors below.
    Relies on the module-level fitted classifier ``clf``.
    Prints "<id> <name>" (e.g. "2 Fire"); returns None.
    """
    Label = clf.predict(Data)
    Pred_Label = Label[0]  # predict() returns a 1-element array; take the scalar
    Dict = {0: 'Normal', 1: 'Overheat', 2: 'Fire'}
    # BUG FIX: the original did Dict.get(Label), passing the whole prediction
    # array as the key — numpy arrays are unhashable, so that raises TypeError.
    # Look up the scalar class id instead.
    Class = Dict.get(Pred_Label)
    print(Pred_Label, Class)
Classifier_SVM(Example)
Classifier_SVM(Example1)
Classifier_SVM(Example2)
Classifier_SVM(Example3)
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error  # NOTE(review): imported but never used below
# NOTE(review): 'neg_mean_squared_error' is a regression metric; for these
# classifiers a classification scorer such as 'accuracy' is presumably what
# was intended — confirm. Also, the comment says "repeated 5 times", but
# cv=5 means 5-FOLD cross validation (one pass), not 5 repetitions.
CV_score_DecisionTree = cross_val_score(clf2, X, Y, cv = 5, scoring='neg_mean_squared_error') #cross validation repeated 5 times
CV_score_DecisionTree
# NOTE(review): cv=None falls back to sklearn's default 5-fold split; the
# 3-fold KFold object CV defined earlier is never used — confirm intent.
CV_score_SVC = cross_val_score(clf, X, Y, cv = None, scoring='neg_mean_squared_error') #cross validation
CV_score_SVC