-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathee5132_part2(b)_with_experimental_data (1).py
115 lines (78 loc) · 3.98 KB
/
ee5132_part2(b)_with_experimental_data (1).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# -*- coding: utf-8 -*-
"""EE5132 Part2(b) with Experimental data.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1dewvvQXslNlPj0i5P476wtQL4vn-CgxX
**Import libraries**
"""
# Data processing and model training
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
# SVM classifier
from sklearn.svm import SVC
# Decision tree classifier (tree module also used below for plotting)
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
"""**Mount Google Drive to access and import CSV file**"""
from google.colab import drive
drive.mount('/content/drive')
# NOTE(review): the CSV is read from /content, not from the mounted Drive path
# — presumably the file was uploaded to the Colab session; confirm.
df = pd.read_csv("/content/preprocessed_dataset.csv")
"""Let's check what this dataset contains!"""
# Notebook-style inspection cells: in a plain script these expressions are
# evaluated and discarded (only useful interactively in Colab).
df.head(10) #Show top 10 rows of the dataset
df.isnull().any() #Check for any null values in dataset
df.dtypes #Check the variable types for our variables
df.describe() #Summary statistics per column
len(df["label"]) #count number of rows in this dataset
# NOTE(review): 0:7 selects SEVEN feature columns, while the comment names
# three quantities — presumably several temperature sensors plus light and
# humidity (see the 7-element Example vectors below); confirm against the CSV.
X = df.iloc[:, 0:7] #Features to be used are only temperature, light intensity and humidity
X
Y = df.iloc[:, 7:] #Actual labels (all remaining columns)
Y
# NOTE(review): CV is defined here but never passed to cross_val_score later
# (cv=5 and cv=None are used instead) — dead variable or an oversight; confirm.
CV = KFold(n_splits = 3, shuffle = True , random_state = 42) ##splits into 3 folds for cross validation later
"""Separated the features and labels from the dataframe. Now we can split the training and test sets."""
# NOTE(review): no random_state here, so the split (and the scores quoted
# below) are not reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size = 0.7) #splits the dataset into 70% for training, 30% for testing
clf = SVC(C=1e4, kernel='rbf', random_state=100, gamma='scale', probability=True) #some parameters here were varied to give the best performing score below of 0.9807
clf.fit(X_train, Y_train)
clf.score(X_test, Y_test)
"""The SVC parameters were varied, with the parameters giving the best score eventually being used. The list below shows some other variants tried and the score achieved for the support vector classifier.
1. clf = SVC(C=1e7, kernel='poly', random_state=100, gamma='scale', probability=True) -> Score = 0.9807
2. clf = SVC(C=1e4, kernel='rbf', random_state=100, gamma='scale', probability=True) -> Score = 0.9807
3. clf = SVC(C=1e4, kernel='sigmoid', random_state=100, gamma='scale', probability=True) -> Score = 0.4615
Other than changing the kernel and the regularization parameter (C), the gamma was alternated between 'scale' and 'auto'. However, an 'auto' kernel coefficient gave a poorer score compared to 'scale' being used.
"""
# Second model: a decision tree with default hyperparameters, for comparison.
clf2 = DecisionTreeClassifier()
clf2.fit(X_train, Y_train)
clf2.score(X_test, Y_test)
tree.plot_tree(clf2) #Visualize the fitted tree (renders in the notebook)
# Hand-crafted 7-feature test vectors: presumably five temperature readings,
# light intensity, then humidity — matching the X column order above.
Example = [[38, 38, 37, 37, 37, 63, 45.2]] #Normal but server drawer not pulled out
Example1 = [[32, 32, 32, 32, 31, 608, 57.0]] #Normal but drawer pulled out
Example2 = [[50, 50, 50, 60, 40, 30, 45.0]] #Overheat since higher temps than example 1 but light remains the same
Example3 = [[90, 95, 105, 110, 120, 600, 20.0]] #Fire since temps and light are high, humidity drops due to dryer air
def Classifier_DecisionTree (Data):
    """Predict with the trained decision tree and print the class id and name.

    Data: a 2-D, single-row feature array (shape (1, 7)) matching the columns
    of X — e.g. the Example vectors below.
    Relies on the module-level fitted classifier ``clf2``.
    Prints "<id> <name>" (e.g. "1 Overheat"); returns None.
    """
    Label = clf2.predict(Data)
    Pred_Label = Label[0]  # predict() returns a 1-element array; take the scalar
    Dict = {0: 'Normal', 1: 'Overheat', 2: 'Fire'}
    # BUG FIX: the original did Dict.get(Label), passing the whole prediction
    # array as the key — numpy arrays are unhashable, so that raises TypeError.
    # Look up the scalar class id instead.
    Class = Dict.get(Pred_Label)
    print(Pred_Label, Class)
Classifier_DecisionTree(Example)
Classifier_DecisionTree(Example1)
Classifier_DecisionTree(Example2)
Classifier_DecisionTree(Example3)
def Classifier_SVM (Data):
    """Predict with the trained SVM and print the class id and name.

    Data: a 2-D, single-row feature array (shape (1, 7)) matching the columns
    of X — e.g. the Example vectors below.
    Relies on the module-level fitted classifier ``clf``.
    Prints "<id> <name>" (e.g. "2 Fire"); returns None.
    """
    Label = clf.predict(Data)
    Pred_Label = Label[0]  # predict() returns a 1-element array; take the scalar
    Dict = {0: 'Normal', 1: 'Overheat', 2: 'Fire'}
    # BUG FIX: the original did Dict.get(Label), passing the whole prediction
    # array as the key — numpy arrays are unhashable, so that raises TypeError.
    # Look up the scalar class id instead.
    Class = Dict.get(Pred_Label)
    print(Pred_Label, Class)
Classifier_SVM(Example)
Classifier_SVM(Example1)
Classifier_SVM(Example2)
Classifier_SVM(Example3)
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error  # NOTE(review): imported but never used below
# NOTE(review): 'neg_mean_squared_error' is a regression metric; for these
# classifiers a classification scorer such as 'accuracy' is presumably what
# was intended — confirm. Also, the comment says "repeated 5 times", but
# cv=5 means 5-FOLD cross validation (one pass), not 5 repetitions.
CV_score_DecisionTree = cross_val_score(clf2, X, Y, cv = 5, scoring='neg_mean_squared_error') #cross validation repeated 5 times
CV_score_DecisionTree
# NOTE(review): cv=None falls back to sklearn's default 5-fold split; the
# 3-fold KFold object CV defined earlier is never used — confirm intent.
CV_score_SVC = cross_val_score(clf, X, Y, cv = None, scoring='neg_mean_squared_error') #cross validation
CV_score_SVC