-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDataCleaning.py
91 lines (74 loc) · 3.62 KB
/
DataCleaning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# -*- coding: utf-8 -*-
"""
Created on Sat Jan 8 11:19:45 2022
@author: eslam
"""
import netaddr
import csv
import numpy as np
import itertools
from KiplingTrafficFlow import KiplingTrafficFlow
class DataCleaning:
    """Expand wildcard policy rows of a CSV file into concrete policy rows.

    To enable more generic policies and definitions for Zero Trust access
    rules, the raw policies file may use ``*`` in any cell as a basic
    wildcard.  Each ``*`` is replaced with every other value that appears in
    the same column of the raw file, so a row with several wildcards becomes
    the cartesian product of those columns' values.

    The original notes list example policy attributes such as UserID,
    destinationID, AppID, ContentID, When, Where and Action (allow / deny).

    Constructing an instance runs the whole pipeline: read ``rawFileName``,
    expand the wildcards, drop duplicate rows, put the header row back on
    top and write the result to ``policiesFileName``.
    """

    def __init__(self, rawFileName=None, policiesFileName=None):
        """Generate the expanded policies file.

        Args:
            rawFileName: Path of the raw CSV file; its first row is the
                column header and later rows may contain ``*`` wildcards.
            policiesFileName: Path the expanded CSV is written to.
        """
        self.rawFileName = rawFileName
        self.policiesFileName = policiesFileName

        generated_policies = self.expandPolicies()
        generated_policies = self.removeDuplicateRows(generated_policies)

        # Re-read the raw file only to recover the header row; an empty raw
        # file has no header, in which case an empty output file is written.
        raw_rows = self._readRawRows()
        if raw_rows:
            generated_policies.insert(0, raw_rows[0].copy())
        self.save_csv(generated_policies, policiesFileName)

    def _readRawRows(self):
        """Return all non-blank rows of the raw file, header included."""
        # The context manager closes the handle; blank lines parse as []
        # and are skipped because they carry no policy and break indexing.
        with open(self.rawFileName, newline='') as raw_file:
            return [row for row in csv.reader(raw_file) if row]

    def getUniqueValuesColumn(self, twodList, columnId, removedValues):
        """Return the distinct values of one column, minus excluded ones.

        Args:
            twodList: Rows (sequences) to scan.
            columnId: Index of the column to collect.
            removedValues: Values to leave out of the result (e.g. ``['*']``).

        Returns:
            A list holding each remaining value once, in order of first
            appearance.  Excluded values missing from the column are ignored.
        """
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        unique_values = dict.fromkeys(row[columnId] for row in twodList)
        return [value for value in unique_values if value not in removedValues]

    def removeDuplicateRows(self, x):
        """Sort ``x`` in place and return a new list without duplicate rows.

        Args:
            x: List of rows; it is sorted in place as a side effect.

        Returns:
            A new sorted list containing each distinct row once.
        """
        x.sort()
        # After sorting, equal rows are adjacent, so groupby collapses them.
        return [row for row, _ in itertools.groupby(x)]

    def expandPolicies(self):
        """Read the raw file and replace every ``*`` with concrete values.

        Each wildcard cell is replaced by all non-wildcard values found in
        the same column of the raw data rows.  Rows with several wildcards
        yield every combination.  Rows whose wildcard column has no concrete
        value anywhere produce no output.

        Returns:
            A list of expanded rows (lists of strings) without the header
            row.  The list may contain duplicates.
        """
        rows = self._readRawRows()[1:]  # drop the header row
        column_values = {}  # column index -> concrete values, computed once
        generated_policies = []

        for row in rows:
            if '*' not in row:
                generated_policies.append(row)
                continue

            choices = []
            for column, cell in enumerate(row):
                if cell == '*':
                    if column not in column_values:
                        column_values[column] = self.getUniqueValuesColumn(
                            rows, column, ['*'])
                    choices.append(column_values[column])
                else:
                    choices.append([cell])

            # One product call expands all wildcards of the row at once,
            # instead of re-scanning the whole table once per wildcard.
            generated_policies.extend(
                list(combination)
                for combination in itertools.product(*choices))

        return generated_policies

    def save_csv(self, twodList, fileName):
        """Write rows to ``fileName`` as CSV, one row per line.

        Args:
            twodList: Iterable of rows; every cell is converted with ``str``.
            fileName: Destination path; an existing file is overwritten.
        """
        with open(fileName, 'w') as out_file:
            # csv.writer quotes cells containing commas or quotes so the file
            # can be read back with csv.reader; '\n' keeps the line endings
            # the plain text-mode writes produced before.
            writer = csv.writer(out_file, lineterminator='\n')
            writer.writerows([str(cell) for cell in row] for row in twodList)
# if __name__ == "__main__":
# # fileName = "RawStaticPolicyAgentPolicies.csv"
# rawFileName = "RawStaticPolicyAgentPolicies.csv"
# policiesFileName = "StaticPolicyAgentPolicies.csv"
# data = DataCleaning(rawFileName, policiesFileName)
# # print (policy.validateFlow(flow))