Skip to content

Commit

Permalink
solving a bug with the genetic generator to handle the categorical fe…
Browse files Browse the repository at this point in the history
…atures in a tabular setting
  • Loading branch information
rinziv committed Dec 3, 2024
1 parent 6f3a5d2 commit 742bc54
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 42 deletions.
51 changes: 34 additions & 17 deletions lore_sa/neighgen/genetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def setup_toolbox(self, x, evaluate, population_size):
toolbox.register("population", tools.initRepeat, list, toolbox.individual, n=population_size)

toolbox.register("clone", self.clone)
toolbox.register("evaluate", evaluate, x)
toolbox.register("evaluate", self.constraint_decorator(evaluate, x))
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", self.mutate, toolbox)
toolbox.register("select", tools.selTournament, tournsize=self.tournsize)
Expand All @@ -143,13 +143,38 @@ def setup_toolbox_noteq(self, x, x1, evaluate, population_size):
toolbox.register("population", tools.initRepeat, list, toolbox.individual, n=population_size)

toolbox.register("clone", self.clone)
toolbox.register("evaluate", evaluate, x)
toolbox.register("evaluate", self.constraint_decorator(evaluate, x))
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", self.mutate, toolbox)
toolbox.register("select", tools.selTournament, tournsize=self.tournsize)

return toolbox


def check_constraints(self, z):
    """
    Tell whether a generated instance decodes to a complete record.

    The encoded vector is reshaped to a single row, passed through
    ``self.encoder.decode`` and the first decoded row is inspected: the
    instance is rejected when that row contains ``None``.

    :param z: the generated (encoded) instance to be checked; it must expose ``reshape``
    :return: True if the decoded row holds no ``None`` value, False otherwise
    """
    decoded_row = self.encoder.decode(z.reshape(1, -1))[0]
    return None not in decoded_row

# Create a decorator for the evaluation function
def constraint_decorator(self, evaluate, z_ref):
    """
    Wrap a fitness function so that invalid individuals are penalized.

    The returned callable takes a single individual. When
    ``self.check_constraints`` accepts it, the result of
    ``evaluate(individual, z_ref)`` is returned as is; otherwise the
    one-element tuple ``(-np.inf,)`` is returned and ``evaluate`` is not
    called at all.

    NOTE(review): the penalty presumably assumes a fitness that is
    maximized -- confirm against the fitness weights. Also note that the
    individual is passed as the FIRST argument of ``evaluate`` and the
    reference instance as the second.

    :param evaluate: fitness function called as ``evaluate(individual, z_ref)``
    :param z_ref: the reference instance forwarded to ``evaluate``
    :return: a one-argument evaluation function
    """
    def guarded_evaluate(individual):
        # Constraint violations short-circuit to the penalty fitness.
        if not self.check_constraints(individual):
            return (-np.inf,)
        return evaluate(individual, z_ref)

    return guarded_evaluate



def fit(self, toolbox, population_size):

halloffame_size = int(np.round(population_size * self.halloffame_ratio))
Expand All @@ -169,15 +194,16 @@ def fit(self, toolbox, population_size):
return population, halloffame, logbook

def record_init(self, x):
    """
    Return the given instance unchanged.

    Per the author's note, this is the individual initializer of the
    evolutionary algorithm: instead of sampling a random instance, the
    input instance ``x`` is repeated across the whole initial population
    (the registration itself is not visible here -- confirm in the
    toolbox setup).

    :param x: the input instance
    :return: the very same object received as ``x`` (no copy is made)
    """
    return x

def random_init(self):
z = self.generate_synthetic_instance()
x = self.encoder.decode(z.reshape(1, -1))
if None in x :
print('None in generated z')
print('z', z)
print('x', x)

return z

Expand All @@ -190,11 +216,7 @@ def mutate(self, toolbox, x):
# # if np.random.random() <= self.mutpb:
# # z[i] = np.random.choice(self.feature_values[i], size=1, replace=True)
z = self.generate_synthetic_instance(from_z=z, mutpb=self.mutpb)
x = self.encoder.decode(z.reshape(1, -1))
if None in x :
print('None in mutated z')
print('z', z)
print('x', x)

return z,

def fitness_equal(self, z, z1):
Expand All @@ -209,9 +231,6 @@ def fitness_equal(self, z, z1):
# y1 = self.bb_predict(x1.reshape(1, -1))[0]
x = self.encoder.decode(z.reshape(1, -1))
x1 = self.encoder.decode(z1.reshape(1, -1))
if None in x or None in x1:
return 0.0, # TODO: check if this is the correct way to return a tuple

y = self.bbox.predict(x)
y1 = self.bbox.predict(x1)

Expand All @@ -231,8 +250,6 @@ def fitness_notequal(self, z, z1):
# y1 = self.bb_predict(x1.reshape(1, -1))[0]
x = self.encoder.decode(z.reshape(1, -1))
x1 = self.encoder.decode(z1.reshape(1, -1))
if None in x or None in x1:
return 0.0, #TODO: check why we get here in the code
y = self.bbox.predict(x)
y1 = self.bbox.predict(x1)

Expand Down
52 changes: 28 additions & 24 deletions lore_sa/neighgen/neighborhood_generator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import random
from abc import abstractmethod
import warnings
import numpy as np
Expand Down Expand Up @@ -38,38 +39,41 @@ def generate_synthetic_instance(self, from_z=None, mutpb=1.0):
if from_z is not None:
instance = from_z # -1 because the target class is not generated

for name, feature in self.dataset.descriptor['categorical'].items():
if self.encoder is not None:
# feature is encoded, so i need to random generate chunks of one-hot-encoded values
if random.random() < mutpb:
for name, feature in self.dataset.descriptor['categorical'].items():
if self.encoder is not None:
# feature is encoded, so i need to random generate chunks of one-hot-encoded values

# finding the vector index of the feature
indices = [k for k, v in self.encoder.get_encoded_features().items() if v.split("=")[0] == name]
index_choice = np.random.choice(list(range(len(indices))))
# finding the vector index of the feature
indices = [k for k, v in self.encoder.get_encoded_features().items() if v.split("=")[0] == name]
index_choice = np.random.choice(list(range(len(indices))))

for i, idx in enumerate(indices):
if i == index_choice:
instance[idx] = 1
else:
instance[idx] = 0
columns[idx] = self.encoder.get_encoded_features()[idx]
for i, idx in enumerate(indices):
if i == index_choice:
instance[idx] = 1
else:
instance[idx] = 0
columns[idx] = self.encoder.get_encoded_features()[idx]


else:
# feature is not encoded: random choice among the distinct values of the feature
else:
# feature is not encoded: random choice among the distinct values of the feature

instance[feature['index']] = np.random.choice(feature['distinct_values'])
columns[feature['index']] = name
instance[feature['index']] = np.random.choice(feature['distinct_values'])
columns[feature['index']] = name

for name, feature in self.dataset.descriptor['numeric'].items():
idx = None
if self.encoder is not None:
idx = [k for k, v in self.encoder.get_encoded_features().items() if v == name][0]
else:
idx = feature['index']
columns[idx] = name
for name, feature in self.dataset.descriptor['numeric'].items():
idx = None
if self.encoder is not None:
idx = [k for k, v in self.encoder.get_encoded_features().items() if v == name][0]
else:
idx = feature['index']
columns[idx] = name

instance[idx] = np.random.uniform(low=feature['min'], high=feature['max'])
instance[idx] = np.random.uniform(low=feature['min'], high=feature['max'])
self.columns = columns


return instance

def balance_neigh(self, z, Z, num_samples):
Expand Down
2 changes: 1 addition & 1 deletion test/test_neighgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def test_genetic_generator_generate_balanced(self):
z = self.enc.encode([x.values])[0] # remove the class feature from the input instance

gen = GeneticGenerator(bbox=self.bbox, dataset=self.dataset, encoder=self.enc, ocr=0.1, ngen=20)
neighbour = gen.generate(z, 100, self.dataset.descriptor, self.enc)
neighbour = gen.generate(z, 1000, self.dataset.descriptor, self.enc)
# Assert the length of the generated dataset is at least 100
self.assertGreaterEqual(neighbour.shape[0], 100)

Expand Down

0 comments on commit 742bc54

Please sign in to comment.