Commit e049784a authored by Zizhe Wang

fix adaptive instance selection

parent 8cd1e7a5
@@ -7,9 +7,9 @@
 #                                  #
 ####################################
-from sklearn.mixture import GaussianMixture
-from scipy.stats.qmc import LatinHypercube as lhs
 import numpy as np
+from sklearn.cluster import KMeans
+from scipy.stats.qmc import LatinHypercube as lhs
 from config import PARAM_TYPES, PARAM_BOUNDS

 # Initial Sampling
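
For context, the `LatinHypercube` sampler imported here drives `initial_sampling`, which this commit leaves untouched. A minimal sketch of how such Latin hypercube sampling typically works with `scipy.stats.qmc`; the bounds dict below is hypothetical, standing in for `PARAM_BOUNDS`:

import numpy as np
from scipy.stats.qmc import LatinHypercube, scale

bounds = {"x1": (0.0, 1.0), "x2": (-5.0, 5.0)}  # hypothetical stand-in for PARAM_BOUNDS
lows = np.array([b[0] for b in bounds.values()])
highs = np.array([b[1] for b in bounds.values()])

sampler = LatinHypercube(d=len(bounds), seed=42)  # one dimension per parameter
unit_samples = sampler.random(n=20)               # 20 stratified points in [0, 1)^2
samples = scale(unit_samples, lows, highs)        # rescale to the parameter bounds
print(samples.shape)                              # (20, 2)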
@@ -41,38 +41,60 @@ def evaluate_samples(samples, objective_function):
 def advanced_clustering_samples(samples, n_clusters):
     if len(samples) == 0:
         raise ValueError("Cannot cluster an empty set of samples.")
-    gmm = GaussianMixture(n_components=n_clusters, covariance_type='full')
-    gmm.fit(samples)
-    labels = gmm.predict(samples)
-    centers = gmm.means_
+    kmeans = KMeans(n_clusters=n_clusters, random_state=0, n_init=10)
+    kmeans.fit(samples)
+    labels = kmeans.predict(samples)
+    centers = kmeans.cluster_centers_
     return labels, centers
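
The commit swaps the probabilistic `GaussianMixture` for `KMeans`, which gives hard cluster assignments and, with a fixed `random_state`, reproducible centroids. A toy sketch of the new behavior, using a synthetic sample matrix:

import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
samples = rng.uniform(size=(30, 2))   # synthetic stand-in for a sample matrix

kmeans = KMeans(n_clusters=3, random_state=0, n_init=10)
kmeans.fit(samples)
labels = kmeans.predict(samples)      # hard assignment, one label per sample
centers = kmeans.cluster_centers_     # centroid coordinates, shape (3, 2)
print(labels.shape, centers.shape)    # (30,) (3, 2)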

 # Adaptive Selection with Adaptive Threshold
-def adaptive_select_informative_instances(samples, results, initial_threshold=0.15, adapt_rate=0.05):
+def adaptive_select_informative_instances(samples, results, initial_threshold=0.05, adapt_rate=0.01, desired_samples=None, max_iterations=100):
     if len(samples) == 0 or len(results) == 0:
         raise ValueError("Received empty samples or results for selection.")
-    performance = np.mean(results, axis=1)
+    performance = np.nanmean(results, axis=1)  # Use np.nanmean to ignore nan values
     threshold = initial_threshold
-    while True:
-        # Ensure the threshold does not exceed 1
+    iteration = 0
+    while iteration < max_iterations:
+        iteration += 1
+        print(f"Iteration {iteration}: Current threshold: {threshold}")
+        # Cap the threshold at 1.0
         effective_threshold = min(threshold, 1.0)
-        cutoff = np.percentile(performance, effective_threshold * 100)
+        cutoff = np.nanpercentile(performance, effective_threshold * 100)  # Use np.nanpercentile to ignore nan values
         selected_samples = samples[performance <= cutoff]
-        if len(selected_samples) >= 3:
+        print(f"Iteration {iteration}: Number of selected samples: {len(selected_samples)}")
+        if desired_samples is not None and len(selected_samples) >= desired_samples:
+            print(f"Iteration {iteration}: Desired number of samples reached.")
+            break
+        if len(selected_samples) == len(samples):
+            print(f"Iteration {iteration}: All samples selected.")
             break
         threshold += adapt_rate
+    if iteration == max_iterations:
+        print(f"Final threshold after max iterations: {threshold}")
+        print(f"Performance values: {performance}")
+        print(f"Number of selected samples: {len(selected_samples)}")
+    if desired_samples is not None and len(selected_samples) < desired_samples:
+        print("Falling back to the best available samples.")
+        # Select the top desired_samples samples based on performance
+        best_indices = np.argsort(performance)[:desired_samples]
+        selected_samples = samples[best_indices]
     if selected_samples.size == 0:
         raise ValueError("Selection of informative instances resulted in an empty set.")
-    print(f"Selected samples shape: {selected_samples.shape}")
-    return selected_samples
+    print(f"Final selected samples shape: {selected_samples.shape}")
+    return selected_samples[:desired_samples]  # Ensure the number of selected samples matches the desired number
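
Note that when `desired_samples` is None, the final `selected_samples[:desired_samples]` slice is a no-op, since slicing with None returns the whole array. A hedged usage sketch of the reworked selection, with synthetic samples and results and one nan objective to exercise the nan-aware statistics:

import numpy as np

rng = np.random.default_rng(1)
samples = rng.uniform(size=(20, 3))   # 20 candidate parameter vectors
results = rng.uniform(size=(20, 2))   # 2 objective values per candidate
results[3, 0] = np.nan                # nan objectives are skipped by nanmean/nanpercentile

# The threshold starts at 5% and loosens by 1% per iteration until at
# least 5 samples fall at or below the performance cutoff.
selected = adaptive_select_informative_instances(
    samples, results,
    initial_threshold=0.05, adapt_rate=0.01, desired_samples=5,
)
print(selected.shape)  # (5, 3)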

-# Iterative Refinement
-def iterative_refinement(samples, results, objective_function, maximize_indices, n_iterations=5, initial_threshold=0.15, adapt_rate=0.05):
+# Iterative Refinement
+def iterative_refinement(samples, results, objective_function, maximize_indices, n_iterations=2, initial_threshold=0.10, adapt_rate=0.03):
     for iteration in range(n_iterations):
         print(f"Iteration {iteration}: Starting with samples shape: {samples.shape}")
@@ -88,29 +110,35 @@ def iterative_refinement(samples, results, objective_function, maximize_indices,
         # Ensure objective negation is correctly handled
         for i in range(len(current_results)):
             for idx in maximize_indices:
-                current_results[i][idx] = -current_results[i][idx]
+                if not np.isnan(current_results[i][idx]):
+                    current_results[i][idx] = -current_results[i][idx]

         # Ensure at least a minimum number of samples are selected to maintain diversity
         if len(selected_samples) < 3:
-            selected_samples = samples[np.argsort(np.mean(current_results, axis=1))[:3]]
+            selected_samples = samples[np.argsort(np.nanmean(current_results, axis=1))[:3]]

         # Re-cluster the selected samples
-        n_clusters = max(1, min(3, int(len(selected_samples) * 0.3)))  # Ensure at least 1 cluster, maximum 3 clusters
+        n_clusters = max(1, min(2, int(len(selected_samples) * 0.2)))  # Ensure at least 1 cluster, maximum 2 clusters
         labels, centers = advanced_clustering_samples(selected_samples, n_clusters)

         # Generate new samples around cluster centers
         new_samples = []
         for center in centers:
-            for _ in range(1):  # Generate 1 new sample per center to control the growth of sample size
-                perturbations = np.random.uniform(-0.05, 0.05, center.shape)  # Use smaller perturbations for finer adjustments
+            for _ in range(max(1, (len(samples) - len(selected_samples)))):  # Control the number of new samples
+                perturbations = np.random.uniform(-0.03, 0.03, center.shape)  # Use smaller perturbations for finer adjustments
                 new_samples.append(center + perturbations)

         # Combine selected samples with new samples, ensuring we don't grow the sample size too much
-        samples = np.vstack((selected_samples, new_samples))
-        if len(samples) > len(selected_samples) + 2:  # Limit the growth of samples
-            samples = samples[:len(selected_samples) + 2]
+        combined_samples = np.vstack((selected_samples, new_samples))
+        samples = combined_samples[:len(selected_samples) + (len(samples) - len(selected_samples))]  # Ensure the sample size matches the original size

         # Debugging output
         print(f"Iteration {iteration}: Samples shape after selection and new sample generation: {samples.shape}")

-    return samples
\ No newline at end of file
+    return samples
+
+def generate_new_samples(existing_samples, pop_size, n_adaptive_samples):
+    n_new_samples = pop_size - n_adaptive_samples
+    new_samples = initial_sampling(PARAM_BOUNDS, n_new_samples)
+    combined_samples = np.vstack((existing_samples, new_samples))
+    return combined_samples
\ No newline at end of file
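
The new `generate_new_samples` helper is meant to top a shrunken selection back up to a full population: survivors are kept and the shortfall is filled with fresh `initial_sampling` draws. A usage sketch, assuming the project's `config` provides `PARAM_BOUNDS` and that it defines 3 parameters:

import numpy as np

# Suppose adaptive selection kept 6 survivors out of a population of 10.
survivors = np.random.default_rng(2).uniform(size=(6, 3))  # hypothetical carry-over
padded = generate_new_samples(survivors, pop_size=10, n_adaptive_samples=6)
print(padded.shape)  # (10, 3): the 6 survivors plus 4 fresh initial_sampling draws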
@@ -17,14 +17,15 @@ from pymoo.optimize import minimize
 from scipy.stats import ttest_ind
 from optimization_libraries import initialize_algorithm
 from parallel_computing import execute_parallel_tasks, cleanup_temp_dirs
-from config import PARAMETERS, OBJECTIVES, MAXIMIZE, PARAM_BOUND_VALUES, PARAM_TYPES, PRECISION, PLOT_CONFIG, OPTIMIZATION_CONFIG, N_JOBS
+from config import PARAMETERS, OBJECTIVE_NAMES, MAXIMIZE, PARAM_BOUND_VALUES, PARAM_TYPES, PRECISION, PLOT_CONFIG, OPTIMIZATION_CONFIG, N_JOBS
 from adaptive_instance_selection import initial_sampling, evaluate_samples, advanced_clustering_samples, adaptive_select_informative_instances, iterative_refinement, generate_new_samples

 class OptimizationProblem(Problem):
     def __init__(self):
         self.param_names = list(PARAM_BOUND_VALUES.keys())
         self.param_types = [PARAM_TYPES[param] for param in self.param_names]
-        self.objective_names = OBJECTIVES
-        self.maximize_indices = [self.objective_names.index(res) for res in MAXIMIZE]
+        self.objective_names = OBJECTIVE_NAMES
+        self.maximize_indices = [i for i, maximize in enumerate(MAXIMIZE) if maximize]
         n_var = len(self.param_names)
         n_obj = len(self.objective_names)
         xl = np.array([PARAM_BOUND_VALUES[param][0] for param in self.param_names])
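
The `maximize_indices` change implies a change in the config contract: `MAXIMIZE` is now a boolean mask aligned with `OBJECTIVE_NAMES` rather than a list of objective names. A small illustration with hypothetical config values:

# Hypothetical config values, for illustration only.
OBJECTIVE_NAMES = ["energy_use", "comfort", "cost"]
MAXIMIZE = [False, True, False]   # boolean mask aligned with OBJECTIVE_NAMES

# New behavior: enumerate the mask to get the indices to maximize.
maximize_indices = [i for i, maximize in enumerate(MAXIMIZE) if maximize]
print(maximize_indices)  # [1] -> only "comfort" is negated for pymoo's minimizer

# Old behavior assumed MAXIMIZE listed names, e.g. MAXIMIZE = ["comfort"],
# and looked each one up with OBJECTIVE_NAMES.index("comfort") -> 1.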
@@ -78,21 +79,75 @@ def run_optimization(use_adaptive_instance_selection):
     results_folder = create_results_folder()

     # Set the adaptive instance selection flag
-    OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"] = use_adaptive_instance_selection
+    OPTIMIZATION_CONFIG['USE_ADAPTIVE_INSTANCE_SELECTION'] = use_adaptive_instance_selection
+    adaptive_frequency = OPTIMIZATION_CONFIG['ADAPTIVE_INSTANCE_SELECTION_FREQUENCY']
+
+    # Initialize the population size
+    pop_size = OPTIMIZATION_CONFIG['POP_SIZE']

     # Initialize the optimization algorithm
     algorithm = initialize_algorithm(
         OPTIMIZATION_CONFIG['ALGORITHM_NAME'],
-        OPTIMIZATION_CONFIG.get('POP_SIZE')
+        pop_size
     )

     # Define the optimization problem
     problem = OptimizationProblem()

     start_time = time.time()
+    res = None  # Initialize res to handle early termination case
     try:
-        # Run the optimization
-        res = minimize(problem, algorithm, ("n_gen", OPTIMIZATION_CONFIG['N_GEN']), verbose=True)
+        for gen in range(OPTIMIZATION_CONFIG['N_GEN']):
+            if pop_size <= OPTIMIZATION_CONFIG['MIN_POP_SIZE']:
+                print("Stopping optimization as population size has reached the minimum threshold.")
+                break
+            res = minimize(problem, algorithm, ("n_gen", 1), verbose=True)
+
+            if use_adaptive_instance_selection and gen > 0 and (gen + 1) % adaptive_frequency == 0:
+                current_samples = res.pop.get("X")
+                current_results = res.pop.get("F")
+                print(f"Generation {gen + 1}: Applying adaptive instance selection")
+                print(f"Current samples: {current_samples.shape}")
+                print(f"Current results: {current_results.shape}")
+
+                try:
+                    # Apply adaptive instance selection
+                    adaptive_samples = adaptive_select_informative_instances(
+                        current_samples, current_results,
+                        initial_threshold=0.05, adapt_rate=0.01,
+                        desired_samples=pop_size // 2
+                    )
+                    # Select half from algorithm population and half from adaptive instance selection
+                    num_algorithm_samples = pop_size // 2
+                    algorithm_samples_indices = np.random.choice(len(current_samples), num_algorithm_samples, replace=False)
+                    algorithm_samples = current_samples[algorithm_samples_indices]
+                    combined_samples = np.vstack((algorithm_samples, adaptive_samples))
+
+                    # Evaluate all combined samples
+                    out = {"F": np.zeros((len(combined_samples), len(problem.objective_names)))}  # Initialize output
+                    problem._evaluate(combined_samples, out=out)  # Evaluate combined samples
+
+                    res.pop.set("X", combined_samples[:pop_size])  # Set only the first pop_size samples
+                    res.pop.set("F", np.array(out["F"])[:pop_size])  # Set only the first pop_size results
+                except RuntimeError as e:
+                    print(f"Adaptive instance selection failed: {e}")
+                    # If adaptive instance selection fails, fall back to using the current population
+                    res.pop.set("X", current_samples)
+                    res.pop.set("F", current_results)
+
+            # Reduce population size dynamically
+            pop_size = max(OPTIMIZATION_CONFIG['MIN_POP_SIZE'], int(pop_size * 0.9))  # Reduce by 10% each iteration, minimum threshold
+            algorithm = initialize_algorithm(
+                OPTIMIZATION_CONFIG['ALGORITHM_NAME'],
+                pop_size
+            )
     finally:
         # Cleanup temporary directories
         cleanup_temp_dirs()
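
With the new loop, the population shrinks by 10% after every generation, adaptive selection or not, until it reaches `MIN_POP_SIZE`, at which point the run stops early. A quick sketch of the resulting decay schedule under assumed config values:

# Hypothetical values: POP_SIZE = 100, MIN_POP_SIZE = 50.
pop_size, min_pop_size = 100, 50
schedule = [pop_size]
while pop_size > min_pop_size:
    pop_size = max(min_pop_size, int(pop_size * 0.9))
    schedule.append(pop_size)
print(schedule)  # [100, 90, 81, 72, 64, 57, 51, 50]: the floor is hit after 7 cuts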
@@ -100,26 +155,28 @@ def run_optimization(use_adaptive_instance_selection):
     elapsed_time = end_time - start_time
     print(f"Time with{'out' if not use_adaptive_instance_selection else ''} adaptive instance selection: {elapsed_time:.2f} seconds")

-    print_and_plot_results(res, problem)
-
-    # Save results to a file
-    # Negate back the maximized objectives before saving
-    results_to_save = res.F.copy()
-    for i in range(len(results_to_save)):
-        for idx in problem.maximize_indices:
-            results_to_save[i][idx] = -results_to_save[i][idx]
-    results_data = {
-        "results": res.F.tolist(),
-        "elapsed_time": elapsed_time,
-        "use_adaptive_instance_selection": use_adaptive_instance_selection
-    }
-    filename = os.path.join(results_folder, f'optimization_results_{"with" if use_adaptive_instance_selection else "without"}_adaptive.json')
-    with open(filename, 'w') as f:
-        json.dump(results_data, f)
-    print(f"Results have been stored in: {filename}")
-
-    return res.F, elapsed_time
+    if res is not None:
+        print_and_plot_results(res, problem)
+
+        # Save results to a file
+        # Negate back the maximized objectives before saving
+        results_to_save = res.F.copy()
+        for i in range(len(results_to_save)):
+            for idx in problem.maximize_indices:
+                results_to_save[i][idx] = -results_to_save[i][idx]
+        results_data = {
+            "results": results_to_save.tolist(),
+            "elapsed_time": elapsed_time,
+            "use_adaptive_instance_selection": use_adaptive_instance_selection
+        }
+        filename = os.path.join(results_folder, f'optimization_results_{"with" if use_adaptive_instance_selection else "without"}_adaptive.json')
+        with open(filename, 'w') as f:
+            json.dump(results_data, f)
+        print(f"Results have been stored in: {filename}")
+
+    return res.F if res is not None else None, elapsed_time

 def print_and_plot_results(res, problem):
     print("Optimization Results:")
@@ -136,9 +193,10 @@ def print_and_plot_results(res, problem):
         print()

     try:
-        plt.figure(figsize=(8, 6))
         for idx in problem.maximize_indices:
             res.F[:, idx] = -res.F[:, idx]
+        plt.figure(figsize=(8, 6))
         plt.scatter(res.F[:, 0], res.F[:, 1])
         plt.xlabel(PLOT_CONFIG["PLOT_X"], fontsize=14)
         plt.ylabel(PLOT_CONFIG["PLOT_Y"], fontsize=14)