From 2e0c3302741088a975dc2f39532e3c7a0d21379c Mon Sep 17 00:00:00 2001
From: Zizhe Wang <zizhe.wang@tu-dresden.de>
Date: Mon, 10 Jun 2024 15:20:37 +0200
Subject: [PATCH] fix adaptive instance selection

---
 src/adaptive_instance_selection.py | 102 ++++++++++++++++++++---------
 src/optimize_main.py               |  20 ++++--
 src/parallel_computing.py          |  10 +--
 3 files changed, 91 insertions(+), 41 deletions(-)

diff --git a/src/adaptive_instance_selection.py b/src/adaptive_instance_selection.py
index ec2fdbc..ebbe4f5 100644
--- a/src/adaptive_instance_selection.py
+++ b/src/adaptive_instance_selection.py
@@ -7,19 +7,26 @@
 #                                  #
 ####################################
 
+from sklearn.mixture import GaussianMixture
+from scipy.stats.qmc import LatinHypercube
 import numpy as np
-from pyDOE import lhs
-from sklearn.cluster import KMeans
+from config import PARAM_TYPES, PARAM_BOUNDS
 
 # Initial Sampling
 def initial_sampling(param_bounds, n_samples):
     dimensions = len(param_bounds)
-    samples = lhs(dimensions, samples=n_samples)  # Latin hypercube sampling (LHS)
-
-    # Scale samples to parameter bounds
-    for i, (param, bounds) in enumerate(param_bounds.items()):
+    samples = LatinHypercube(d=dimensions).random(n=n_samples)  # Latin hypercube sampling (LHS)
+    if samples.size == 0:
+        raise ValueError("Initial sampling produced an empty set of samples.")
+
+    # Scale samples to parameter bounds and respect parameter types
+    for i, (param, bounds_info) in enumerate(param_bounds.items()):
+        bounds = bounds_info["bounds"]
         samples[:, i] = bounds[0] + samples[:, i] * (bounds[1] - bounds[0])
+        if bounds_info["type"] == 'int':
+            samples[:, i] = np.round(samples[:, i])  # integral values; the array itself stays float
+
     print(f"Initial samples shape: {samples.shape}")
     return samples
 
 # Evaluate Samples
@@ -28,49 +35,82 @@ def evaluate_samples(samples, objective_function):
     for sample in samples:
         result = objective_function(sample)
         results.append(result)
     return np.array(results)
 
-# Clustering Samples
-def cluster_samples(samples, n_clusters):
-    kmeans = KMeans(n_clusters=n_clusters)
-    kmeans.fit(samples)
-    labels = kmeans.labels_
-    centers = kmeans.cluster_centers_
+# Advanced Clustering
+def advanced_clustering_samples(samples, n_clusters):
+    if len(samples) == 0:
+        raise ValueError("Cannot cluster an empty set of samples.")
+    gmm = GaussianMixture(n_components=n_clusters, covariance_type='full')
+    gmm.fit(samples)
+    labels = gmm.predict(samples)
+    centers = gmm.means_
     return labels, centers
 
-# Adaptive Selection
-def select_informative_instances(samples, results, threshold=0.1):
+# Adaptive Selection with Adaptive Threshold
+def adaptive_select_informative_instances(samples, results, initial_threshold=0.15, adapt_rate=0.05):
+    if len(samples) == 0 or len(results) == 0:
+        raise ValueError("Received empty samples or results for selection.")
+
     performance = np.mean(results, axis=1)
-    cutoff = np.percentile(performance, threshold * 100)
-    selected_samples = samples[performance <= cutoff]
+    threshold = initial_threshold
+    while True:
+        # Cap the effective threshold at 1.0 (the 100th percentile)
+        effective_threshold = min(threshold, 1.0)
+        cutoff = np.percentile(performance, effective_threshold * 100)
+        selected_samples = samples[performance <= cutoff]
+
+        # Stop once enough samples are selected; also stop at the 100th
+        # percentile, where the whole set is already selected and raising
+        # the threshold further would loop forever
+        if len(selected_samples) >= 3 or effective_threshold >= 1.0:
+            break
+        threshold += adapt_rate
+
+    if selected_samples.size == 0:
+        raise ValueError("Selection of informative instances resulted in an empty set.")
+    print(f"Selected samples shape: {selected_samples.shape}")
     return selected_samples
 
 # Iterative Refinement
-def iterative_refinement(samples, results, objective_function, n_iterations=5, threshold=0.1):
-    for _ in range(n_iterations):
+def iterative_refinement(samples, results, objective_function, maximize_indices, n_iterations=5, initial_threshold=0.15, adapt_rate=0.05):
+    for iteration in range(n_iterations):
+        print(f"Iteration {iteration}: Starting with samples shape: {samples.shape}")
+
         # Evaluate current samples
         current_results = evaluate_samples(samples, objective_function)
 
-        # Select informative instances
-        samples = select_informative_instances(samples, current_results, threshold)
+        # Negate maximized objectives before selection, so that
+        # "lower is better" holds uniformly below
+        for idx in maximize_indices:
+            current_results[:, idx] = -current_results[:, idx]
+
+        print(f"Iteration {iteration}: Current results: {current_results}")
+
+        # Select informative instances with an adaptive threshold
+        selected_samples = adaptive_select_informative_instances(samples, current_results, initial_threshold, adapt_rate)
+
+        # Keep at least three samples to maintain diversity
+        if len(selected_samples) < 3:
+            selected_samples = samples[np.argsort(np.mean(current_results, axis=1))[:3]]
 
         # Re-cluster the selected samples
-        n_clusters = max(1, int(len(samples) * 0.1))  # Ensure at least 1 cluster
-        labels, centers = cluster_samples(samples, n_clusters)
+        n_clusters = max(1, min(3, int(len(selected_samples) * 0.3)))  # at least 1 cluster, at most 3
+        labels, centers = advanced_clustering_samples(selected_samples, n_clusters)
 
         # Generate new samples around cluster centers
         new_samples = []
         for center in centers:
-            perturbations = np.random.uniform(-0.05, 0.05, center.shape)
-            new_samples.append(center + perturbations)
+            # One small perturbation per center keeps the sample size in check
+            perturbations = np.random.uniform(-0.05, 0.05, center.shape)
+            new_samples.append(center + perturbations)
 
-        samples = np.vstack((samples, new_samples))
+        # Combine selected samples with the new ones, limiting growth per iteration
+        samples = np.vstack((selected_samples, new_samples))
+        if len(samples) > len(selected_samples) + 2:
+            samples = samples[:len(selected_samples) + 2]
+
+        print(f"Iteration {iteration}: Samples shape after selection and new sample generation: {samples.shape}")
 
-    return samples
-
-# Define the objective function wrapper
-def objective_function(param_values):
-    return optimization_function(param_values)
\ No newline at end of file
+    return samples
\ No newline at end of file
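
For review context, a minimal driver for the refactored sampling pipeline is sketched below. It is illustrative only: the two-parameter bounds and the toy objective are assumptions, not code from this repository, though the nested bounds layout mirrors what initial_sampling() now expects.

    import numpy as np
    from adaptive_instance_selection import initial_sampling, evaluate_samples, iterative_refinement

    # Hypothetical two-parameter problem; "type" drives the integer rounding added above.
    param_bounds = {
        "gain":  {"bounds": (0.1, 2.0), "type": "float"},
        "steps": {"bounds": (1, 10),    "type": "int"},
    }

    def toy_objective(x):
        # Two objectives, both minimized; maximized ones are negated upstream.
        return [float(np.sum(x ** 2)), float(abs(x[0] - x[1]))]

    samples = initial_sampling(param_bounds, n_samples=12)
    results = evaluate_samples(samples, toy_objective)
    refined = iterative_refinement(samples, results, toy_objective,
                                   maximize_indices=[], n_iterations=3)
    print(refined.shape)
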
+        self.maximize_indices = [self.objective_names.index(obj) for obj in MAXIMIZE]
         n_var = len(self.param_names)
         n_obj = len(self.objective_names)
-        xl = np.array([PARAM_BOUNDS[param][0] for param in self.param_names])
-        xu = np.array([PARAM_BOUNDS[param][1] for param in self.param_names])
+        xl = np.array([PARAM_BOUND_VALUES[param][0] for param in self.param_names])
+        xu = np.array([PARAM_BOUND_VALUES[param][1] for param in self.param_names])
         print(f"Number of variables: {n_var}")
         print(f"Lower bounds: {xl}")
         print(f"Upper bounds: {xu}")
         super().__init__(n_var=n_var, n_obj=n_obj, n_constr=0, xl=xl, xu=xu)
 
     def _evaluate(self, X, out, *args, **kwargs):
+        # Round integer parameters before evaluation
+        for i, param_type in enumerate(self.param_types):
+            if param_type == "int":
+                X[:, i] = np.round(X[:, i])
         param_values_list = [dict(zip(self.param_names, x)) for x in X]
-        results = execute_parallel_tasks(param_values_list, OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"])
+        results = execute_parallel_tasks(param_values_list, OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"], self.maximize_indices)
 
         # Debugging output before any processing
         print(f"Initial results: {results}")
@@ -99,6 +103,12 @@ def run_optimization(use_adaptive_instance_selection):
     print_and_plot_results(res, problem)
 
     # Save results to a file
+    # Negate the maximized objectives back before saving, inverting the
+    # negation applied for the minimizer
+    results_to_save = res.F.copy()
+    for idx in problem.maximize_indices:
+        results_to_save[:, idx] = -results_to_save[:, idx]
+
     results_data = {
-        "results": res.F.tolist(),
+        "results": results_to_save.tolist(),
         "elapsed_time": elapsed_time,
diff --git a/src/parallel_computing.py b/src/parallel_computing.py
index fc1add4..f6f38e0 100644
--- a/src/parallel_computing.py
+++ b/src/parallel_computing.py
@@ -14,8 +14,8 @@ import numpy as np
 from time import sleep
 from joblib import Parallel, delayed
 from OMPython import OMCSessionZMQ
-from config import MODEL_FILE, MODEL_NAME, SIMULATION_STOP_TIME, PARAMETERS, OBJECTIVES, PARAM_BOUNDS, MODEL_PATH, PRECISION, OPTIMIZATION_CONFIG, N_JOBS
-from adaptive_instance_selection import initial_sampling, evaluate_samples, cluster_samples, select_informative_instances, iterative_refinement
+from config import MODEL_FILE, MODEL_NAME, SIMULATION_STOP_TIME, PARAMETERS, OBJECTIVES, PARAM_BOUNDS, PARAM_TYPES, MODEL_PATH, PRECISION, OPTIMIZATION_CONFIG, N_JOBS
+from adaptive_instance_selection import initial_sampling, evaluate_samples, advanced_clustering_samples, adaptive_select_informative_instances, iterative_refinement
 
 temp_dirs = []  # List to store paths of temporary directories
 
@@ -66,7 +66,7 @@ def optimization_function(param_values, retries=3, delay=2):
     param_values = {param: value for param, value in zip(PARAMETERS, param_values)}
 
     # Set model parameters
-    rounded_param_values = {param: round(value, PRECISION) for param, value in param_values.items()}
+    rounded_param_values = {param: round(value, PRECISION) if PARAM_TYPES[param] == 'float' else int(value) for param, value in param_values.items()}
     for param, value in rounded_param_values.items():
         set_param_result = omc.sendExpression(f"setParameterValue({MODEL_NAME}, {param}, {value})")
         if not set_param_result:
@@ -125,7 +125,7 @@ def cleanup_temp_dirs():
             print(f"Error: {e}")
             break  # Exit the loop for non-permission errors
 
-def execute_parallel_tasks(tasks, use_adaptive_instance_selection):
+def execute_parallel_tasks(tasks, use_adaptive_instance_selection, maximize_indices):
     results = []
 
     if use_adaptive_instance_selection:
@@ -136,7 +136,7 @@ def execute_parallel_tasks(tasks, use_adaptive_instance_selection):
         initial_results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(sample) for sample in initial_samples)
 
         # Iterative refinement
-        refined_samples = iterative_refinement(initial_samples, initial_results, optimization_function)
+        refined_samples = iterative_refinement(initial_samples, initial_results, optimization_function, maximize_indices)
 
         # Parallel evaluation of refined samples
         refined_results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(task) for task in refined_samples)
-- 
GitLab
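
As a sanity check for the save path touched in optimize_main.py, the negate-back step should exactly invert the negation applied for the minimizer. A self-contained illustration (the objective values below are made up):

    import numpy as np

    maximize_indices = [1]                    # objective 1 is maximized
    F = np.array([[1.0, -2.0], [0.5, -3.0]])  # internal (negated) form seen by pymoo
    results_to_save = F.copy()
    for idx in maximize_indices:
        results_to_save[:, idx] = -results_to_save[:, idx]
    assert np.allclose(results_to_save[:, 1], [2.0, 3.0])  # original maximized values restored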