diff --git a/src/adaptive_instance_selection.py b/src/adaptive_instance_selection.py
new file mode 100644
index 0000000000000000000000000000000000000000..ec2fdbc06b3cb6383f0c6f26b322b0c407434cd7
--- /dev/null
+++ b/src/adaptive_instance_selection.py
@@ -0,0 +1,78 @@
+# Copyright (c) 2024 - Zizhe Wang
+# https://zizhe.wang
+
+####################################
+#                                  #
+# AUTOMATIC SEARCH SPACE REDUCTION #
+#                                  #
+####################################
+
+import numpy as np
+from pyDOE import lhs
+from sklearn.cluster import KMeans
+
+# Initial Sampling
+def initial_sampling(param_bounds, n_samples):
+    dimensions = len(param_bounds)
+    samples = lhs(dimensions, samples=n_samples)  # Latin hypercube sampling (LHS)
+
+    # Scale samples to parameter bounds
+    for i, (param, bounds) in enumerate(param_bounds.items()):
+        samples[:, i] = bounds[0] + samples[:, i] * (bounds[1] - bounds[0])
+
+    return samples
+
+# Evaluate Samples
+def evaluate_samples(samples, objective_function):
+    results = []
+    for sample in samples:
+        result = objective_function(sample)
+        results.append(result)
+
+    return np.array(results)
+
+# Clustering Samples
+def cluster_samples(samples, n_clusters):
+    kmeans = KMeans(n_clusters=n_clusters)
+    kmeans.fit(samples)
+    labels = kmeans.labels_
+    centers = kmeans.cluster_centers_
+
+    return labels, centers
+
+# Adaptive Selection
+def select_informative_instances(samples, results, threshold=0.1):
+    performance = np.mean(results, axis=1)
+    cutoff = np.percentile(performance, threshold * 100)
+    selected_samples = samples[performance <= cutoff]
+
+    return selected_samples
+
+# Iterative Refinement
+def iterative_refinement(samples, results, objective_function, n_iterations=5, threshold=0.1):
+    for _ in range(n_iterations):
+        # Evaluate current samples
+        current_results = evaluate_samples(samples, objective_function)
+
+        # Select informative instances
+        samples = select_informative_instances(samples, current_results, threshold)
+
+        # Re-cluster the selected samples
+        n_clusters = max(1, int(len(samples) * 0.1))  # Ensure at least 1 cluster
+        labels, centers = cluster_samples(samples, n_clusters)
+
+        # Generate new samples around cluster centers
+        new_samples = []
+        for center in centers:
+            perturbations = np.random.uniform(-0.05, 0.05, center.shape)
+            new_samples.append(center + perturbations)
+
+        samples = np.vstack((samples, new_samples))
+
+    return samples
+
+# Define the objective function wrapper
+def objective_function(param_values):
+    # Imported here to avoid a circular import (parallel_computing imports this module)
+    from parallel_computing import optimization_function
+    return optimization_function(param_values)
\ No newline at end of file
diff --git a/src/optimize_main.py b/src/optimize_main.py
index 0e77a00f730661748049745e63b4a80443c416dd..c69dd353a1af0dceb85408a841bc4a9a647f2991 100644
--- a/src/optimize_main.py
+++ b/src/optimize_main.py
@@ -1,4 +1,4 @@
-# (c) Zizhe Wang
+# Copyright (c) 2024 - Zizhe Wang
 # https://zizhe.wang
 
 ############################
@@ -7,15 +7,17 @@
 #                          #
 ############################
 
+import os
+import time
+import json
 import numpy as np
-from joblib import Parallel, delayed
 import matplotlib.pyplot as plt
 from pymoo.core.problem import Problem
 from pymoo.optimize import minimize
+from scipy.stats import ttest_ind
 from optimization_libraries import initialize_algorithm
-from parallel_computing import optimization_function, cleanup_temp_dirs
-from config import (PARAMETERS, OBJECTIVES, MAXIMIZE, PARAM_BOUNDS, PRECISION, PLOT_CONFIG,
-                    OPTIMIZATION_CONFIG, N_JOBS) # Import all configuration variables
+from parallel_computing import execute_parallel_tasks, cleanup_temp_dirs
+from config import PARAMETERS, OBJECTIVES, MAXIMIZE, PARAM_BOUNDS, PRECISION, PLOT_CONFIG, OPTIMIZATION_CONFIG, N_JOBS
 
 class OptimizationProblem(Problem):
     def __init__(self):
@@ -23,63 +25,126 @@ class OptimizationProblem(Problem):
         self.objective_names = OBJECTIVES
         self.maximize_indices = [self.objective_names.index(res) for res in MAXIMIZE]
         n_var = len(self.param_names)
+        n_obj = len(self.objective_names)
         xl = np.array([PARAM_BOUNDS[param][0] for param in self.param_names])
         xu = np.array([PARAM_BOUNDS[param][1] for param in self.param_names])
         print(f"Number of variables: {n_var}")
         print(f"Lower bounds: {xl}")
         print(f"Upper bounds: {xu}")
-        super().__init__(n_var=n_var, n_obj=len(OBJECTIVES), n_constr=0, xl=xl, xu=xu)
+        super().__init__(n_var=n_var, n_obj=n_obj, n_constr=0, xl=xl, xu=xu)
 
     def _evaluate(self, X, out, *args, **kwargs):
         param_values_list = [dict(zip(self.param_names, x)) for x in X]
-        results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(param_values) for param_values in param_values_list)
-
+        results = execute_parallel_tasks(param_values_list, OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"])
+
+        # Debugging output before any processing
+        print(f"Initial results: {results}")
+        print(f"Number of parameter sets evaluated: {len(param_values_list)}")
+        print(f"Expected shape of results: ({len(param_values_list)}, {len(self.objective_names)})")
+
+        # Handle cases where not all results are returned
+        if len(results) != len(param_values_list):
+            missing_count = len(param_values_list) - len(results)
+            results.extend([[np.nan] * len(self.objective_names)] * missing_count)
+
         # Apply negation to objectives that need to be maximized
         for i in range(len(results)):
             for idx in self.maximize_indices:
                 results[i] = list(results[i])
                 results[i][idx] = -results[i][idx]
-        out["F"] = np.array(results) # Ensure results are a 2D array
-
-# Initialize the optimization algorithm
-algorithm = initialize_algorithm(
-    OPTIMIZATION_CONFIG['ALGORITHM_NAME'],
-    OPTIMIZATION_CONFIG.get('POP_SIZE')
-    )
-
-# Define the optimization problem
-problem = OptimizationProblem()
-
-try:
-    # Run the optimization
-    res = minimize(problem, algorithm, ("n_gen", OPTIMIZATION_CONFIG['N_GEN']), verbose=True)
-finally:
-    # Cleanup temporary directories
-    cleanup_temp_dirs()
-
-# Print the results
-print("Optimization Results:")
-for i, result in enumerate(res.F):
-    # Negate back the maximized objectives for display
-    result = list(result)
-    for idx in problem.maximize_indices:
-        result[idx] = -result[idx]
-    result = tuple(result)
-
-    print(f"Solution {i}: ", end="")
-    for name, value in zip(OBJECTIVES, result):
-        print(f"{name.capitalize()} = {value:.{PRECISION}f}", end=", ")
-    print()
-
-# Plot the results
-plt.figure(figsize=(8, 6))
-for idx in problem.maximize_indices:
-    res.F[:, idx] = -res.F[:, idx]
-plt.scatter(res.F[:, 0], res.F[:, 1])
-plt.xlabel(PLOT_CONFIG["PLOT_X"], fontsize=14)
-plt.ylabel(PLOT_CONFIG["PLOT_Y"], fontsize=14)
-plt.title(PLOT_CONFIG["PLOT_TITLE"], fontsize=16)
-plt.grid(True)
-plt.tight_layout()
-plt.show()
\ No newline at end of file
+        # Debugging output after processing
+        print(f"Processed results: {results}")
+
+        # Ensure results are a 2D array of shape (len(X), len(self.objective_names))
+        results_array = np.array(results)
+        print(f"Shape of results array: {results_array.shape}")
+
+        out["F"] = results_array.reshape(len(X), len(self.objective_names))
+
+def create_results_folder():
+    results_folder = 'results'
+    if not os.path.exists(results_folder):
+        os.makedirs(results_folder)
+    return results_folder
+
+def run_optimization(use_adaptive_instance_selection):
+
+    # Ensure the results folder exists
+    results_folder = create_results_folder()
+
+    # Set the adaptive instance selection flag
+    OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"] = use_adaptive_instance_selection
+
+    # Initialize the optimization algorithm
+    algorithm = initialize_algorithm(
+        OPTIMIZATION_CONFIG['ALGORITHM_NAME'],
+        OPTIMIZATION_CONFIG.get('POP_SIZE')
+    )
+
+    # Define the optimization problem
+    problem = OptimizationProblem()
+
+    start_time = time.time()
+    try:
+        # Run the optimization
+        res = minimize(problem, algorithm, ("n_gen", OPTIMIZATION_CONFIG['N_GEN']), verbose=True)
+    finally:
+        # Cleanup temporary directories
+        cleanup_temp_dirs()
+    end_time = time.time()
+
+    elapsed_time = end_time - start_time
+    print(f"Time with{'out' if not use_adaptive_instance_selection else ''} adaptive instance selection: {elapsed_time:.2f} seconds")
+    print_and_plot_results(res, problem)
+
+    # Save results to a file
+    results_data = {
+        "results": res.F.tolist(),
+        "elapsed_time": elapsed_time,
+        "use_adaptive_instance_selection": use_adaptive_instance_selection
+    }
+    filename = os.path.join(results_folder, f'optimization_results_{"with" if use_adaptive_instance_selection else "without"}_adaptive.json')
+    with open(filename, 'w') as f:
+        json.dump(results_data, f)
+    print(f"Results have been stored in: {filename}")
+
+    return res.F, elapsed_time
+
+def print_and_plot_results(res, problem):
+    print("Optimization Results:")
+    for i, result in enumerate(res.F):
+        # Negate back the maximized objectives for display
+        result = list(result)
+        for idx in problem.maximize_indices:
+            result[idx] = -result[idx]
+        result = tuple(result)
+
+        print(f"Solution {i}: ", end="")
+        for name, value in zip(OBJECTIVES, result):
+            print(f"{name.capitalize()} = {value:.{PRECISION}f}", end=", ")
+        print()
+
+    try:
+        plt.figure(figsize=(8, 6))
+        for idx in problem.maximize_indices:
+            res.F[:, idx] = -res.F[:, idx]
+        plt.scatter(res.F[:, 0], res.F[:, 1])
+        plt.xlabel(PLOT_CONFIG["PLOT_X"], fontsize=14)
+        plt.ylabel(PLOT_CONFIG["PLOT_Y"], fontsize=14)
+        plt.title(PLOT_CONFIG["PLOT_TITLE"], fontsize=16)
+        plt.grid(True)
+        plt.tight_layout()
+        plt.show()
+    except Exception as e:
+        print(f"Error during plotting: {e}")
+
+def main():
+    use_adaptive_instance_selection = OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"]
+
+    print(f"Running optimization with{'out' if not use_adaptive_instance_selection else ''} adaptive instance selection...")
+    results, elapsed_time = run_optimization(use_adaptive_instance_selection=use_adaptive_instance_selection)
+    print(f"Time with{'out' if not use_adaptive_instance_selection else ''} adaptive instance selection: {elapsed_time:.2f} seconds")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/src/parallel_computing.py b/src/parallel_computing.py
index 003a8cc53d09ef8f3021fed3cfb19dd98779ab98..fc1add481858ab66a3b65dedb97ecc4f2afa9b9c 100644
--- a/src/parallel_computing.py
+++ b/src/parallel_computing.py
@@ -1,4 +1,4 @@
-# (c) Zizhe Wang
+# Copyright (c) 2024 - Zizhe Wang
 # https://zizhe.wang
 
 ######################
@@ -8,12 +8,14 @@
 ######################
 
 import os
-import tempfile
 import shutil
+import tempfile
+import numpy as np
 from time import sleep
+from joblib import Parallel, delayed
 from OMPython import OMCSessionZMQ
-import numpy as np
-from config import MODEL_FILE, MODEL_NAME, SIMULATION_STOP_TIME, OBJECTIVES, MODEL_PATH, PRECISION
+from config import MODEL_FILE, MODEL_NAME, SIMULATION_STOP_TIME, PARAMETERS, OBJECTIVES, PARAM_BOUNDS, MODEL_PATH, PRECISION, OPTIMIZATION_CONFIG, N_JOBS
+from adaptive_instance_selection import initial_sampling, evaluate_samples, cluster_samples, select_informative_instances, iterative_refinement
 
 temp_dirs = []  # List to store paths of temporary directories
 
@@ -24,11 +26,11 @@ def optimization_function(param_values, retries=3, delay=2):
     """
     temp_dir = tempfile.mkdtemp()  # Create a unique temporary directory for each worker
    temp_dirs.append(temp_dir)  # Store the path for later cleanup
+    temp_dir = temp_dir.replace('\\', '/')
 
     for attempt in range(retries):
         try:
             omc = OMCSessionZMQ()  # Create a new OpenModelica session
-            temp_dir = temp_dir.replace('\\', '/')
             omc.sendExpression(f'cd("{temp_dir}")')
 
             # Copy model file to temporary directory
@@ -59,6 +61,10 @@
             if not os.path.isfile(file):
                 raise RuntimeError(f"Expected file not found: {file}")
 
+            # Ensure param_values is a dictionary
+            if isinstance(param_values, np.ndarray):
+                param_values = {param: value for param, value in zip(PARAMETERS, param_values)}
+
             # Set model parameters
             rounded_param_values = {param: round(value, PRECISION) for param, value in param_values.items()}
             for param, value in rounded_param_values.items():
@@ -117,4 +123,38 @@ def cleanup_temp_dirs():
                     sleep(2 ** attempt)  # Incremental backoff: 2, 4, 8, 16, ... seconds
             except Exception as e:
                 print(f"Error: {e}")
-                break  # Exit the loop for non-permission errors
\ No newline at end of file
+                break  # Exit the loop for non-permission errors
+
+def execute_parallel_tasks(tasks, use_adaptive_instance_selection):
+    results = []
+
+    if use_adaptive_instance_selection:
+        # Initial sampling
+        initial_samples = initial_sampling(PARAM_BOUNDS, OPTIMIZATION_CONFIG['POP_SIZE'])
+
+        # Parallel evaluation of initial samples
+        initial_results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(sample) for sample in initial_samples)
+
+        # Iterative refinement
+        refined_samples = iterative_refinement(initial_samples, initial_results, optimization_function)
+
+        # Parallel evaluation of refined samples
+        refined_results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(task) for task in refined_samples)
+
+        # Combine initial and refined results, ensuring the number matches the initial parameter sets
+        results = initial_results + refined_results
+
+        # Ensure only the first `len(tasks)` results are considered
+        results = results[:len(tasks)]
+    else:
+        results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(task) for task in tasks)
+
+    # Ensure results length matches tasks length by handling exceptions
+    completed_results = [result for result in results if result is not None]
+
+    # Debugging output
+    print(f"Initial tasks: {len(tasks)}")
+    print(f"Results: {len(completed_results)}")
+    print(f"Results content: {completed_results}")
+
+    return completed_results
\ No newline at end of file
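
Usage note: the sketch below is not part of the patch; it shows how the new `run_optimization` entry point and the `USE_ADAPTIVE_INSTANCE_SELECTION` flag might be exercised to compare both modes in one session. It assumes `config.py` exposes `OPTIMIZATION_CONFIG` as a plain dict containing that key (as the patch reads and writes it), that `src` is on the Python path, and that OpenModelica is available for the model evaluations. The `ttest_ind` call only illustrates one plausible use of the `scipy.stats` import added to `optimize_main.py`; the diff itself does not show how the author intends to compare the two result sets.

    # Hypothetical comparison driver; assumes the modules behave as in the patch above.
    from scipy.stats import ttest_ind
    from optimize_main import run_optimization

    # One run per mode; each returns (objective matrix res.F, elapsed seconds)
    # and writes its own optimization_results_{with,without}_adaptive.json file.
    f_adaptive, t_adaptive = run_optimization(use_adaptive_instance_selection=True)
    f_baseline, t_baseline = run_optimization(use_adaptive_instance_selection=False)

    print(f"adaptive: {t_adaptive:.2f}s, baseline: {t_baseline:.2f}s")

    # Welch's t-test on the first objective of the two Pareto fronts
    stat, p_value = ttest_ind(f_adaptive[:, 0], f_baseline[:, 0], equal_var=False)
    print(f"t = {stat:.3f}, p = {p_value:.3f}")

Comparing the two JSON result files written by `run_optimization` would serve the same purpose offline, since they record both the objective values and the elapsed time per mode.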