feat adaptive instance selection

d0ba7175 · Zizhe Wang · b69bf548 · d0ba7175 · d0ba7175 · d0ba7175
Commit d0ba7175 authored 1 year ago by Zizhe Wang
--- a/src/adaptive_instance_selection.py
+++ b/src/adaptive_instance_selection.py
+# Copyright (c) 2024 - Zizhe Wang
+# https://zizhe.wang
+
+####################################
+#                                  #
+# AUTOMATIC SEARCH SPACE REDUCTION #
+#                                  #
+####################################
+
+import numpy as np
+from pyDOE import lhs
+from sklearn.cluster import KMeans
+
+# Initial Sampling
+def initial_sampling(param_bounds, n_samples):
+    """Draw Latin hypercube samples and scale them to the parameter bounds.
+
+    Args:
+        param_bounds: Mapping whose values are indexable bounds, with the
+            lower bound at index 0 and the upper bound at index 1. Column
+            ``i`` of the output follows the i-th entry in iteration order.
+        n_samples: Number of samples requested from ``lhs``.
+
+    Returns:
+        The array produced by ``lhs`` with every column rescaled to the
+        bounds of its parameter.
+    """
+    scaled = lhs(len(param_bounds), samples=n_samples)  # Latin hypercube sampling (LHS)
+
+    # Map each column onto the [lower, upper] interval of its parameter
+    for column, limits in enumerate(param_bounds.values()):
+        lower, upper = limits[0], limits[1]
+        scaled[:, column] = lower + scaled[:, column] * (upper - lower)
+
+    return scaled
+
+# Evaluate Samples
+def evaluate_samples(samples, objective_function):
+    """Apply ``objective_function`` to every sample, in order.
+
+    Args:
+        samples: Iterable of samples.
+        objective_function: Callable invoked once per sample.
+
+    Returns:
+        ``np.ndarray`` built from the collected return values, one entry
+        per sample.
+    """
+    return np.array([objective_function(point) for point in samples])
+
+# Clustering Samples
+def cluster_samples(samples, n_clusters):
+    """Cluster the samples with k-means.
+
+    Args:
+        samples: Data passed to ``KMeans.fit``.
+        n_clusters: Number of clusters to form.
+
+    Returns:
+        Tuple ``(labels, centers)`` taken from the fitted estimator's
+        ``labels_`` and ``cluster_centers_`` attributes.
+    """
+    fitted = KMeans(n_clusters=n_clusters).fit(samples)
+    return fitted.labels_, fitted.cluster_centers_
+
+# Adaptive Selection
+def select_informative_instances(samples, results, threshold=0.1):
+    """Keep the samples whose mean objective value lies at or below a percentile.
+
+    Args:
+        samples: Array-like with one sample per row.
+        results: Objective values aligned with ``samples``: either one value
+            per sample (1-D) or one row of values per sample (2-D, averaged
+            per row).
+        threshold: Fraction in [0, 1]; ``threshold * 100`` is the percentile
+            of the scores used as cutoff.
+
+    Returns:
+        The rows of ``samples`` whose score is <= the cutoff. Samples with a
+        NaN score are never selected and do not influence the cutoff. An
+        empty selection is returned when there is nothing to score.
+    """
+    samples = np.asarray(samples)
+    results = np.asarray(results, dtype=float)
+    if results.size == 0:
+        return samples[:0]
+
+    # A scalar objective yields 1-D results; averaging over axis 1 would fail
+    performance = results if results.ndim == 1 else np.mean(results, axis=1)
+
+    # A single NaN score would turn the percentile into NaN and empty the
+    # whole selection, so the cutoff is computed from the valid scores only.
+    valid = ~np.isnan(performance)
+    if not valid.any():
+        return samples[:0]
+    cutoff = np.percentile(performance[valid], threshold * 100)
+
+    keep = valid.copy()
+    keep[valid] = performance[valid] <= cutoff
+    return samples[keep]
+
+# Iterative Refinement
+def iterative_refinement(samples, results, objective_function, n_iterations=5, threshold=0.1):
+    """Iteratively narrow a sample set and add new points near the survivors.
+
+    Every round keeps the samples chosen by ``select_informative_instances``,
+    clusters them with ``cluster_samples`` and appends one randomly perturbed
+    copy of each cluster center.
+
+    Args:
+        samples: 2-D array-like with one sample per row.
+        results: Objective values already computed for ``samples``, aligned
+            row by row. They are reused so those samples are not evaluated
+            again; if they are missing, misaligned or not convertible to a
+            float array, the samples are evaluated here instead.
+        objective_function: Callable mapping one sample to its objective
+            value(s).
+        n_iterations: Maximum number of select/cluster/extend rounds.
+        threshold: Fraction forwarded to ``select_informative_instances``.
+
+    Returns:
+        Array holding the samples kept in the last round followed by the
+        points generated in that round (the latter are not evaluated).
+    """
+    samples = np.asarray(samples)
+
+    # Cache evaluations so that each sample is evaluated at most once
+    known = None
+    if results is not None and len(results) == len(samples):
+        try:
+            known = np.asarray(results, dtype=float)
+        except (TypeError, ValueError):
+            known = None  # ragged or non-numeric entries: evaluate from scratch
+
+    for _ in range(n_iterations):
+        if len(samples) == 0:
+            break  # nothing left to evaluate or cluster
+
+        # Evaluate only the rows that have no cached result yet
+        if known is None:
+            known = evaluate_samples(samples, objective_function)
+        elif len(known) < len(samples):
+            fresh = evaluate_samples(samples[len(known):], objective_function)
+            known = np.concatenate((known, fresh), axis=0)
+
+        # Select on row indices instead of rows, so the cached results can be
+        # filtered with exactly the same selection as the samples
+        kept = select_informative_instances(np.arange(len(samples)), known, threshold)
+        samples, known = samples[kept], known[kept]
+        if len(samples) == 0:
+            break  # empty selection: clustering would fail
+
+        # Re-cluster the selected samples
+        n_clusters = max(1, int(len(samples) * 0.1))  # Ensure at least 1 cluster
+        _, centers = cluster_samples(samples, n_clusters)
+
+        # Generate one new sample around each cluster center
+        # NOTE(review): the +/-0.05 perturbation is absolute (not scaled to
+        # the parameter ranges) and is not clipped to any bounds - confirm
+        # this is intended.
+        new_samples = centers + np.random.uniform(-0.05, 0.05, centers.shape)
+        samples = np.vstack((samples, new_samples))
+
+    return samples
+
+# Define the objective function wrapper
+def objective_function(param_values):
+    """Evaluate one parameter set by delegating to ``optimization_function``.
+
+    Args:
+        param_values: Parameter values forwarded unchanged.
+
+    Returns:
+        Whatever ``optimization_function`` returns for ``param_values``.
+    """
+    # Imported inside the function: the name is not imported at module level
+    # (calling this wrapper would raise NameError), and parallel_computing
+    # imports this module while loading, so a top-level import would be
+    # circular.
+    from parallel_computing import optimization_function
+
+    return optimization_function(param_values)
\ No newline at end of file
--- a/src/optimize_main.py
+++ b/src/optimize_main.py
-# (c) Zizhe Wang
+# Copyright (c) 2024 - Zizhe Wang
 # https://zizhe.wang

 ############################
@@ -7,15 +7,17 @@
 #                          #
 ############################

+import os
+import time
+import json
 import numpy as np
-from joblib import Parallel, delayed
 import matplotlib.pyplot as plt
 from pymoo.core.problem import Problem
 from pymoo.optimize import minimize
+from scipy.stats import ttest_ind
 from optimization_libraries import initialize_algorithm
-from parallel_computing import optimization_function, cleanup_temp_dirs
-from config import (PARAMETERS, OBJECTIVES, MAXIMIZE, PARAM_BOUNDS, PRECISION, PLOT_CONFIG, 
-            OPTIMIZATION_CONFIG, N_JOBS)  # Import all configuration variables
+from parallel_computing import execute_parallel_tasks, cleanup_temp_dirs
+from config import PARAMETERS, OBJECTIVES, MAXIMIZE, PARAM_BOUNDS, PRECISION, PLOT_CONFIG, OPTIMIZATION_CONFIG, N_JOBS

 class OptimizationProblem(Problem):
    def __init__(self):
@@ -23,16 +25,27 @@ class OptimizationProblem(Problem):
        self.objective_names = OBJECTIVES
        self.maximize_indices = [self.objective_names.index(res) for res in MAXIMIZE]
        n_var = len(self.param_names)
+        n_obj = len(self.objective_names)
        xl = np.array([PARAM_BOUNDS[param][0] for param in self.param_names])
        xu = np.array([PARAM_BOUNDS[param][1] for param in self.param_names])
        print(f"Number of variables: {n_var}")
        print(f"Lower bounds: {xl}")
        print(f"Upper bounds: {xu}")
-        super().__init__(n_var=n_var, n_obj=len(OBJECTIVES), n_constr=0, xl=xl, xu=xu)
+        super().__init__(n_var=n_var, n_obj=n_obj, n_constr=0, xl=xl, xu=xu)

    def _evaluate(self, X, out, *args, **kwargs):
        param_values_list = [dict(zip(self.param_names, x)) for x in X]
-        results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(param_values) for param_values in param_values_list)
+        results = execute_parallel_tasks(param_values_list, OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"])
+
+        # Debugging output before any processing
+        print(f"Initial results: {results}")
+        print(f"Number of parameter sets evaluated: {len(param_values_list)}")
+        print(f"Expected shape of results: ({len(param_values_list)}, {len(self.objective_names)})")
+
+        # Handle cases where not all results are returned
+        if len(results) != len(param_values_list):
+            missing_count = len(param_values_list) - len(results)
+            results.extend([[np.nan] * len(self.objective_names)] * missing_count)

        # Apply negation to objectives that need to be maximized
        for i in range(len(results)):
@@ -40,7 +53,28 @@ class OptimizationProblem(Problem):
                results[i] = list(results[i])
                results[i][idx] = -results[i][idx]
        
-        out["F"] = np.array(results)  # Ensure results are a 2D array
+        # Debugging output after processing
+        print(f"Processed results: {results}")
+
+        # Ensure results are a 2D array of shape (len(X), len(self.objective_names))
+        results_array = np.array(results)
+        print(f"Shape of results array: {results_array.shape}")
+
+        out["F"] = results_array.reshape(len(X), len(self.objective_names))  # Ensure results are a 2D array
+
+def create_results_folder():
+    """Create the 'results' directory if needed and return its relative path."""
+    results_folder = 'results'
+    # exist_ok avoids the check-then-create race of a separate exists() test
+    os.makedirs(results_folder, exist_ok=True)
+    return results_folder
+
+def run_optimization(use_adaptive_instance_selection):
+
+    # Ensure the results folder exists
+    results_folder = create_results_folder()
+
+    # Set the adaptive instance selection flag
+    OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"] = use_adaptive_instance_selection

    # Initialize the optimization algorithm
    algorithm = initialize_algorithm(
@@ -51,14 +85,33 @@ algorithm = initialize_algorithm(
    # Define the optimization problem
    problem = OptimizationProblem()

+    start_time = time.time()
    try:
        # Run the optimization
        res = minimize(problem, algorithm, ("n_gen", OPTIMIZATION_CONFIG['N_GEN']), verbose=True)
    finally:
        # Cleanup temporary directories
        cleanup_temp_dirs()
+    end_time = time.time()

-# Print the results
+    elapsed_time = end_time - start_time
+    print(f"Time with{'out' if not use_adaptive_instance_selection else ''} adaptive instance selection: {elapsed_time:.2f} seconds")
+    print_and_plot_results(res, problem)
+
+    # Save results to a file
+    results_data = {
+        "results": res.F.tolist(),
+        "elapsed_time": elapsed_time,
+        "use_adaptive_instance_selection": use_adaptive_instance_selection
+    }
+    filename = os.path.join(results_folder, f'optimization_results_{"with" if use_adaptive_instance_selection else "without"}_adaptive.json')
+    with open(filename, 'w') as f:
+        json.dump(results_data, f)
+    print(f"Results have been stored in: {filename}")
+
+    return res.F, elapsed_time
+
+def print_and_plot_results(res, problem):
    print("Optimization Results:")
    for i, result in enumerate(res.F):
        # Negate back the maximized objectives for display
@@ -72,7 +125,7 @@ for i, result in enumerate(res.F):
            print(f"{name.capitalize()} = {value:.{PRECISION}f}", end=", ")
        print()

-# Plot the results
+    try: 
        plt.figure(figsize=(8, 6))
        for idx in problem.maximize_indices:
            res.F[:, idx] = -res.F[:, idx]
@@ -83,3 +136,15 @@ plt.title(PLOT_CONFIG["PLOT_TITLE"], fontsize=16)
        plt.grid(True)
        plt.tight_layout()
        plt.show()
+    except Exception as e:
+        print(f"Error during plotting: {e}")
+
+def main():
+    """Run one optimization with the configured adaptive-selection flag and report its runtime."""
+    use_adaptive_instance_selection = OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"]
+    print(f"Running optimization with{'out' if not use_adaptive_instance_selection else ''} adaptive instance selection...")
+
+    # Only the elapsed time is reported here; the returned objective values are not used
+    _, elapsed_time = run_optimization(use_adaptive_instance_selection=use_adaptive_instance_selection)
+
+    print(f"Time with{'out' if not use_adaptive_instance_selection else ''} adaptive instance selection: {elapsed_time:.2f} seconds")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
--- a/src/parallel_computing.py
+++ b/src/parallel_computing.py
-# (c) Zizhe Wang
+# Copyright (c) 2024 - Zizhe Wang
 # https://zizhe.wang

 ######################
@@ -8,12 +8,14 @@
 ######################

 import os
-import tempfile
 import shutil
+import tempfile
+import numpy as np
 from time import sleep
+from joblib import Parallel, delayed
 from OMPython import OMCSessionZMQ
-import numpy as np
-from config import MODEL_FILE, MODEL_NAME, SIMULATION_STOP_TIME, OBJECTIVES, MODEL_PATH, PRECISION
+from config import MODEL_FILE, MODEL_NAME, SIMULATION_STOP_TIME, PARAMETERS, OBJECTIVES, PARAM_BOUNDS, MODEL_PATH, PRECISION, OPTIMIZATION_CONFIG, N_JOBS
+from adaptive_instance_selection import initial_sampling, evaluate_samples, cluster_samples, select_informative_instances, iterative_refinement

 temp_dirs = []  # List to store paths of temporary directories

@@ -24,11 +26,11 @@ def optimization_function(param_values, retries=3, delay=2):
    """
    temp_dir = tempfile.mkdtemp()  # Create a unique temporary directory for each worker
    temp_dirs.append(temp_dir)  # Store the path for later cleanup
+    temp_dir = temp_dir.replace('\\', '/')

    for attempt in range(retries):
        try:
            omc = OMCSessionZMQ()  # Create a new OpenModelica session
-            temp_dir = temp_dir.replace('\\', '/')
            omc.sendExpression(f'cd("{temp_dir}")')

            # Copy model file to temporary directory
@@ -59,6 +61,10 @@ def optimization_function(param_values, retries=3, delay=2):
                if not os.path.isfile(file):
                    raise RuntimeError(f"Expected file not found: {file}")
            
+            # Ensure param_values is a dictionary
+            if isinstance(param_values, np.ndarray):
+                param_values = {param: value for param, value in zip(PARAMETERS, param_values)}
+
            # Set model parameters
            rounded_param_values = {param: round(value, PRECISION) for param, value in param_values.items()}
            for param, value in rounded_param_values.items():
@@ -118,3 +124,37 @@ def cleanup_temp_dirs():
            except Exception as e:
                print(f"Error: {e}")
                break  # Exit the loop for non-permission errors
+
+def execute_parallel_tasks(tasks, use_adaptive_instance_selection):
+    """Run ``optimization_function`` in parallel and return the results in order.
+
+    Args:
+        tasks: Parameter sets to evaluate.
+        use_adaptive_instance_selection: When true, results come from the
+            sampling/refinement pipeline (truncated to ``len(tasks)``);
+            otherwise every entry of ``tasks`` is evaluated directly.
+
+    Returns:
+        List of results. An evaluation that returned ``None`` is kept in
+        place as a row of NaN (one per entry of ``OBJECTIVES``), so the
+        position of every other result is unchanged.
+    """
+    if use_adaptive_instance_selection:
+        # NOTE(review): this branch evaluates freshly drawn samples instead of
+        # `tasks`, so the returned values do not belong to the parameter sets
+        # the caller passed in - confirm this is intended.
+
+        # Initial sampling
+        initial_samples = initial_sampling(PARAM_BOUNDS, OPTIMIZATION_CONFIG['POP_SIZE'])
+
+        # Parallel evaluation of initial samples
+        initial_results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(sample) for sample in initial_samples)
+
+        # Iterative refinement
+        refined_samples = iterative_refinement(initial_samples, initial_results, optimization_function)
+
+        # Parallel evaluation of refined samples
+        refined_results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(task) for task in refined_samples)
+
+        # Combine initial and refined results and keep only the first `len(tasks)`
+        results = (initial_results + refined_results)[:len(tasks)]
+    else:
+        results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(task) for task in tasks)
+
+    # Keep failed evaluations in place as NaN rows. Dropping them would shift
+    # every later result onto the wrong task, because the caller can only pad
+    # missing entries at the end of the list.
+    failed_row = [np.nan] * len(OBJECTIVES)
+    completed_results = [list(failed_row) if result is None else result for result in results]
+
+    # Debugging output
+    print(f"Initial tasks: {len(tasks)}")
+    print(f"Results: {len(completed_results)}")
+    print(f"Results content: {completed_results}")
+
+    return completed_results
\ No newline at end of file