From 2e0c3302741088a975dc2f39532e3c7a0d21379c Mon Sep 17 00:00:00 2001
From: Zizhe Wang <zizhe.wang@tu-dresden.de>
Date: Mon, 10 Jun 2024 15:20:37 +0200
Subject: [PATCH] fix adaptive instance selection

---
 src/adaptive_instance_selection.py | 102 ++++++++++++++++++++---------
 src/optimize_main.py               |  20 ++++--
 src/parallel_computing.py          |  10 +--
 3 files changed, 91 insertions(+), 41 deletions(-)

diff --git a/src/adaptive_instance_selection.py b/src/adaptive_instance_selection.py
index ec2fdbc..ebbe4f5 100644
--- a/src/adaptive_instance_selection.py
+++ b/src/adaptive_instance_selection.py
@@ -7,19 +7,26 @@
 #                                  #
 ####################################
 
+from sklearn.mixture import GaussianMixture
+from scipy.stats.qmc import LatinHypercube as lhs
 import numpy as np
-from pyDOE import lhs
-from sklearn.cluster import KMeans
+from config import PARAM_TYPES, PARAM_BOUNDS
 
 # Initial Sampling
 def initial_sampling(param_bounds, n_samples):
     dimensions = len(param_bounds)
-    samples = lhs(dimensions, samples=n_samples)  # Latin hypercube sampling (LHS)
-    
-    # Scale samples to parameter bounds
-    for i, (param, bounds) in enumerate(param_bounds.items()):
+    samples = lhs(d=dimensions).random(n=n_samples)  # Latin hypercube sampling (LHS)
+    if samples.size == 0:
+        raise ValueError("Initial sampling produced an empty set of samples.")
+
+    # Scale samples to parameter bounds and respect parameter types
+    for i, (param, bounds_info) in enumerate(param_bounds.items()):
+        bounds = bounds_info["bounds"]
         samples[:, i] = bounds[0] + samples[:, i] * (bounds[1] - bounds[0])
+        if bounds_info["type"] == 'int':
+            samples[:, i] = np.round(samples[:, i]).astype(int)
     
+    print(f"Initial samples shape: {samples.shape}")
     return samples
 
 # Evaluate Samples
@@ -28,49 +35,82 @@ def evaluate_samples(samples, objective_function):
     for sample in samples:
         result = objective_function(sample)
         results.append(result)
-    
     return np.array(results)
 
-# Clustering Samples
-def cluster_samples(samples, n_clusters):
-    kmeans = KMeans(n_clusters=n_clusters)
-    kmeans.fit(samples)
-    labels = kmeans.labels_
-    centers = kmeans.cluster_centers_
+# Advanced Clustering
+def advanced_clustering_samples(samples, n_clusters):
+    if len(samples) == 0:
+        raise ValueError("Cannot cluster an empty set of samples.")
+    gmm = GaussianMixture(n_components=n_clusters, covariance_type='full')
+    gmm.fit(samples)
+    labels = gmm.predict(samples)
+    centers = gmm.means_
     
     return labels, centers
 
-# Adaptive Selection
-def select_informative_instances(samples, results, threshold=0.1):
+# Adaptive Selection with Adaptive Threshold
+def adaptive_select_informative_instances(samples, results, initial_threshold=0.15, adapt_rate=0.05):
+    if len(samples) == 0 or len(results) == 0:
+        raise ValueError("Received empty samples or results for selection.")
+    
     performance = np.mean(results, axis=1)
-    cutoff = np.percentile(performance, threshold * 100)
-    selected_samples = samples[performance <= cutoff]
+    threshold = initial_threshold
+    while True:
+        # Ensure the threshold does not exceed 1
+        effective_threshold = min(threshold, 1.0)
+        cutoff = np.percentile(performance, effective_threshold * 100)
+        selected_samples = samples[performance <= cutoff]
+        
+        if len(selected_samples) >= 3:
+            break
+        threshold += adapt_rate
+
+    if selected_samples.size == 0:
+        raise ValueError("Selection of informative instances resulted in an empty set.")
     
+    print(f"Selected samples shape: {selected_samples.shape}")
     return selected_samples
 
 # Iterative Refinement
-def iterative_refinement(samples, results, objective_function, n_iterations=5, threshold=0.1):
-    for _ in range(n_iterations):
+def iterative_refinement(samples, results, objective_function, maximize_indices, n_iterations=5, initial_threshold=0.15, adapt_rate=0.05):
+    for iteration in range(n_iterations):
+        print(f"Iteration {iteration}: Starting with samples shape: {samples.shape}")
+
         # Evaluate current samples
         current_results = evaluate_samples(samples, objective_function)
         
-        # Select informative instances
-        samples = select_informative_instances(samples, current_results, threshold)
+        # Print performance metrics for current samples
+        print(f"Iteration {iteration}: Current results: {current_results}")
+        
+        # Negate maximized objectives first so selection treats every objective as minimization
+        for i in range(len(current_results)):
+            for idx in maximize_indices:
+                current_results[i][idx] = -current_results[i][idx]
+
+        # Select informative instances with adaptive threshold
+        selected_samples = adaptive_select_informative_instances(samples, current_results, initial_threshold, adapt_rate)
+        
+        # Ensure at least a minimum number of samples are selected to maintain diversity
+        if len(selected_samples) < 3:
+            selected_samples = samples[np.argsort(np.mean(current_results, axis=1))[:3]]
         
         # Re-cluster the selected samples
-        n_clusters = max(1, int(len(samples) * 0.1))  # Ensure at least 1 cluster
-        labels, centers = cluster_samples(samples, n_clusters)
+        n_clusters = max(1, min(3, int(len(selected_samples) * 0.3)))  # Ensure at least 1 cluster, maximum 3 clusters
+        labels, centers = advanced_clustering_samples(selected_samples, n_clusters)
         
         # Generate new samples around cluster centers
         new_samples = []
         for center in centers:
-            perturbations = np.random.uniform(-0.05, 0.05, center.shape)
-            new_samples.append(center + perturbations)
+            for _ in range(1):  # Generate 1 new sample per center to control the growth of sample size
+                perturbations = np.random.uniform(-0.05, 0.05, center.shape)  # Use smaller perturbations for finer adjustments
+                new_samples.append(center + perturbations)
         
-        samples = np.vstack((samples, new_samples))
+        # Combine selected samples with new samples, ensuring we don't grow the sample size too much
+        samples = np.vstack((selected_samples, new_samples))
+        if len(samples) > len(selected_samples) + 2:  # Limit the growth of samples
+            samples = samples[:len(selected_samples) + 2]
+        
+        # Debugging output
+        print(f"Iteration {iteration}: Samples shape after selection and new sample generation: {samples.shape}")
     
-    return samples
-
-# Define the objective function wrapper
-def objective_function(param_values):
-    return optimization_function(param_values)
\ No newline at end of file
+    return samples
\ No newline at end of file
diff --git a/src/optimize_main.py b/src/optimize_main.py
index c69dd35..09337d3 100644
--- a/src/optimize_main.py
+++ b/src/optimize_main.py
@@ -17,25 +17,29 @@ from pymoo.optimize import minimize
 from scipy.stats import ttest_ind
 from optimization_libraries import initialize_algorithm
 from parallel_computing import execute_parallel_tasks, cleanup_temp_dirs
-from config import PARAMETERS, OBJECTIVES, MAXIMIZE, PARAM_BOUNDS, PRECISION, PLOT_CONFIG, OPTIMIZATION_CONFIG, N_JOBS
+from config import PARAMETERS, OBJECTIVES, MAXIMIZE, PARAM_BOUND_VALUES, PARAM_TYPES, PRECISION, PLOT_CONFIG, OPTIMIZATION_CONFIG, N_JOBS
 
 class OptimizationProblem(Problem):
     def __init__(self):
-        self.param_names = list(PARAM_BOUNDS.keys())
+        self.param_names = list(PARAM_BOUND_VALUES.keys())
+        self.param_types = [PARAM_TYPES[param] for param in self.param_names]
         self.objective_names = OBJECTIVES
         self.maximize_indices = [self.objective_names.index(res) for res in MAXIMIZE]
         n_var = len(self.param_names)
         n_obj = len(self.objective_names)
-        xl = np.array([PARAM_BOUNDS[param][0] for param in self.param_names])
-        xu = np.array([PARAM_BOUNDS[param][1] for param in self.param_names])
+        xl = np.array([PARAM_BOUND_VALUES[param][0] for param in self.param_names])
+        xu = np.array([PARAM_BOUND_VALUES[param][1] for param in self.param_names])
         print(f"Number of variables: {n_var}")
         print(f"Lower bounds: {xl}")
         print(f"Upper bounds: {xu}")
         super().__init__(n_var=n_var, n_obj=n_obj, n_constr=0, xl=xl, xu=xu)
 
     def _evaluate(self, X, out, *args, **kwargs):
+        for i, param_type in enumerate(self.param_types):
+            if param_type == "int":
+                X[:, i] = np.round(X[:, i]).astype(int)
         param_values_list = [dict(zip(self.param_names, x)) for x in X]
-        results = execute_parallel_tasks(param_values_list, OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"])
+        results = execute_parallel_tasks(param_values_list, OPTIMIZATION_CONFIG["USE_ADAPTIVE_INSTANCE_SELECTION"], self.maximize_indices)
 
         # Debugging output before any processing
         print(f"Initial results: {results}")
@@ -99,6 +103,13 @@ def run_optimization(use_adaptive_instance_selection):
     print_and_plot_results(res, problem)
 
     # Save results to a file
+    # Negate back the maximized objectives before saving
+    results_to_save = res.F.copy()
+    for i in range(len(results_to_save)):
+        for idx in problem.maximize_indices:
+            results_to_save[i][idx] = -results_to_save[i][idx]
+
     results_data = {
-        "results": res.F.tolist(),
+        "results": results_to_save.tolist(),
         "elapsed_time": elapsed_time,
diff --git a/src/parallel_computing.py b/src/parallel_computing.py
index fc1add4..f6f38e0 100644
--- a/src/parallel_computing.py
+++ b/src/parallel_computing.py
@@ -14,8 +14,8 @@ import numpy as np
 from time import sleep
 from joblib import Parallel, delayed
 from OMPython import OMCSessionZMQ
-from config import MODEL_FILE, MODEL_NAME, SIMULATION_STOP_TIME, PARAMETERS, OBJECTIVES, PARAM_BOUNDS, MODEL_PATH, PRECISION, OPTIMIZATION_CONFIG, N_JOBS
-from adaptive_instance_selection import initial_sampling, evaluate_samples, cluster_samples, select_informative_instances, iterative_refinement
+from config import MODEL_FILE, MODEL_NAME, SIMULATION_STOP_TIME, PARAMETERS, OBJECTIVES, PARAM_BOUNDS, PARAM_TYPES, MODEL_PATH, PRECISION, OPTIMIZATION_CONFIG, N_JOBS
+from adaptive_instance_selection import initial_sampling, evaluate_samples, advanced_clustering_samples, adaptive_select_informative_instances, iterative_refinement
 
 temp_dirs = []  # List to store paths of temporary directories
 
@@ -66,7 +66,7 @@ def optimization_function(param_values, retries=3, delay=2):
                 param_values = {param: value for param, value in zip(PARAMETERS, param_values)}
 
             # Set model parameters
-            rounded_param_values = {param: round(value, PRECISION) for param, value in param_values.items()}
+            rounded_param_values = {param: round(value, PRECISION) if PARAM_TYPES[param] == 'float' else int(value) for param, value in param_values.items()}
             for param, value in rounded_param_values.items():
                 set_param_result = omc.sendExpression(f"setParameterValue({MODEL_NAME}, {param}, {value})")
                 if not set_param_result:
@@ -125,7 +125,7 @@ def cleanup_temp_dirs():
                 print(f"Error: {e}")
                 break  # Exit the loop for non-permission errors
 
-def execute_parallel_tasks(tasks, use_adaptive_instance_selection):
+def execute_parallel_tasks(tasks, use_adaptive_instance_selection, maximize_indices):
     results = []
 
     if use_adaptive_instance_selection:
@@ -136,7 +136,7 @@ def execute_parallel_tasks(tasks, use_adaptive_instance_selection):
         initial_results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(sample) for sample in initial_samples)
         
         # Iterative refinement
-        refined_samples = iterative_refinement(initial_samples, initial_results, optimization_function)
+        refined_samples = iterative_refinement(initial_samples, initial_results, optimization_function, maximize_indices)
         
         # Parallel evaluation of refined samples
         refined_results = Parallel(n_jobs=N_JOBS)(delayed(optimization_function)(task) for task in refined_samples)
-- 
GitLab