Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
O
OptiOrch
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Zizhe Wang
OptiOrch
Commits
e049784a
Commit
e049784a
authored
11 months ago
by
Zizhe Wang
Browse files
Options
Downloads
Patches
Plain Diff
fix adaptive instance selection
parent
8cd1e7a5
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
src/adaptive_instance_selection.py
+53
-25
53 additions, 25 deletions
src/adaptive_instance_selection.py
src/optimize_main.py
+85
-27
85 additions, 27 deletions
src/optimize_main.py
with
138 additions
and
52 deletions
src/adaptive_instance_selection.py
+
53
−
25
View file @
e049784a
...
...
@@ -7,9 +7,9 @@
# #
####################################
from
sklearn.mixture
import
GaussianMixture
from
scipy.stats.qmc
import
LatinHypercube
as
lhs
import
numpy
as
np
from
sklearn.cluster
import
KMeans
from
scipy.stats.qmc
import
LatinHypercube
as
lhs
from
config
import
PARAM_TYPES
,
PARAM_BOUNDS
# Initial Sampling
...
...
@@ -41,38 +41,60 @@ def evaluate_samples(samples, objective_function):
def
advanced_clustering_samples
(
samples
,
n_clusters
):
if
len
(
samples
)
==
0
:
raise
ValueError
(
"
Cannot cluster an empty set of samples.
"
)
gmm
=
GaussianMixture
(
n_components
=
n_clusters
,
covariance_type
=
'
full
'
)
gmm
.
fit
(
samples
)
labels
=
gmm
.
predict
(
samples
)
centers
=
gmm
.
mean
s_
kmeans
=
KMeans
(
n_clusters
=
n_clusters
,
random_state
=
0
,
n_init
=
10
)
kmeans
.
fit
(
samples
)
labels
=
kmeans
.
predict
(
samples
)
centers
=
kmeans
.
cluster_center
s_
return
labels
,
centers
# Adaptive Selection with Adaptive Threshold
def
adaptive_select_informative_instances
(
samples
,
results
,
initial_threshold
=
0.
1
5
,
adapt_rate
=
0.0
5
):
def
adaptive_select_informative_instances
(
samples
,
results
,
initial_threshold
=
0.
0
5
,
adapt_rate
=
0.0
1
,
desired_samples
=
None
,
max_iterations
=
100
):
if
len
(
samples
)
==
0
or
len
(
results
)
==
0
:
raise
ValueError
(
"
Received empty samples or results for selection.
"
)
performance
=
np
.
mean
(
results
,
axis
=
1
)
performance
=
np
.
nan
mean
(
results
,
axis
=
1
)
# Use np.nanmean to ignore nan values
threshold
=
initial_threshold
while
True
:
# Ensure the threshold does not exceed 1
iteration
=
0
while
iteration
<
max_iterations
:
iteration
+=
1
print
(
f
"
Iteration
{
iteration
}
: Current threshold:
{
threshold
}
"
)
# Cap the threshold at 1.0
effective_threshold
=
min
(
threshold
,
1.0
)
cutoff
=
np
.
percentile
(
performance
,
effective_threshold
*
100
)
cutoff
=
np
.
nan
percentile
(
performance
,
effective_threshold
*
100
)
# Use np.nanpercentile to ignore nan values
selected_samples
=
samples
[
performance
<=
cutoff
]
if
len
(
selected_samples
)
>=
3
:
print
(
f
"
Iteration
{
iteration
}
: Number of selected samples:
{
len
(
selected_samples
)
}
"
)
if
desired_samples
is
not
None
and
len
(
selected_samples
)
>=
desired_samples
:
print
(
f
"
Iteration
{
iteration
}
: Desired number of samples reached.
"
)
break
if
len
(
selected_samples
)
==
len
(
samples
):
print
(
f
"
Iteration
{
iteration
}
: All samples selected.
"
)
break
threshold
+=
adapt_rate
if
iteration
==
max_iterations
:
print
(
f
"
Final threshold after max iterations:
{
threshold
}
"
)
print
(
f
"
Performance values:
{
performance
}
"
)
print
(
f
"
Number of selected samples:
{
len
(
selected_samples
)
}
"
)
if
desired_samples
is
not
None
and
len
(
selected_samples
)
<
desired_samples
:
print
(
"
Falling back to the best available samples.
"
)
# Select the top desired_samples samples based on performance
best_indices
=
np
.
argsort
(
performance
)[:
desired_samples
]
selected_samples
=
samples
[
best_indices
]
if
selected_samples
.
size
==
0
:
raise
ValueError
(
"
Selection of informative instances resulted in an empty set.
"
)
print
(
f
"
S
elected samples shape:
{
selected_samples
.
shape
}
"
)
return
selected_samples
print
(
f
"
Final s
elected samples shape:
{
selected_samples
.
shape
}
"
)
return
selected_samples
[:
desired_samples
]
# Ensure the number of selected samples matches the desired number
# Iterative Refinement
def
iterative_refinement
(
samples
,
results
,
objective_function
,
maximize_indices
,
n_iterations
=
5
,
initial_threshold
=
0.15
,
adapt_rate
=
0.05
):
# Iterative Refinement
def
iterative_refinement
(
samples
,
results
,
objective_function
,
maximize_indices
,
n_iterations
=
2
,
initial_threshold
=
0.10
,
adapt_rate
=
0.03
):
for
iteration
in
range
(
n_iterations
):
print
(
f
"
Iteration
{
iteration
}
: Starting with samples shape:
{
samples
.
shape
}
"
)
...
...
@@ -88,29 +110,35 @@ def iterative_refinement(samples, results, objective_function, maximize_indices,
# Ensure objective negation is correctly handled
for
i
in
range
(
len
(
current_results
)):
for
idx
in
maximize_indices
:
current_results
[
i
][
idx
]
=
-
current_results
[
i
][
idx
]
if
not
np
.
isnan
(
current_results
[
i
][
idx
]):
current_results
[
i
][
idx
]
=
-
current_results
[
i
][
idx
]
# Ensure at least a minimum number of samples are selected to maintain diversity
if
len
(
selected_samples
)
<
3
:
selected_samples
=
samples
[
np
.
argsort
(
np
.
mean
(
current_results
,
axis
=
1
))[:
3
]]
selected_samples
=
samples
[
np
.
argsort
(
np
.
nan
mean
(
current_results
,
axis
=
1
))[:
3
]]
# Re-cluster the selected samples
n_clusters
=
max
(
1
,
min
(
3
,
int
(
len
(
selected_samples
)
*
0.
3
)))
# Ensure at least 1 cluster, maximum
3
clusters
n_clusters
=
max
(
1
,
min
(
2
,
int
(
len
(
selected_samples
)
*
0.
2
)))
# Ensure at least 1 cluster, maximum
2
clusters
labels
,
centers
=
advanced_clustering_samples
(
selected_samples
,
n_clusters
)
# Generate new samples around cluster centers
new_samples
=
[]
for
center
in
centers
:
for
_
in
range
(
1
):
# Generate 1 new sample per center to control the growth of
sample
size
perturbations
=
np
.
random
.
uniform
(
-
0.0
5
,
0.0
5
,
center
.
shape
)
# Use smaller perturbations for finer adjustments
for
_
in
range
(
max
(
1
,
(
len
(
samples
)
-
len
(
selected_samples
)))):
# Control the number of new
sample
s
perturbations
=
np
.
random
.
uniform
(
-
0.0
3
,
0.0
3
,
center
.
shape
)
# Use smaller perturbations for finer adjustments
new_samples
.
append
(
center
+
perturbations
)
# Combine selected samples with new samples, ensuring we don't grow the sample size too much
samples
=
np
.
vstack
((
selected_samples
,
new_samples
))
if
len
(
samples
)
>
len
(
selected_samples
)
+
2
:
# Limit the growth of samples
samples
=
samples
[:
len
(
selected_samples
)
+
2
]
combined_samples
=
np
.
vstack
((
selected_samples
,
new_samples
))
samples
=
combined_samples
[:
len
(
selected_samples
)
+
(
len
(
samples
)
-
len
(
selected_samples
))]
# Ensure the sample size matches the original size
# Debugging output
print
(
f
"
Iteration
{
iteration
}
: Samples shape after selection and new sample generation:
{
samples
.
shape
}
"
)
return
samples
\ No newline at end of file
return
samples
def
generate_new_samples
(
existing_samples
,
pop_size
,
n_adaptive_samples
):
n_new_samples
=
pop_size
-
n_adaptive_samples
new_samples
=
initial_sampling
(
PARAM_BOUNDS
,
n_new_samples
)
combined_samples
=
np
.
vstack
((
existing_samples
,
new_samples
))
return
combined_samples
\ No newline at end of file
This diff is collapsed.
Click to expand it.
src/optimize_main.py
+
85
−
27
View file @
e049784a
...
...
@@ -17,14 +17,15 @@ from pymoo.optimize import minimize
from
scipy.stats
import
ttest_ind
from
optimization_libraries
import
initialize_algorithm
from
parallel_computing
import
execute_parallel_tasks
,
cleanup_temp_dirs
from
config
import
PARAMETERS
,
OBJECTIVES
,
MAXIMIZE
,
PARAM_BOUND_VALUES
,
PARAM_TYPES
,
PRECISION
,
PLOT_CONFIG
,
OPTIMIZATION_CONFIG
,
N_JOBS
from
config
import
PARAMETERS
,
OBJECTIVE_NAMES
,
MAXIMIZE
,
PARAM_BOUND_VALUES
,
PARAM_TYPES
,
PRECISION
,
PLOT_CONFIG
,
OPTIMIZATION_CONFIG
,
N_JOBS
from
adaptive_instance_selection
import
initial_sampling
,
evaluate_samples
,
advanced_clustering_samples
,
adaptive_select_informative_instances
,
iterative_refinement
,
generate_new_samples
class
OptimizationProblem
(
Problem
):
def
__init__
(
self
):
self
.
param_names
=
list
(
PARAM_BOUND_VALUES
.
keys
())
self
.
param_types
=
[
PARAM_TYPES
[
param
]
for
param
in
self
.
param_names
]
self
.
objective_names
=
OBJECTIVES
self
.
maximize_indices
=
[
self
.
objective_names
.
index
(
res
)
for
res
in
MAXIMIZE
]
self
.
objective_names
=
OBJECTIVE
_NAME
S
self
.
maximize_indices
=
[
i
for
i
,
maximize
in
enumerate
(
MAXIMIZE
)
if
maximize
]
n_var
=
len
(
self
.
param_names
)
n_obj
=
len
(
self
.
objective_names
)
xl
=
np
.
array
([
PARAM_BOUND_VALUES
[
param
][
0
]
for
param
in
self
.
param_names
])
...
...
@@ -78,21 +79,75 @@ def run_optimization(use_adaptive_instance_selection):
results_folder
=
create_results_folder
()
# Set the adaptive instance selection flag
OPTIMIZATION_CONFIG
[
"
USE_ADAPTIVE_INSTANCE_SELECTION
"
]
=
use_adaptive_instance_selection
OPTIMIZATION_CONFIG
[
'
USE_ADAPTIVE_INSTANCE_SELECTION
'
]
=
use_adaptive_instance_selection
adaptive_frequency
=
OPTIMIZATION_CONFIG
[
'
ADAPTIVE_INSTANCE_SELECTION_FREQUENCY
'
]
# Initialize the population size
pop_size
=
OPTIMIZATION_CONFIG
[
'
POP_SIZE
'
]
# Initialize the optimization algorithm
algorithm
=
initialize_algorithm
(
OPTIMIZATION_CONFIG
[
'
ALGORITHM_NAME
'
],
OPTIMIZATION_CONFIG
.
get
(
'
POP_SIZE
'
)
pop_size
)
# Define the optimization problem
problem
=
OptimizationProblem
()
start_time
=
time
.
time
()
res
=
None
# Initialize res to handle early termination case
try
:
# Run the optimization
res
=
minimize
(
problem
,
algorithm
,
(
"
n_gen
"
,
OPTIMIZATION_CONFIG
[
'
N_GEN
'
]),
verbose
=
True
)
for
gen
in
range
(
OPTIMIZATION_CONFIG
[
'
N_GEN
'
]):
if
pop_size
<=
OPTIMIZATION_CONFIG
[
'
MIN_POP_SIZE
'
]:
print
(
"
Stopping optimization as population size has reached the minimum threshold.
"
)
break
res
=
minimize
(
problem
,
algorithm
,
(
"
n_gen
"
,
1
),
verbose
=
True
)
if
use_adaptive_instance_selection
and
gen
>
0
and
(
gen
+
1
)
%
adaptive_frequency
==
0
:
current_samples
=
res
.
pop
.
get
(
"
X
"
)
current_results
=
res
.
pop
.
get
(
"
F
"
)
print
(
f
"
Generation
{
gen
+
1
}
: Applying adaptive instance selection
"
)
print
(
f
"
Current samples:
{
current_samples
.
shape
}
"
)
print
(
f
"
Current results:
{
current_results
.
shape
}
"
)
try
:
# Apply adaptive instance selection
adaptive_samples
=
adaptive_select_informative_instances
(
current_samples
,
current_results
,
initial_threshold
=
0.05
,
adapt_rate
=
0.01
,
desired_samples
=
pop_size
//
2
)
# Select half from algorithm population and half from adaptive instance selection
num_algorithm_samples
=
pop_size
//
2
algorithm_samples_indices
=
np
.
random
.
choice
(
len
(
current_samples
),
num_algorithm_samples
,
replace
=
False
)
algorithm_samples
=
current_samples
[
algorithm_samples_indices
]
combined_samples
=
np
.
vstack
((
algorithm_samples
,
adaptive_samples
))
# Evaluate all combined samples
out
=
{
"
F
"
:
np
.
zeros
((
len
(
combined_samples
),
len
(
problem
.
objective_names
)))}
# Initialize output
problem
.
_evaluate
(
combined_samples
,
out
=
out
)
# Evaluate combined samples
res
.
pop
.
set
(
"
X
"
,
combined_samples
[:
pop_size
])
# Set only the first pop_size samples
res
.
pop
.
set
(
"
F
"
,
np
.
array
(
out
[
"
F
"
])[:
pop_size
])
# Set only the first pop_size results
except
RuntimeError
as
e
:
print
(
f
"
Adaptive instance selection failed:
{
e
}
"
)
# If adaptive instance selection fails, fall back to using the current population
res
.
pop
.
set
(
"
X
"
,
current_samples
)
res
.
pop
.
set
(
"
F
"
,
current_results
)
# Reduce population size dynamically
pop_size
=
max
(
OPTIMIZATION_CONFIG
[
'
MIN_POP_SIZE
'
],
int
(
pop_size
*
0.9
))
# Reduce by 10% each iteration, minimum threshold
algorithm
=
initialize_algorithm
(
OPTIMIZATION_CONFIG
[
'
ALGORITHM_NAME
'
],
pop_size
)
finally
:
# Cleanup temporary directories
cleanup_temp_dirs
()
...
...
@@ -100,26 +155,28 @@ def run_optimization(use_adaptive_instance_selection):
elapsed_time
=
end_time
-
start_time
print
(
f
"
Time with
{
'
out
'
if
not
use_adaptive_instance_selection
else
''
}
adaptive instance selection:
{
elapsed_time
:
.
2
f
}
seconds
"
)
print_and_plot_results
(
res
,
problem
)
# Save results to a file
# Negate back the maximized objectives before saving
results_to_save
=
res
.
F
.
copy
()
for
i
in
range
(
len
(
results_to_save
)):
for
idx
in
problem
.
maximize_indices
:
results_to_save
[
i
][
idx
]
=
-
results_to_save
[
i
][
idx
]
results_data
=
{
"
results
"
:
res
.
F
.
tolist
(),
"
elapsed_time
"
:
elapsed_time
,
"
use_adaptive_instance_selection
"
:
use_adaptive_instance_selection
}
filename
=
os
.
path
.
join
(
results_folder
,
f
'
optimization_results_
{
"
with
"
if
use_adaptive_instance_selection
else
"
without
"
}
_adaptive.json
'
)
with
open
(
filename
,
'
w
'
)
as
f
:
json
.
dump
(
results_data
,
f
)
print
(
f
"
Results have been stored in:
{
filename
}
"
)
return
res
.
F
,
elapsed_time
if
res
is
not
None
:
print_and_plot_results
(
res
,
problem
)
# Save results to a file
# Negate back the maximized objectives before saving
results_to_save
=
res
.
F
.
copy
()
for
i
in
range
(
len
(
results_to_save
)):
for
idx
in
problem
.
maximize_indices
:
results_to_save
[
i
][
idx
]
=
-
results_to_save
[
i
][
idx
]
results_data
=
{
"
results
"
:
results_to_save
.
tolist
(),
"
elapsed_time
"
:
elapsed_time
,
"
use_adaptive_instance_selection
"
:
use_adaptive_instance_selection
}
filename
=
os
.
path
.
join
(
results_folder
,
f
'
optimization_results_
{
"
with
"
if
use_adaptive_instance_selection
else
"
without
"
}
_adaptive.json
'
)
with
open
(
filename
,
'
w
'
)
as
f
:
json
.
dump
(
results_data
,
f
)
print
(
f
"
Results have been stored in:
{
filename
}
"
)
return
res
.
F
if
res
is
not
None
else
None
,
elapsed_time
def
print_and_plot_results
(
res
,
problem
):
print
(
"
Optimization Results:
"
)
...
...
@@ -136,9 +193,10 @@ def print_and_plot_results(res, problem):
print
()
try
:
plt
.
figure
(
figsize
=
(
8
,
6
))
for
idx
in
problem
.
maximize_indices
:
res
.
F
[:,
idx
]
=
-
res
.
F
[:,
idx
]
plt
.
figure
(
figsize
=
(
8
,
6
))
plt
.
scatter
(
res
.
F
[:,
0
],
res
.
F
[:,
1
])
plt
.
xlabel
(
PLOT_CONFIG
[
"
PLOT_X
"
],
fontsize
=
14
)
plt
.
ylabel
(
PLOT_CONFIG
[
"
PLOT_Y
"
],
fontsize
=
14
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment