# hub.solver.openevolve.evaluator_builder
Domain specification
# build_evaluator
build_evaluator(
domain_factories: list[typing.Callable[[], skdecide.domains.Domain]],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
timeout: = 60,
enforce_using_public_api: = False
) ->
Generate code for the evaluate function used by openevolve.
- The `evaluate` function must take only `program_path` as argument.
- It must be written in a separate file, so this wrapper makes the necessary arguments importable by storing them in the globals of this module.
# Parameters
- domain_factories: domain factories on which to perform the rollouts
- max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
- num_episodes: number of episodes to do per domain
- normalize: whether to normalize cost by max steps (potentially related to domain size)
- timeout: seconds before timeout
- enforce_using_public_api: if True, wraps the domains so that only their public API can be seen by the program
# Returns
evaluation code to be used by openevolve
# run_with_timeout
run_with_timeout(
func: typing.Callable[..., typing.Any],
*args,
timeout_seconds: = 30,
**kwargs
) -> typing.Any
Run a function with a timeout.
# evaluate_solver_on_single_domain
evaluate_solver_on_single_domain(
program_path: ,
domain_factory: typing.Callable[[], skdecide.domains.Domain],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
render: = False,
enforce_using_public_api: = False
) -> tuple[float, ...]
Evaluate an evolved solver on a domain instance.
We evaluate by performing a rollout on the domain and taking the total cost, potentially normalized by max_steps.
# Parameters
- program_path: path to the program to evaluate
- domain_factory: domain factory to generate a domain for the planner and for the rollout
- max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
- num_episodes: number of episodes to do
- normalize: whether to normalize cost by max steps (potentially related to domain size)
- render: render the domain during rollout
- enforce_using_public_api: if True, wraps the domain so that only its public API can be seen by the program
# Returns
total cost of the rollout, potentially normalized by max_steps
# evaluate_solver_on_several_domains
evaluate_solver_on_several_domains(
program_path: ,
domain_factories: list[typing.Callable[[], skdecide.domains.Domain]],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
render: = False,
enforce_using_public_api: = False
) -> tuple[float, ...]
Evaluate an evolved solver on several domain instances.
We evaluate by performing a rollout on domains and taking the total cost, potentially normalized by max_steps.
# Parameters
- program_path: path to the program to evaluate on the given domains
- domain_factories: domain factories on which to perform the rollouts
- max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
- num_episodes: number of episodes to do per domain
- normalize: whether to normalize cost by max steps (potentially related to domain size)
- render: render the domain during rollout
- enforce_using_public_api: if True, wraps the domains so that only their public API can be seen by the program
# Returns
- total cost of the rollout, potentially normalized by max_steps
- total number of reached goals (when the domain inherits `Goals`)
# evaluate_solver_on_several_domains_with_timeout
evaluate_solver_on_several_domains_with_timeout(
program_path: ,
domain_factories: list[typing.Callable[[], skdecide.domains.Domain]],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
render: = False,
timeout: = 60,
enforce_using_public_api: = False
) -> tuple[float, ...]
Evaluate an evolved solver on several domain instances, with timeout.
We evaluate by performing a rollout on domains and taking the total cost, potentially normalized by max_steps.
# Parameters
- program_path: path to the program to evaluate on the given domains
- domain_factories: domain factories on which to perform the rollouts
- max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
- num_episodes: number of episodes to do per domain
- normalize: whether to normalize cost by max steps (potentially related to domain size)
- render: render the domain during rollout
- timeout: seconds before timeout
- enforce_using_public_api: if True, wraps the domains so that only their public API can be seen by the program
# Returns
total cost of the rollout, potentially normalized by max_steps
# EvolvedPolicies
Policies wrapper around evolved program.
# Constructor EvolvedPolicies
EvolvedPolicies(
domain: ,
program_path: ,
enforce_using_public_api: = False
)
Initialize the wrapper
# Parameters
- program_path: path to the program to evaluate
- domain: domain to pass to the wrapped planner
- enforce_using_public_api: if True, wraps the domain so that only its public API can be seen by the program
# is_policy_defined_for Policies
is_policy_defined_for(
self,
observation: StrDict[D.T_observation]
) -> bool
Check whether the solver's current policy is defined for the given observation.
# Parameters
- observation: The observation to consider.
# Returns
True if the policy is defined for the given observation memory (False otherwise).
# sample_action Policies
sample_action(
self,
observation: StrDict[D.T_observation],
domain: Optional[Domain] = None
) -> StrDict[list[D.T_event]]
Sample an action for the given observation (from the solver's current policy).
# Parameters
- observation: The observation for which an action must be sampled.
- domain: the domain source of the observation. Typically used to get current applicable actions or action mask.
# Returns
The sampled action.
# _is_policy_defined_for Policies
_is_policy_defined_for(
self,
observation: skdecide.core.StrDict[skdecide.domains.T_observation]
) ->
Check whether the solver's current policy is defined for the given observation.
# Parameters
- observation: The observation to consider.
# Returns
True if the policy is defined for the given observation memory (False otherwise).
# _sample_action Policies
_sample_action(
self,
observation: skdecide.core.StrDict[skdecide.domains.T_observation],
domain: typing.Optional[skdecide.domains.Domain] = None
) -> skdecide.core.StrDict[list[skdecide.domains.T_event]]
Sample an action for the given observation (from the solver's current policy).
# Parameters
- observation: The observation for which an action must be sampled.
- domain: the domain source of the observation. Typically used to get current applicable actions or action mask. NB: Be careful that the domain has not been autocast, so it may not respect the T_domain specs.
# Returns
The sampled action.
# evaluate
evaluate(
program_path: ,
domain_factories: list[typing.Callable[[], skdecide.domains.Domain]],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
render: = False,
timeout: = 60,
enforce_using_public_api: = False
) ->
Evaluate function to use in openevolve evolution.
Calls `evaluate_solver_on_several_domains_with_timeout` and wraps the result in the proper
openevolve object.
# Parameters
- program_path: path to the program to evaluate on the given domains
- domain_factories: domain factories on which to perform the rollouts
- max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
- num_episodes: number of episodes to do per domain
- normalize: whether to normalize cost by max steps (potentially related to domain size)
- render: render the domain during rollout
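- timeout: seconds before timeout
- enforce_using_public_api: if True, wraps the domains so that only their public API can be seen by the program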
# Returns
combined_score: -inf for incorrect programs, else -total_cost