# hub.solver.openevolve.evaluator_builder

Domain specification

Domain

# build_evaluator

build_evaluator(
  domain_factories: list[typing.Callable[[], skdecide.domains.Domain]],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
timeout: = 60,
enforce_using_public_api: = False
) ->

Generate code for the evaluate function used by openevolve.

  • The evaluate function must take only program_path as argument.
  • It must be written in a separate file, so this wrapper makes the necessary arguments importable by storing them in the globals of this module.

# Parameters

  • domain_factories: factories creating the domains on which rollouts are performed
  • max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
  • num_episodes: number of episodes to do per domain
  • normalize: whether to normalize cost by max steps (potentially related to domain size)
  • timeout: seconds before timeout
  • enforce_using_public_api: if True, wraps the domains so that only their public API can be seen by the program

# Returns

evaluation code to be used by openevolve

# run_with_timeout

run_with_timeout(
  func: typing.Callable[..., typing.Any],
*args,
timeout_seconds: = 30,
**kwargs
) -> typing.Any

Run a function with a timeout.

# evaluate_solver_on_single_domain

evaluate_solver_on_single_domain(
  program_path: ,
domain_factory: typing.Callable[[], skdecide.domains.Domain],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
render: = False,
enforce_using_public_api: = False
) -> tuple[float, ...]

Evaluate an evolved solver on a domain instance.

We evaluate by performing a rollout on the domain and taking the total cost, potentially normalized by max_steps.

# Parameters

  • program_path: path to the program to evaluate
  • domain_factory: domain factory to generate a domain for the planner and for the rollout
  • max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
  • num_episodes: number of episodes to do
  • normalize: whether to normalize cost by max steps (potentially related to domain size)
  • render: render the domain during rollout
  • enforce_using_public_api: if True, wraps the domain so that only its public API can be seen by the program

# Returns

total cost of the rollout, potentially normalized by max_steps

# evaluate_solver_on_several_domains

evaluate_solver_on_several_domains(
  program_path: ,
domain_factories: list[typing.Callable[[], skdecide.domains.Domain]],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
render: = False,
enforce_using_public_api: = False
) -> tuple[float, ...]

Evaluate an evolved solver on several domain instances.

We evaluate by performing a rollout on domains and taking the total cost, potentially normalized by max_steps.

# Parameters

  • program_path: path to the program to evaluate on the given domains
  • domain_factories: factories creating the domains on which rollouts are performed
  • max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
  • num_episodes: number of episodes to do per domain
  • normalize: whether to normalize cost by max steps (potentially related to domain size)
  • render: render the domain during rollout
  • enforce_using_public_api: if True, wraps the domains so that only their public API can be seen by the program

# Returns

  • total cost of the rollout, potentially normalized by max_steps
  • total number of reached goals (when domain inherits Goals)

# evaluate_solver_on_several_domains_with_timeout

evaluate_solver_on_several_domains_with_timeout(
  program_path: ,
domain_factories: list[typing.Callable[[], skdecide.domains.Domain]],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
render: = False,
timeout: = 60,
enforce_using_public_api: = False
) -> tuple[float, ...]

Evaluate an evolved solver on several domain instances, with timeout.

We evaluate by performing a rollout on domains and taking the total cost, potentially normalized by max_steps.

# Parameters

  • program_path: path to the program to evaluate on the given domains
  • domain_factories: factories creating the domains on which rollouts are performed
  • max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
  • num_episodes: number of episodes to do per domain
  • normalize: whether to normalize cost by max steps (potentially related to domain size)
  • render: render the domain during rollout
  • timeout: seconds before timeout
  • enforce_using_public_api: if True, wraps the domains so that only their public API can be seen by the program

# Returns

total cost of the rollout, potentially normalized by max_steps

# EvolvedPolicies

Policies wrapper around an evolved program.

# Constructor EvolvedPolicies

EvolvedPolicies(
  domain: ,
program_path: ,
enforce_using_public_api: = False
)

Initialize the wrapper.

# Parameters

  • program_path: path to the program to evaluate
  • domain: domain to pass to the wrapped planner
  • enforce_using_public_api: if True, wraps the domain so that only its public API can be seen by the program

# is_policy_defined_for Policies

is_policy_defined_for(
  self,
observation: StrDict[D.T_observation]
) -> bool

Check whether the solver's current policy is defined for the given observation.

# Parameters

  • observation: The observation to consider.

# Returns

True if the policy is defined for the given observation (False otherwise).

# sample_action Policies

sample_action(
  self,
observation: StrDict[D.T_observation],
domain: Optional[Domain] = None
) -> StrDict[list[D.T_event]]

Sample an action for the given observation (from the solver's current policy).

# Parameters

  • observation: The observation for which an action must be sampled.
  • domain: the domain source of the observation. Typically used to get current applicable actions or action mask.

# Returns

The sampled action.

# _is_policy_defined_for Policies

_is_policy_defined_for(
  self,
observation: skdecide.core.StrDict[skdecide.domains.T_observation]
) ->

Check whether the solver's current policy is defined for the given observation.

# Parameters

  • observation: The observation to consider.

# Returns

True if the policy is defined for the given observation (False otherwise).

# _sample_action Policies

_sample_action(
  self,
observation: skdecide.core.StrDict[skdecide.domains.T_observation],
domain: typing.Optional[skdecide.domains.Domain] = None
) -> skdecide.core.StrDict[list[skdecide.domains.T_event]]

Sample an action for the given observation (from the solver's current policy).

# Parameters

  • observation: The observation for which an action must be sampled.
  • domain: the domain source of the observation. Typically used to get current applicable actions or action mask. NB: the domain has not been autocast, so it may not respect the T_domain specs.

# Returns

The sampled action.

# evaluate

evaluate(
  program_path: ,
domain_factories: list[typing.Callable[[], skdecide.domains.Domain]],
max_steps: typing.Union[int, typing.Callable[[skdecide.domains.Domain], int]] = 100,
num_episodes: = 3,
normalize: = True,
render: = False,
timeout: = 60,
enforce_using_public_api: = False
) ->

Evaluate function to use in openevolve evolution.

Calls evaluate_solver_on_several_domains_with_timeout and wraps result in the proper openevolve object.

# Parameters

  • program_path: path to the program to evaluate on the given domains
  • domain_factories: factories creating the domains on which rollouts are performed
  • max_steps: maximum number of steps per episode. Either an integer or a callable mapping a domain to an integer.
  • num_episodes: number of episodes to do per domain
  • normalize: whether to normalize cost by max steps (potentially related to domain size)
  • render: render the domain during rollout
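  • timeout: seconds before timeout
  • enforce_using_public_api: if True, wraps the domains so that only their public API can be seen by the program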

# Returns

combined_score: -inf for incorrect programs, else -total_cost