#  Source code for discrete_optimization.ovensched.parser

#  Copyright (c) 2026 AIRBUS and its affiliates.
#  This source code is licensed under the MIT license found in the
#  LICENSE file in the root directory of this source tree.
import ast
import os
import re
from typing import Optional

from discrete_optimization.datasets import ERROR_MSG_MISSING_DATASETS, get_data_home
from discrete_optimization.ovensched.problem import (
    MachineData,
    OvenSchedulingProblem,
    TaskData,
)


def get_data_available(
    data_folder: Optional[str] = None, data_home: Optional[str] = None
) -> list[str]:
    """Get datasets available for oven scheduling problem.

    Params:
        data_folder: folder where datasets for ovensched should be found.
            If None, we look in "ovensched" subdirectory of `data_home`.
        data_home: root directory for all datasets. If None, set by default to
            "~/discrete_optimization_data"

    Returns:
        Absolute paths of every ".dat" file found under ``data_folder``,
        including files in nested subdirectories.

    Raises:
        FileNotFoundError: if ``data_folder`` does not exist, with a hint
            about how to fetch the datasets appended to the message.
    """
    if data_folder is None:
        data_home = get_data_home(data_home=data_home)
        data_folder = f"{data_home}/ovensched"
    # Bug fix: os.walk() swallows OSError by default (onerror=None), so a
    # missing folder used to silently yield an empty list and the intended
    # FileNotFoundError + ERROR_MSG_MISSING_DATASETS was never raised.
    # Probe the folder explicitly so the error actually surfaces.
    try:
        os.listdir(data_folder)
    except FileNotFoundError as e:
        raise FileNotFoundError(str(e) + ERROR_MSG_MISSING_DATASETS)
    # Walk through all subdirectories to find .dat files.
    return [
        os.path.abspath(os.path.join(root, filename))
        for root, _dirs, filenames in os.walk(data_folder)
        for filename in filenames
        if filename.endswith(".dat")
    ]
def parse_dat_file(file_path: str) -> OvenSchedulingProblem:
    """Parse a .dat file and return an OvenSchedulingProblem instance.

    This version includes robust error handling.

    Args:
        file_path: path to the ``.dat`` instance file.

    Returns:
        The parsed :class:`OvenSchedulingProblem`, with objective weights and
        bounds attached as ``problem.additional_data``.

    Raises:
        ValueError: if a required field is missing from the file.
        SyntaxError: if a field value is not a valid literal.
    """
    with open(file_path, "r") as f:
        content = f.read()

    def _robust_search_and_eval(name: str, is_int: bool = False):
        """Generic helper to find a value with a regex and raise a clear error on failure."""
        # The pattern uses \s* for optional whitespace and a non-greedy capture (.+?)
        # to correctly handle nested lists and multiline values.
        full_pattern = rf"{name}\s*=\s*(.+?);"
        match = re.search(full_pattern, content, re.DOTALL)
        if match is None:
            raise ValueError(
                f"Parser error: Could not find field '{name}' in '{file_path}'."
            )
        # Security fix: the captured string comes from an external file, so it
        # must not be fed to eval() (arbitrary code execution). All expected
        # values are plain literals (ints, nested lists), which literal_eval
        # handles; it raises ValueError/SyntaxError on anything else, both of
        # which are caught by the handler below.
        value = ast.literal_eval(match.group(1).strip())
        return int(value) if is_int else value

    try:
        # --- Scalar fields and objective weights ---
        n_jobs = _robust_search_and_eval("nJobs", is_int=True)
        n_machines = _robust_search_and_eval("nMachines", is_int=True)
        n_attributes = _robust_search_and_eval("nAttributes", is_int=True)
        upper_bound_integer_objective = _robust_search_and_eval(
            "upper_bound_integer_objective", is_int=True
        )
        mult_factor_total_runtime = _robust_search_and_eval(
            "mult_factor_total_runtime", is_int=True
        )
        mult_factor_finished_toolate = _robust_search_and_eval(
            "mult_factor_finished_toolate", is_int=True
        )
        mult_factor_total_setuptimes = _robust_search_and_eval(
            "mult_factor_total_setuptimes", is_int=True
        )
        mult_factor_total_setupcosts = _robust_search_and_eval(
            "mult_factor_total_setupcosts", is_int=True
        )
        # NOTE(review): this re-reads "upper_bound_integer_objective" — it
        # looks like a copy-paste slip (a dedicated runtime-bound field was
        # probably intended), but the correct field name is not visible here,
        # so the original behavior is kept. TODO confirm against the format.
        running_time_bound = _robust_search_and_eval(
            "upper_bound_integer_objective", is_int=True
        )

        # --- Setup matrices: drop the first row (dummy/initial attribute) ---
        setup_costs_raw = _robust_search_and_eval("SetupCosts")
        setup_times_raw = _robust_search_and_eval("SetupTimes")
        setup_costs = setup_costs_raw[1:]
        setup_times = setup_times_raw[1:]

        # --- Per-machine shift windows ---
        shift_starts = _robust_search_and_eval("ShiftStartTimes")
        shift_ends = _robust_search_and_eval("ShiftEndTimes")

        # --- Per-job data (1-based indices in the file -> 0-based here) ---
        job_sizes = _robust_search_and_eval("JobSize")
        job_attributes = [x - 1 for x in _robust_search_and_eval("Attribute")]
        min_times = _robust_search_and_eval("MinTime")
        max_times = _robust_search_and_eval("MaxTime")
        earliest_starts = _robust_search_and_eval("EarliestStart")
        latest_ends = _robust_search_and_eval("LatestEnd")
        eligible_machines_raw = _robust_search_and_eval("EligibleMachines")
        machine_capacities = _robust_search_and_eval("MaxCap")
        initial_states = [x - 1 for x in _robust_search_and_eval("initState")]

        # Build tasks_data
        tasks_data = []
        for j in range(n_jobs):
            tasks_data.append(
                TaskData(
                    attribute=job_attributes[j],
                    min_duration=min_times[j],
                    max_duration=max_times[j],
                    earliest_start=earliest_starts[j],
                    latest_end=latest_ends[j],
                    eligible_machines={x - 1 for x in eligible_machines_raw[j]},
                    size=job_sizes[j],
                )
            )

        # Build machines_data; empty shifts (end <= start) are dropped.
        machines_data = []
        for m in range(n_machines):
            avail_intervals = [
                (s, e) for s, e in zip(shift_starts[m], shift_ends[m]) if e > s
            ]
            machines_data.append(
                MachineData(
                    capacity=machine_capacities[m],
                    initial_attribute=initial_states[m],
                    availability=avail_intervals,
                )
            )

        problem = OvenSchedulingProblem(
            n_jobs=n_jobs,
            n_machines=n_machines,
            tasks_data=tasks_data,
            machines_data=machines_data,
            setup_costs=setup_costs,
            setup_times=setup_times,
        )
        # NOTE(review): mult_factor_total_setuptimes is parsed but not stored
        # here — presumably a "weight_setup_time" entry was intended; verify
        # against consumers of additional_data.
        additional_data = {
            "ub": upper_bound_integer_objective,
            "weight_tardiness": mult_factor_finished_toolate,
            "weight_processing": mult_factor_total_runtime,
            "weight_setup_cost": mult_factor_total_setupcosts,
            "running_time_ub": running_time_bound,
        }
        problem.additional_data = additional_data
        return problem

    except (ValueError, SyntaxError, NameError) as e:
        # Report context before re-raising so callers still see the original
        # exception type and traceback.
        print("--- PARSER FAILED ---")
        print(
            f"An error occurred while parsing '{file_path}'. Please check the file format."
        )
        print(f"Details: {e}")
        raise