Source code for discrete_optimization.ovensched.parser

#  Copyright (c) 2026 AIRBUS and its affiliates.
#  This source code is licensed under the MIT license found in the
#  LICENSE file in the root directory of this source tree.
import os
import re
from typing import Optional

from discrete_optimization.datasets import ERROR_MSG_MISSING_DATASETS, get_data_home
from discrete_optimization.ovensched.problem import (
    MachineData,
    OvenSchedulingProblem,
    TaskData,
)



[docs]
def get_data_available(
    data_folder: Optional[str] = None, data_home: Optional[str] = None
) -> list[str]:
    """Get datasets available for oven scheduling problem.

    The folder is searched recursively and every file whose name ends with
    ".dat" is reported.

    Params:
        data_folder: folder where datasets for ovensched should be found.
            If None, we look in "ovensched" subdirectory of `data_home`.
        data_home: root directory for all datasets. If None, set by
            default to "~/discrete_optimization_data"

    Returns:
        Absolute paths of the ".dat" files found under `data_folder`
        (empty list if the folder exists but contains none).

    Raises:
        FileNotFoundError: if `data_folder` is not an existing directory.

    """
    if data_folder is None:
        data_home = get_data_home(data_home=data_home)
        data_folder = f"{data_home}/ovensched"

    # os.walk() ignores scandir errors by default (onerror=None), so a missing
    # folder would silently yield nothing. Check explicitly so that the user
    # gets the "datasets are missing" hint instead of an empty list.
    if not os.path.isdir(data_folder):
        raise FileNotFoundError(
            f"No such file or directory: '{data_folder}'" + ERROR_MSG_MISSING_DATASETS
        )

    # Walk through all subdirectories to find .dat files
    return [
        os.path.abspath(os.path.join(root, filename))
        for root, _dirs, filenames in os.walk(data_folder)
        for filename in filenames
        if filename.endswith(".dat")
    ]




[docs]
def parse_dat_file(file_path: str) -> OvenSchedulingProblem:
    """Parse a .dat file and return an OvenSchedulingProblem instance.

    The file is read as a sequence of ``name = value;`` assignments. Each
    value is evaluated as a Python expression (numbers and, possibly nested,
    lists). Attribute, initial-state and eligible-machine indices are shifted
    by -1 before being stored.

    Args:
        file_path: path of the .dat instance file to read.

    Returns:
        The problem built from the file. Its ``additional_data`` attribute is
        set to a dict with keys "ub", "weight_tardiness", "weight_processing",
        "weight_setup_cost" and "running_time_ub".

    Raises:
        ValueError: if a required field cannot be found in the file.
        SyntaxError: if a field value is not a valid Python expression.
        NameError: if a field value refers to an unknown name.
        These three are re-raised after a diagnostic has been printed.
    """
    with open(file_path, "r") as f:
        content = f.read()

    def _search_raw(name: str) -> Optional[str]:
        """Return the text between ``name =`` and the next ``;`` (None if absent)."""
        # \s* allows optional whitespace around '='; the non-greedy capture
        # with DOTALL lets a value span several lines and stops at the first
        # ';' that follows.
        match = re.search(rf"{name}\s*=\s*(.+?);", content, re.DOTALL)
        return None if match is None else match.group(1)

    def _robust_search_and_eval(name: str, is_int: bool = False):
        """Find field `name`, evaluate its value and raise a clear error if missing."""
        raw_value = _search_raw(name)
        if raw_value is None:
            raise ValueError(
                f"Parser error: Could not find field '{name}' in '{file_path}'."
            )

        # SECURITY: eval() executes the captured text as arbitrary Python code.
        # Only parse instance files from a trusted source.
        # NOTE(review): if all values are plain literals, ast.literal_eval
        # would be a safer replacement - confirm against the data files.
        value = eval(raw_value)
        return int(value) if is_int else value

    try:
        n_jobs = _robust_search_and_eval("nJobs", is_int=True)
        n_machines = _robust_search_and_eval("nMachines", is_int=True)
        # Required field; its value is not used below.
        _robust_search_and_eval("nAttributes", is_int=True)

        upper_bound_integer_objective = _robust_search_and_eval(
            "upper_bound_integer_objective", is_int=True
        )
        mult_factor_total_runtime = _robust_search_and_eval(
            "mult_factor_total_runtime", is_int=True
        )
        mult_factor_finished_toolate = _robust_search_and_eval(
            "mult_factor_finished_toolate", is_int=True
        )
        # Required field; its value is not used below.
        _robust_search_and_eval("mult_factor_total_setuptimes", is_int=True)
        mult_factor_total_setupcosts = _robust_search_and_eval(
            "mult_factor_total_setupcosts", is_int=True
        )
        # This value used to be read from "upper_bound_integer_objective",
        # which looks like a copy-paste slip. Read the dedicated field when
        # the file has one and keep the former value as a fallback so that
        # files without it are parsed exactly as before.
        if _search_raw("running_time_bound") is None:
            running_time_bound = upper_bound_integer_objective
        else:
            running_time_bound = _robust_search_and_eval(
                "running_time_bound", is_int=True
            )

        # The first row of both setup matrices is dropped.
        # NOTE(review): presumably a placeholder/initial-state row - confirm
        # against the file format.
        setup_costs = _robust_search_and_eval("SetupCosts")[1:]
        setup_times = _robust_search_and_eval("SetupTimes")[1:]
        shift_starts = _robust_search_and_eval("ShiftStartTimes")
        shift_ends = _robust_search_and_eval("ShiftEndTimes")
        job_sizes = _robust_search_and_eval("JobSize")
        job_attributes = [x - 1 for x in _robust_search_and_eval("Attribute")]
        min_times = _robust_search_and_eval("MinTime")
        max_times = _robust_search_and_eval("MaxTime")
        earliest_starts = _robust_search_and_eval("EarliestStart")
        latest_ends = _robust_search_and_eval("LatestEnd")
        eligible_machines_raw = _robust_search_and_eval("EligibleMachines")
        machine_capacities = _robust_search_and_eval("MaxCap")
        initial_states = [x - 1 for x in _robust_search_and_eval("initState")]

        # Build tasks_data: one TaskData per job, machine indices shifted by -1.
        tasks_data = [
            TaskData(
                attribute=job_attributes[j],
                min_duration=min_times[j],
                max_duration=max_times[j],
                earliest_start=earliest_starts[j],
                latest_end=latest_ends[j],
                eligible_machines={x - 1 for x in eligible_machines_raw[j]},
                size=job_sizes[j],
            )
            for j in range(n_jobs)
        ]

        # Build machines_data: shifts whose end is not after their start are
        # skipped.
        machines_data = [
            MachineData(
                capacity=machine_capacities[m],
                initial_attribute=initial_states[m],
                availability=[
                    (s, e) for s, e in zip(shift_starts[m], shift_ends[m]) if e > s
                ],
            )
            for m in range(n_machines)
        ]

        problem = OvenSchedulingProblem(
            n_jobs=n_jobs,
            n_machines=n_machines,
            tasks_data=tasks_data,
            machines_data=machines_data,
            setup_costs=setup_costs,
            setup_times=setup_times,
        )

        problem.additional_data = {
            "ub": upper_bound_integer_objective,
            "weight_tardiness": mult_factor_finished_toolate,
            "weight_processing": mult_factor_total_runtime,
            "weight_setup_cost": mult_factor_total_setupcosts,
            "running_time_ub": running_time_bound,
        }
        return problem

    except (ValueError, SyntaxError, NameError) as e:
        # Print a short diagnostic, then let the original exception propagate.
        print("--- PARSER FAILED ---")
        print(
            f"An error occurred while parsing '{file_path}'. Please check the file format."
        )
        print(f"Details: {e}")
        raise