Skip to content

ConfigLoader

ConfigLoader

Facade to configure the entire benchmark execution environment.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `filepath_` | `str` | path to configuration file. | required |
| `factory` | `TranscriberFactoryABC` | factory to set up Transcribers. | `DefaultTranscriberFactory()` |
| `observer` | `Observer` | observer to show execution status. | `ConsoleObserver()` |
Source code in asrbench\config_loader.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
class ConfigLoader:
    """Facade to configure the entire benchmark execution environment.

    Arguments:
        filepath_: path to configuration file.
        factory: factory to set up Transcribers.
        observer: observer to show execution status.
    """

    def __init__(
            self,
            filepath_: str,
            factory: TranscriberFactoryABC = DefaultTranscriberFactory(),
            observer: Observer = ConsoleObserver()
    ) -> None:
        utils.check_path(filepath_)

        self.__path: str = filepath_
        self._observer: Observer = observer
        self.__data: Dict[str, Dict[str, Any]] = self.read_data()
        self.__factory: TranscriberFactoryABC = factory
        self.__output_cfg: Dict[str, str] = self.data.get("output", {})
        self.check_external_transcribers()

    @property
    def data(self) -> Dict[str, Dict[str, Any]]:
        return self.__data

    def read_data(self) -> Dict[str, Any]:
        """Read config data."""
        self._observer.notify("Reading configfile.")

        with open(self.__path, "r") as file:
            config: Dict[str, Any] = yaml.safe_load(file)

        return config

    def check_external_transcribers(self) -> None:
        if "transcriber_dir" in self.data:
            external_path: Path = Path(
                self.get_config_section("transcriber_dir")
            )
            load_registers(external_path)

    def set_up_benchmark(self) -> BenchmarkABC:
        self._observer.notify("Mounting Benchmark.")
        benchmark = DefaultBenchmark(
            datasets=self.get_datasets(),
            transcribers=self.get_transcribers(),
            output=self.get_output(),
            observer=self._observer,
            jiwer_=JiwerManager(language=self.get_language())
        )
        return benchmark

    def get_language(self) -> str:
        language: str = self.data.get("language", "en")

        if not is_language_supported(language):
            raise ValueError(f"Language {language} is not supported.")

        return language

    def get_datasets(self) -> List[Dataset]:
        """Get datasets from the configuration file"""
        if not self.has_dataset():
            raise ValueError("Configfile dont have datasets configuration.")

        return [
            Dataset.from_config(name, config)
            for name, config in self.data.get("datasets").items()
        ]

    def has_dataset(self) -> bool:
        return "datasets" in self.data

    def get_transcribers(self) -> Dict[str, Transcriber]:
        """Get transcribers from the configuration file"""
        return self.__factory.from_config(
            self.get_config_section("transcribers"),
        )

    def get_output(self) -> OutputContextABC:
        """Get output from the configuration file."""
        type_: str = self.get_output_type()

        match type_:
            case "csv":
                return CsvOutputContext(self.get_output_filepath())
            case "json":
                return JsonOutputContext(self.get_output_filepath())
            case _:
                raise ValueError(f"Output type {type_} not supported.")

    def get_output_type(self) -> str:
        return self.__output_cfg.get("type", "csv")

    def set_up_output_filename(self) -> str:
        timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%S")
        return f"{self.get_output_filename()}_{timestamp}"

    def get_output_filepath(self) -> Path:
        """Set up output filepath from the configuration file."""
        return Path(
            self.get_output_dir()
        ).joinpath(
            self.set_up_output_filename()
        )

    def get_output_dir(self) -> str:
        """Get output dir from the configuration file"""
        return self.__output_cfg.get("dir", Path.cwd())

    def get_output_filename(self) -> str:
        """Get output filename from the configuration file."""
        return self.__output_cfg.get("filename", "asrbench")

    def get_config_section(self, section: str) -> Any:
        """Get the section of the configfile by the name provided."""
        if section not in self.data:
            raise KeyError(f"Configfile dont have {section} section.")
        return self.data[section]

    @staticmethod
    def get_section_value(section: Dict[str, Any], key: str) -> Any:
        """Get the value from the section and key provided."""
        if key not in section or section[key] is None:
            raise KeyError(f"Configfile {section} section missing {key}.")

        return section[key]

get_config_section(section)

Get the section of the configfile by the name provided.

Source code in asrbench\config_loader.py
135
136
137
138
139
def get_config_section(self, section: str) -> Any:
    """Return the top-level configfile entry stored under `section`.

    Raises:
        KeyError: if the configfile has no entry with that name.
    """
    config = self.data

    if section in config:
        return config[section]

    raise KeyError(f"Configfile dont have {section} section.")

get_datasets()

Get datasets from the configuration file

Source code in asrbench\config_loader.py
81
82
83
84
85
86
87
88
89
def get_datasets(self) -> List[Dataset]:
    """Get datasets from the configuration file.

    Returns:
        One Dataset per entry of the `datasets` section, built with
        Dataset.from_config(name, config).

    Raises:
        ValueError: if the `datasets` section is missing or empty (null).
    """
    datasets = self.data.get("datasets") if self.has_dataset() else None

    # A bare `datasets:` key parses as None; report it the same way as a
    # missing section instead of failing on `.items()`.
    if datasets is None:
        raise ValueError("Configfile dont have datasets configuration.")

    return [
        Dataset.from_config(name, config)
        for name, config in datasets.items()
    ]

get_output()

Get output from the configuration file.

Source code in asrbench\config_loader.py
100
101
102
103
104
105
106
107
108
109
110
def get_output(self) -> OutputContextABC:
    """Build the output context selected by the configuration file.

    Raises:
        ValueError: if the configured output type is neither csv nor json.
    """
    kind: str = self.get_output_type()

    if kind == "csv":
        return CsvOutputContext(self.get_output_filepath())

    if kind == "json":
        return JsonOutputContext(self.get_output_filepath())

    raise ValueError(f"Output type {kind} not supported.")

get_output_dir()

Get output dir from the configuration file

Source code in asrbench\config_loader.py
127
128
129
def get_output_dir(self) -> str:
    """Get output dir from the configuration file.

    Returns:
        The configured `dir` as a string, or the current working directory
        when `dir` is missing or null.
    """
    configured = self.__output_cfg.get("dir")

    # Path.cwd() is a Path; convert so the result matches the annotation.
    if configured is None:
        return str(Path.cwd())

    return str(configured)

get_output_filename()

Get output filename from the configuration file.

Source code in asrbench\config_loader.py
131
132
133
def get_output_filename(self) -> str:
    """Return the configured output filename, or "asrbench" if unset."""
    output_cfg = self.__output_cfg

    if "filename" in output_cfg:
        return output_cfg["filename"]

    return "asrbench"

get_output_filepath()

Set up output filepath from the configuration file.

Source code in asrbench\config_loader.py
119
120
121
122
123
124
125
def get_output_filepath(self) -> Path:
    """Join the output directory and the timestamped output filename."""
    output_dir = Path(self.get_output_dir())
    filename = self.set_up_output_filename()
    return output_dir.joinpath(filename)

get_section_value(section, key) staticmethod

Get the value from the section and key provided.

Source code in asrbench\config_loader.py
141
142
143
144
145
146
147
@staticmethod
def get_section_value(section: Dict[str, Any], key: str) -> Any:
    """Get the value from the section and key provided."""
    if key not in section or section[key] is None:
        raise KeyError(f"Configfile {section} section missing {key}.")

    return section[key]

get_transcribers()

Get transcribers from the configuration file

Source code in asrbench\config_loader.py
94
95
96
97
98
def get_transcribers(self) -> Dict[str, Transcriber]:
    """Build the transcribers described by the `transcribers` section."""
    build = self.__factory.from_config
    return build(self.get_config_section("transcribers"))

read_data()

Read config data.

Source code in asrbench\config_loader.py
46
47
48
49
50
51
52
53
def read_data(self) -> Dict[str, Any]:
    """Read config data.

    Returns:
        The parsed configfile, or an empty mapping when the file has no
        content.

    Raises:
        ValueError: if the top level of the configfile is not a mapping.
    """
    self._observer.notify("Reading configfile.")

    # Explicit encoding: do not depend on the platform locale.
    with open(self.__path, "r", encoding="utf-8") as file:
        config: Any = yaml.safe_load(file)

    # safe_load() yields None for an empty document; callers use `.get`
    # and `in` on the result, so hand back an empty mapping instead.
    if config is None:
        return {}

    if not isinstance(config, dict):
        raise ValueError(
            "Configfile must contain a mapping at the top level."
        )

    return config