Skip to content

Jiwer

JiwerManager

Source code in asrbench\jiwer_.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
class JiwerManager:
    """Compute jiwer transcription metrics with language-aware normalization.

    Every metric runs the reference and the hypothesis through the same
    normalization steps before comparing them, and is rounded to two
    decimal places.
    """

    def __init__(self, language: str) -> None:
        """Remember the language used for number-to-word normalization.

        Parameters:
            language: value forwarded to ``_normalize_number2word``.
        """
        self.__lang: str = language

    def normalize_txt(self, txt: str) -> str:
        """Return the text post-processed by the jiwer normalize transform.

        Parameters:
            txt: text to normalize.
        """
        compose: jiwer.Compose = self._create_normalize_transform()
        processed_txt: str = compose(txt)
        # Joining is a no-op for a plain string and concatenates the pieces
        # if the transform hands back a list of strings.
        return "".join(processed_txt)

    def get_measures(self, reference: str, hypothesis: str) -> Measures:
        """Return all measures provided by jiwer (wer, cer, mer, wil, wip).

        Parameters:
            reference: verified transcript.
            hypothesis: generated transcript.
        """
        return Measures(
            wer=self.get_wer(reference, hypothesis),
            cer=self.get_cer(reference, hypothesis),
            mer=self.get_mer(reference, hypothesis),
            wil=self.get_wil(reference, hypothesis),
            wip=self.get_wip(reference, hypothesis)
        )

    def get_wer(self, reference: str, hypothesis: str) -> float:
        """Measure Word Error Rate [WER].

        Parameters:
            reference: verified transcript.
            hypothesis: generated transcript.
        """
        return self._score(
            jiwer.wer, self._create_default_transform, reference, hypothesis
        )

    def get_cer(self, reference: str, hypothesis: str) -> float:
        """Measure Character Error Rate [CER].

        Parameters:
            reference: verified transcript.
            hypothesis: generated transcript.
        """
        return self._score(
            jiwer.cer, self._create_char_transform, reference, hypothesis
        )

    def get_mer(self, reference: str, hypothesis: str) -> float:
        """Measure Match Error Rate [MER].

        Parameters:
            reference: verified transcript.
            hypothesis: generated transcript.
        """
        return self._score(
            jiwer.mer, self._create_default_transform, reference, hypothesis
        )

    def get_wil(self, reference: str, hypothesis: str) -> float:
        """Measure Word Information Lost [WIL].

        Parameters:
            reference: verified transcript.
            hypothesis: generated transcript.
        """
        return self._score(
            jiwer.wil, self._create_default_transform, reference, hypothesis
        )

    def get_wip(self, reference: str, hypothesis: str) -> float:
        """Measure Word Information Preserved [WIP].

        Parameters:
            reference: verified transcript.
            hypothesis: generated transcript.
        """
        return self._score(
            jiwer.wip, self._create_default_transform, reference, hypothesis
        )

    def _score(
        self, metric, make_transform, reference: str, hypothesis: str
    ) -> float:
        """Apply one jiwer metric function and round it to two decimals.

        Parameters:
            metric: jiwer metric function (e.g. ``jiwer.wer``); it is called
                with keyword arguments only.
            make_transform: zero-argument factory returning the
                ``jiwer.Compose`` applied to each transcript.
            reference: verified transcript.
            hypothesis: generated transcript.
        """
        return round(
            metric(
                reference=reference,
                reference_transform=make_transform(),
                hypothesis=hypothesis,
                hypothesis_transform=make_transform(),
            ),
            2,
        )

    def _normalization_steps(self) -> list:
        """Build the ordered text clean-up steps shared by every transform.

        Accent stripping deliberately runs after number-to-word conversion so
        that any accents the converter produces are removed as well; all
        transforms share this order so ``normalize_txt`` agrees with the text
        the metrics actually compare.
        """
        # NOTE(review): punctuation is removed after whitespace is collapsed
        # and stripped, so deleting a free-standing punctuation mark may leave
        # a doubled or leading space behind — confirm this is intended,
        # especially for the character-level transform.
        return [
            jiwer.RemoveEmptyStrings(),
            jiwer.ToLowerCase(),
            jiwer.RemoveMultipleSpaces(),
            jiwer.Strip(),
            jiwer.RemovePunctuation(),
            lambda texts: _normalize_number2word(texts, self.__lang),
            _remove_accents,
        ]

    def _create_default_transform(self) -> jiwer.Compose:
        """Return the word-level transform used by WER, MER, WIL and WIP."""
        return jiwer.Compose(
            self._normalization_steps() + [jiwer.ReduceToListOfListOfWords()]
        )

    def _create_char_transform(self) -> jiwer.Compose:
        """Return the character-level transform used by CER."""
        return jiwer.Compose(
            self._normalization_steps() + [jiwer.ReduceToListOfListOfChars()]
        )

    def _create_normalize_transform(self) -> jiwer.Compose:
        """Return the clean-up transform without a final word/char reduction."""
        return jiwer.Compose(self._normalization_steps())

get_cer(reference, hypothesis)

Measure Character Error Rate [CER].

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| reference | str | verified transcript. | required |
| hypothesis | str | generated transcript. | required |

Source code in asrbench\jiwer_.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def get_cer(self, reference: str, hypothesis: str) -> float:
    """Compute the Character Error Rate [CER], rounded to two decimals.

    Both transcripts are passed through a transform built by
    ``_create_char_transform`` before ``jiwer.cer`` compares them.

    Parameters:
        reference: verified transcript.
        hypothesis: generated transcript.
    """
    reference_pipeline = self._create_char_transform()
    hypothesis_pipeline = self._create_char_transform()
    raw_cer = jiwer.cer(
        reference=reference,
        hypothesis=hypothesis,
        reference_transform=reference_pipeline,
        hypothesis_transform=hypothesis_pipeline,
    )
    return round(raw_cer, 2)

get_measures(reference, hypothesis)

Returns all measures provided by jiwer (wer, cer, mer, wil, wip)

Source code in asrbench\jiwer_.py
19
20
21
22
23
24
25
26
27
def get_measures(self, reference: str, hypothesis: str) -> Measures:
    """Bundle every jiwer measure (wer, cer, mer, wil, wip) into ``Measures``.

    Parameters:
        reference: verified transcript.
        hypothesis: generated transcript.
    """
    # Computed one by one, in the same order as the Measures fields.
    word_error = self.get_wer(reference, hypothesis)
    char_error = self.get_cer(reference, hypothesis)
    match_error = self.get_mer(reference, hypothesis)
    info_lost = self.get_wil(reference, hypothesis)
    info_preserved = self.get_wip(reference, hypothesis)
    return Measures(
        wer=word_error,
        cer=char_error,
        mer=match_error,
        wil=info_lost,
        wip=info_preserved,
    )

get_mer(reference, hypothesis)

Measure Match Error Rate [MER].

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| reference | str | verified transcript. | required |
| hypothesis | str | generated transcript. | required |

Source code in asrbench\jiwer_.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def get_mer(self, reference: str, hypothesis: str) -> float:
    """Compute the Match Error Rate [MER], rounded to two decimals.

    Both transcripts are passed through a transform built by
    ``_create_default_transform`` before ``jiwer.mer`` compares them.

    Parameters:
        reference: verified transcript.
        hypothesis: generated transcript.
    """
    reference_pipeline = self._create_default_transform()
    hypothesis_pipeline = self._create_default_transform()
    raw_mer = jiwer.mer(
        reference=reference,
        hypothesis=hypothesis,
        reference_transform=reference_pipeline,
        hypothesis_transform=hypothesis_pipeline,
    )
    return round(raw_mer, 2)

get_wer(reference, hypothesis)

Measure Word Error Rate [WER].

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| reference | str | verified transcript. | required |
| hypothesis | str | generated transcript. | required |

Source code in asrbench\jiwer_.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def get_wer(self, reference: str, hypothesis: str) -> float:
    """Compute the Word Error Rate [WER], rounded to two decimals.

    Both transcripts are passed through a transform built by
    ``_create_default_transform`` before ``jiwer.wer`` compares them.

    Parameters:
        reference: verified transcript.
        hypothesis: generated transcript.
    """
    reference_pipeline = self._create_default_transform()
    hypothesis_pipeline = self._create_default_transform()
    raw_wer = jiwer.wer(
        reference=reference,
        hypothesis=hypothesis,
        reference_transform=reference_pipeline,
        hypothesis_transform=hypothesis_pipeline,
    )
    return round(raw_wer, 2)

get_wil(reference, hypothesis)

Measure Word Information Lost [WIL].

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| reference | str | verified transcript. | required |
| hypothesis | str | generated transcript. | required |

Source code in asrbench\jiwer_.py
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def get_wil(self, reference: str, hypothesis: str) -> float:
    """Compute the Word Information Lost [WIL], rounded to two decimals.

    Both transcripts are passed through a transform built by
    ``_create_default_transform`` before ``jiwer.wil`` compares them.

    Parameters:
        reference: verified transcript.
        hypothesis: generated transcript.
    """
    reference_pipeline = self._create_default_transform()
    hypothesis_pipeline = self._create_default_transform()
    raw_wil = jiwer.wil(
        reference=reference,
        hypothesis=hypothesis,
        reference_transform=reference_pipeline,
        hypothesis_transform=hypothesis_pipeline,
    )
    return round(raw_wil, 2)

get_wip(reference, hypothesis)

Measure Word Information Preserved [WIP].

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| reference | str | verified transcript. | required |
| hypothesis | str | generated transcript. | required |

Source code in asrbench\jiwer_.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def get_wip(self, reference: str, hypothesis: str) -> float:
    """Compute the Word Information Preserved [WIP], rounded to two decimals.

    Both transcripts are passed through a transform built by
    ``_create_default_transform`` before ``jiwer.wip`` compares them.

    Parameters:
        reference: verified transcript.
        hypothesis: generated transcript.
    """
    reference_pipeline = self._create_default_transform()
    hypothesis_pipeline = self._create_default_transform()
    raw_wip = jiwer.wip(
        reference=reference,
        hypothesis=hypothesis,
        reference_transform=reference_pipeline,
        hypothesis_transform=hypothesis_pipeline,
    )
    return round(raw_wip, 2)

normalize_txt(txt)

Return the post-processed text from the jiwer transform.

Source code in asrbench\jiwer_.py
13
14
15
16
17
def normalize_txt(self, txt: str) -> str:
    """Run ``txt`` through the normalize transform and return one string.

    The transform comes from ``_create_normalize_transform``; whatever it
    yields is concatenated with an empty separator.
    """
    pipeline: jiwer.Compose = self._create_normalize_transform()
    return "".join(pipeline(txt))