# Source code for swtloc.core

# Author : Achintya Gupta
# Purpose : Houses Core Algorithms for Stroke Width Transforms

from typing import List
import numpy as np
import numba as nb


def swt_strokes(edged_image,
                hstep_mat,
                vstep_mat,
                dstep_mat,
                max_stroke_width,
                min_stroke_width,
                image_height,
                image_width,
                check_angle_deviation,
                image_gradient_theta,
                max_angle_deviation,
                include_edges_in_swt):
    """
    Core Logic for Stroke Width Transform.
    Implementing the work of [Boris Epshtein, Eyal Ofek & Yonatan Wexler](https://www.microsoft.com/en-us/research/publication/detecting-text-in-natural-scenes-with-stroke-width-transform/)

    Given an edged input image, a ray is cast from every edge pixel along that pixel's
    gradient direction. A ray is accepted as a stroke when :
        - It terminates on another edge pixel (a ray which leaves the image, or which exceeds
          the permissible length without meeting an edge pixel, is discarded).
        - If ``check_angle_deviation`` is set, the absolute difference between the gradient angles
          of the starting and terminating edge pixels satisfies
          np.pi - max_angle_deviation <= theta_diff <= np.pi + max_angle_deviation
        - Its length, truncated to an integer, is >= min_stroke_width.
    Every pixel recorded along an accepted ray (the starting pixel included, the terminating
    pixel excluded) is assigned the ray length if that is greater than the value it already holds.

    Args:
        edged_image (np.ndarray) : Edges of the Original Input Image. Same size as the original image.
         Non-zero entries are treated as edge pixels.

        hstep_mat (np.ndarray) : For each pixel, cos(gradient_theta), where gradient_theta is the gradient
         angle for that pixel, representing length of horizontal movement for every unit movement in gradients direction.
         Same size as the original image

        vstep_mat (np.ndarray) : For each pixel, sin(gradient_theta), where gradient_theta is the gradient
         angle for that pixel, representing length of vertical movement for every unit movement in gradients direction.
         Same size as the original image

        dstep_mat (np.ndarray) : np.sqrt(hstep_mat**2+vstep_mat**2)

        max_stroke_width (int) : Maximum Stroke Width which would be permissible. A ray keeps travelling
         only while its length is <= max_stroke_width - 2.

        min_stroke_width (int) : Minimum Stroke Width which would be required

        image_height (int) : Height of the image

        image_width (int) : Width of the image

        check_angle_deviation (bool) : Whether to check the angle deviation to terminate the ray

        image_gradient_theta (np.ndarray) : Gradient array of the input image

        max_angle_deviation (float) : Maximum Angle Deviation which would be permissible

        include_edges_in_swt (bool) : Whether to include edges in the final SWT result. When set, the
         result starts out as a copy of ``edged_image`` (and keeps its dtype); otherwise it starts
         out as an all zero np.int32 array.

    Returns:
        (np.ndarray) : Stroke Width Transformed Image, each stroke filled with stroke length.
    """
    # NOTE : This function is also compiled with `numba.njit`, hence the deliberately
    # plain constructs used below.
    # Initialisations
    edge_y, edge_x = edged_image.nonzero()
    edge_indices_set = set(zip(edge_y, edge_x))
    angle_dev_ll = np.pi - max_angle_deviation
    angle_dev_ul = np.pi + max_angle_deviation
    if include_edges_in_swt:
        # NOTE(review) : presumably `edged_image` is expected to be np.int32 so that both
        # branches produce the same array type - confirm against the callers.
        swt_matrix = edged_image.copy()
    else:
        swt_matrix = np.zeros(shape=edged_image.shape, dtype=np.int32)
    ray_pointer = 0
    ray_length = -1
    # Scratch buffer holding the (y, x) indices of the ray currently being traced.
    # It is zero filled because NaN cannot be represented in an integer array; only the
    # first `ray_pointer` rows, which are always written first, are ever read.
    ray_indices = np.zeros(shape=(max_stroke_width, 2), dtype=np.int32)

    for iy, ix in edge_indices_set:
        ray_pointer = 0

        # Get the starting indices and the step values
        delx = hstep_mat[iy, ix]
        dely = vstep_mat[iy, ix]
        deld = dstep_mat[iy, ix]
        itheta = image_gradient_theta[iy, ix]

        # Add the first point and increment the ray pointer
        ray_indices[ray_pointer] = [iy, ix]
        ray_pointer += 1
        breach = False

        while not breach:
            # Get the next point in the gradient direction
            # NOTE(review) : because of the floor, a first step with 0 <= delx, dely < 1 lands
            # back on the starting pixel, which is itself an edge pixel, so such a ray
            # terminates immediately - confirm that this is intended.
            niy = np.int32(np.floor(iy + ray_pointer * dely))
            nix = np.int32(np.floor(ix + ray_pointer * delx))

            ray_length = ray_pointer * deld

            max_sw_check = ray_length <= max_stroke_width - 2
            boundary_check = (0 <= niy < image_height) and (0 <= nix < image_width)
            edge_indices_check = (niy, nix) not in edge_indices_set

            if max_sw_check and boundary_check and edge_indices_check:
                # Still inside the stroke : record the pixel and keep travelling
                ray_indices[ray_pointer] = [niy, nix]
                ray_pointer += 1
            else:
                breach = True
                if edge_indices_check:
                    # Ran out of permissible length, or out of the image, without
                    # meeting an edge pixel : discard the ray
                    ray_length = -1
                elif check_angle_deviation:
                    # Met an edge pixel : keep the ray only if the two gradients are
                    # (roughly) opposite to each other
                    theta_diff = np.abs(itheta - image_gradient_theta[niy, nix])
                    if not (angle_dev_ll <= theta_diff <= angle_dev_ul):
                        ray_length = -1

        ray_length = np.int32(ray_length)
        if ray_length >= min_stroke_width:
            _ray_iy = ray_indices[:ray_pointer, 0]
            _ray_ix = ray_indices[:ray_pointer, 1]
            for each_y, each_x in zip(_ray_iy, _ray_ix):
                # Where rays overlap, a pixel keeps the greatest ray length seen
                sw_val = swt_matrix[each_y, each_x]
                if ray_length > sw_val:
                    swt_matrix[each_y, each_x] = ray_length

    return swt_matrix


try:
    swt_strokes_jitted = nb.njit(cache=True)(swt_strokes)
except RuntimeError:
    # HACK : Fall back to an un-cached JIT wrapper when creating the cached one raises
    # a RuntimeError. This is specifically to facilitate the building of the
    # `readthedocs` documentation. Any other exception propagates unchanged.
    swt_strokes_jitted = nb.njit(cache=False)(swt_strokes)


# mask_arr = np.full(shape=(100, 100), fill_value=0, dtype=np.uint8)
# proxyletters_spec = [('label', nb.typeof(99999)),
#                      ('sw_median', nb.typeof(999.999)),
#                      ('color_median', nb.typeof(999.999)),
#                      ('min_height', nb.typeof(999.999)),
#                      ('min_angle', nb.typeof(999.999)),
#                      ('inflated_radius', nb.typeof(999.999)),
#                      ('circular_mask', nb.typeof(mask_arr)),
#                      ('min_label_mask', nb.typeof(mask_arr))]


# @nb.experimental.jitclass(spec=proxyletters_spec)
class ProxyLetter:
    """
    A proxy class for the ``Letter`` object, housing only those properties which
    would be required by the Fusion Class. This is to support application of `numba`
    onto the Fusion Class, as the ``Letter`` class object wont be acceptable by the Fusion class
    were it to be run in nopython-jit mode.
    """

    def __init__(self,
                 label: int,
                 sw_median: float,
                 color_median: float,
                 min_height: int,
                 min_angle: float,
                 inflated_radius: int,
                 circular_mask: np.ndarray,
                 min_label_mask: np.ndarray):
        """
        Create a ProxyLetter object. Every argument is stored, unchanged, on the
        attribute of the same name.

        Args:
            label (int) : Letter identifier

            sw_median (float) : Median stroke width of this letter

            color_median (float) : Median Color of this letter

            min_height (int) : Minimum Bounding Box height of this letter

            min_angle (float) : Rotation angle of the Minimum Bounding Box of this letter

            inflated_radius (int) : Inflated Circum-Radius of the Minimum Bounding Box of this letter

            circular_mask (np.ndarray) : Circular filled mask of this letter of radius=inflated_radius and
             centre=Centre Co-Ordinates of the Minimum Bounding Box.

            min_label_mask (np.ndarray) : Filled Minimum Bounding Box of the letter
        """
        # Initialisations
        self.label = label
        self.sw_median = sw_median
        self.color_median = color_median
        self.min_height = min_height
        self.min_angle = min_angle
        self.inflated_radius = inflated_radius
        self.circular_mask = circular_mask
        self.min_label_mask = min_label_mask


# @nb.experimental.jitclass(spec=)
class Fusion:
    """
    Class for fusing Individual Components (Letters) into Grouped Components (Words),
    comparing aspects like :
        - Proximity of letters to each other
        - Relative minimum bounding box rotation angle from each other
        - Deviation in color from one component to the other
        - Ratio of stroke widths from one to the other
        - Ratio of minimum bounding box height of one to the other
    """

    def __init__(self, letters: dict,
                 acceptable_stroke_width_ratio: float,
                 acceptable_color_deviation: List[int],
                 acceptable_height_ratio: float,
                 acceptable_angle_deviation: float):
        """
        Create ``Fusion`` object

        Args:
            letters (dict) : All the letters to be considered in the fusion pool, as a dictionary
             with labels as keys, mapped to their corresponding ProxyLetter object.
             The dictionary is stored as is (not copied) and is emptied by ``runGrouping``.

            acceptable_stroke_width_ratio (float) : When comparing two individual components, maximum
             stroke width ratio between two individual components beyond which the component wont be fused together.

            acceptable_color_deviation (List[int]) : When comparing two individual components, maximum color
             deviation between two individual components beyond which the components wont be fused together.
             Its Euclidean norm is used as the upper limit on the ratio of the color medians.

            acceptable_height_ratio (float) : When comparing two individual components, maximum height
             ratio between two individual components beyond which the components wont be fused together.

            acceptable_angle_deviation (float) : When comparing two individual components, maximum angle
             (Minimum Bounding Box Rotation Angle) deviation, in degrees, between two individual components
             beyond which the components wont be fused together.
        """
        self.all_letters: dict = letters
        self.all_words = []
        # Each ratio check accepts values in [1 / upper limit, upper limit]
        self.sw_ul = acceptable_stroke_width_ratio
        self.sw_ll = 1 / self.sw_ul
        self.cd_ul = np.linalg.norm(acceptable_color_deviation)
        self.cd_ll = 1 / self.cd_ul
        self.ht_ul = acceptable_height_ratio
        self.ht_ll = 1 / self.ht_ul
        self.ad = np.deg2rad(acceptable_angle_deviation)
        # One mask per letter, stacked along the third axis
        letter_masks = [letter.min_label_mask for letter in self.all_letters.values()]
        if letter_masks:
            self.letter_masks = np.dstack(letter_masks)
        else:
            # `np.dstack` refuses an empty sequence; an empty pool simply has no masks
            self.letter_masks = np.zeros(shape=(0, 0, 0), dtype=np.int32)

    def getProximityLetters(self, anchor_letter: "ProxyLetter", remaining_letters: dict) -> List[int]:
        """
        Finds all the labels which are in proximity of anchor_letter amongst the remaining_letters

        The values of all the stacked ``min_label_mask`` arrays are read at the non-zero positions
        of the anchor's ``circular_mask``; those values which are also keys of ``remaining_letters``
        are returned. The masks are therefore presumably filled with the label of their own
        letter - verify against the caller.

        Args:
            anchor_letter (ProxyLetter) : Letter with respect to which proximity labels are
             to be searched.

            remaining_letters (dict) : A dictionary, with labels as keys, mapped to their
             corresponding ProxyLetter object.
        Returns:
            (List[int]) :  List of all the labels which are in the proximity of anchor_letter
        """
        if self.letter_masks.size == 0:
            # No masks to look into (empty fusion pool)
            return []
        idx_y, idx_x = anchor_letter.circular_mask.nonzero()
        proximity_letter_labels = np.unique(self.letter_masks[idx_y, idx_x, :])
        remaining_labels = set(remaining_letters.keys())
        proximity_letter_labels = list(remaining_labels.intersection(proximity_letter_labels))
        return proximity_letter_labels

    def groupEligibility(self, curr_letter, proximity_letter) -> bool:
        """
        Check whether two ProxyLetters are eligible to be grouped with one another.

        Args:
              curr_letter (ProxyLetter) : Current Letter
              proximity_letter (ProxyLetter) : A letter in proximity of Current Letter
        Returns:
            (bool) : Whether curr_letter and proximity_letter are eligible to be grouped with each other
        """
        cl: "ProxyLetter" = curr_letter
        pl: "ProxyLetter" = proximity_letter
        # NOTE(review) : the ratios below divide by properties of `proximity_letter`;
        # presumably these are never zero - verify against the caller.
        # Is there much difference between the median stroke widths of curr_letter proximity_letter
        sw_check = self.sw_ll <= cl.sw_median / pl.sw_median <= self.sw_ul
        # Is there much difference between the font colors of curr_letter proximity_letter
        cd_check = self.cd_ll <= cl.color_median / pl.color_median <= self.cd_ul
        # Is there much difference between the heights of curr_letter proximity_letter
        ht_check = self.ht_ll <= cl.min_height / pl.min_height <= self.ht_ul
        # Is there much difference between the inclination of curr_letter proximity_letter
        ad_check = abs(cl.min_angle - pl.min_angle) <= self.ad

        return sw_check and cd_check and ht_check and ad_check

    def groupLetters(self, curr_letter, remaining_letters, grouping) -> List["ProxyLetter"]:
        """
        Groups curr_letter with its proximity labels which are eligible to be grouped
        to it, and then, depth first, with the eligible proximity labels of every letter
        so added.

        The traversal uses an explicit stack rather than recursive calls, so that the size
        of a group is not bounded by the interpreter's recursion limit. Letters are visited
        in the same order as a recursive depth first traversal would visit them.

        Args:
            curr_letter (ProxyLetter) : ``ProxyLetter`` whose grouping needs to be mapped.

            remaining_letters (dict) : Dictionary with keys as the ProxyLetter label and the
             corresponding values as the ProxyLetter themselves. Every letter which gets
             grouped is removed from this dictionary.

            grouping (list) : The ProxyLetters grouped so far. It is extended in place.
        Returns:
            (List[ProxyLetter]) : A list containing ProxyLetters which can be
             assumed to be a *word* (the same list object as ``grouping``).
        """
        first_labels = self.getProximityLetters(anchor_letter=curr_letter,
                                                remaining_letters=remaining_letters)
        # Each entry : (letter being expanded, iterator over its proximity labels)
        pending = [(curr_letter, iter(first_labels))]

        while pending:
            anchor_letter, label_iterator = pending[-1]
            for each_proximity_letter_label in label_iterator:
                proximity_letter = remaining_letters.get(each_proximity_letter_label)
                # NOTE : Some of the labels might already have been consumed while
                # expanding a deeper letter, in which case they are skipped here.
                if proximity_letter is None:
                    continue
                if self.groupEligibility(curr_letter=anchor_letter, proximity_letter=proximity_letter):
                    confirmed_proximity_letter = remaining_letters.pop(each_proximity_letter_label)
                    grouping.append(confirmed_proximity_letter)
                    # Expand the newly added letter before moving on to the anchor's next label
                    next_labels = self.getProximityLetters(anchor_letter=confirmed_proximity_letter,
                                                           remaining_letters=remaining_letters)
                    pending.append((confirmed_proximity_letter, iter(next_labels)))
                    break
            else:
                # All the proximity labels of this letter have been looked at
                pending.pop()

        return grouping

    def runGrouping(self) -> List[List["ProxyLetter"]]:
        """
        Fuses eligible individual components (letters) together which can be eligible to form
        a *word* out of them.

        The letter pool given to the ``Fusion`` object is consumed : it is empty once this
        function returns.

        Returns:
            (List[List[ProxyLetter]]) : A list of lists containing ProxyLetter which can be assumed
             to be *words* amongst the pool of individual components provided to the ``Fusion`` class.
        """
        while self.all_letters:
            # Get the next letter to make a group from
            next_letter_label, next_letter = self.all_letters.popitem()
            # Get all the letters which can belong to `next_letter`
            letter_group = self.groupLetters(curr_letter=next_letter,
                                             remaining_letters=self.all_letters,
                                             grouping=[next_letter])

            # Append this word to all_words
            self.all_words.append(letter_group)

        return self.all_words