Camera Motion#

Camera motion estimation module.

CoordinatesTransformation #

Bases: ABC

Abstract class representing a coordinate transformation.

Detections' and tracked objects' coordinates can be interpreted in 2 references:

  • Relative: their position on the current frame, (0, 0) is the top left
  • Absolute: their position on a fixed space, (0, 0) is the top left of the first frame of the video.

Therefore, a coordinate transformation in this context is a class that can transform coordinates from one reference to another.

Source code in norfair/camera_motion.py
class CoordinatesTransformation(ABC):
    """
    Abstract class representing a coordinate transformation.

    Detections' and tracked objects' coordinates can be interpreted in 2 references:

    - _Relative_: their position on the current frame, (0, 0) is top left
    - _Absolute_: their position on a fixed space, (0, 0)
        is the top left of the first frame of the video.

    Therefore, a coordinate transformation in this context is a class that can transform
    coordinates from one reference to another.
    """

    @abstractmethod
    def abs_to_rel(self, points: np.ndarray) -> np.ndarray:
        pass

    @abstractmethod
    def rel_to_abs(self, points: np.ndarray) -> np.ndarray:
        pass

TransformationGetter #

Bases: ABC

Abstract class representing a method for finding a CoordinatesTransformation between 2 sets of points.

Source code in norfair/camera_motion.py
class TransformationGetter(ABC):
    """
    Abstract class representing a method for finding a CoordinatesTransformation between 2 sets of points
    """

    @abstractmethod
    def __call__(
        self, curr_pts: np.ndarray, prev_pts: np.ndarray
    ) -> Tuple[bool, CoordinatesTransformation]:
        pass

TranslationTransformation #

Bases: CoordinatesTransformation

Coordinate transformation between points using a simple translation

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| movement_vector | np.ndarray | The vector representing the translation. | required |
Source code in norfair/camera_motion.py
class TranslationTransformation(CoordinatesTransformation):
    """
    Coordinate transformation between points using a simple translation

    Parameters
    ----------
    movement_vector : np.ndarray
        The vector representing the translation.
    """

    def __init__(self, movement_vector):
        self.movement_vector = movement_vector

    def abs_to_rel(self, points: np.ndarray):
        return points + self.movement_vector

    def rel_to_abs(self, points: np.ndarray):
        return points - self.movement_vector
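
A minimal round-trip sketch (the movement vector below is made up for illustration):

```python
import numpy as np

from norfair.camera_motion import TranslationTransformation

# Hypothetical camera shift of 5 px right and 2 px down
transformation = TranslationTransformation(np.array([5.0, 2.0]))

points = np.array([[10.0, 10.0], [50.0, 80.0]])
absolute = transformation.rel_to_abs(points)     # subtracts the movement vector
recovered = transformation.abs_to_rel(absolute)  # adds it back
assert np.allclose(points, recovered)
```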

TranslationTransformationGetter #

Bases: TransformationGetter

Calculates TranslationTransformation between points.

The camera movement is calculated as the mode of the optical flow between the previous reference frame and the current one.

Comparing consecutive frames can make differences too small to correctly estimate the translation; for this reason, the reference frame is kept fixed as we progress through the video. Eventually, if the transformation is no longer able to match enough points, the reference frame is updated.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| bin_size | float | Before calculating the mode, optical flow is bucketized into bins of this size. | 0.2 |
| proportion_points_used_threshold | float | Proportion of points that must be matched; otherwise the reference frame must be updated. | 0.9 |
Source code in norfair/camera_motion.py
class TranslationTransformationGetter(TransformationGetter):
    """
    Calculates TranslationTransformation between points.

    The camera movement is calculated as the mode of the optical flow between the previous reference frame
    and the current one.

    Comparing consecutive frames can make differences too small to correctly estimate the translation;
    for this reason, the reference frame is kept fixed as we progress through the video.
    Eventually, if the transformation is no longer able to match enough points, the reference frame is updated.

    Parameters
    ----------
    bin_size : float
        Before calculating the mode, optical flow is bucketized into bins of this size.
    proportion_points_used_threshold : float
        Proportion of points that must be matched; otherwise the reference frame must be updated.
    """

    def __init__(
        self, bin_size: float = 0.2, proportion_points_used_threshold: float = 0.9
    ) -> None:
        self.bin_size = bin_size
        self.proportion_points_used_threshold = proportion_points_used_threshold
        self.data = None

    def __call__(
        self, curr_pts: np.ndarray, prev_pts: np.ndarray
    ) -> Tuple[bool, TranslationTransformation]:
        # get flow
        flow = curr_pts - prev_pts

        # get mode
        flow = np.around(flow / self.bin_size) * self.bin_size
        unique_flows, counts = np.unique(flow, axis=0, return_counts=True)

        max_index = counts.argmax()

        proportion_points_used = counts[max_index] / len(prev_pts)
        update_prvs = proportion_points_used < self.proportion_points_used_threshold

        flow_mode = unique_flows[max_index]

        try:
            # Accumulate the flow stored from the previous reference frame;
            # on the first call self.data is None, which raises TypeError
            flow_mode += self.data
        except TypeError:
            pass

        if update_prvs:
            self.data = flow_mode

        return update_prvs, TranslationTransformation(flow_mode)
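
A hedged sketch with synthetic points (the shift and the outlier are made up) showing how the mode of the bucketized flow ignores a moving object:

```python
import numpy as np

from norfair.camera_motion import TranslationTransformationGetter

getter = TranslationTransformationGetter(bin_size=0.2)

# Most points move by (3, 1); the last one belongs to a moving object
prev_pts = np.array([[0.0, 0.0], [10.0, 10.0], [20.0, 5.0], [30.0, 30.0]])
curr_pts = prev_pts + np.array([3.0, 1.0])
curr_pts[-1] += np.array([40.0, -7.0])  # outlier

update_prvs, transformation = getter(curr_pts, prev_pts)
print(transformation.movement_vector)  # ~[3. 1.]: the mode wins over the outlier
print(update_prvs)  # True: only 3/4 of points matched, below the 0.9 threshold
```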

HomographyTransformation #

Bases: CoordinatesTransformation

Coordinate transformation between points using a homography

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| homography_matrix | np.ndarray | The matrix representing the homography. | required |
Source code in norfair/camera_motion.py
class HomographyTransformation(CoordinatesTransformation):
    """
    Coordinate transformation between points using a homography

    Parameters
    ----------
    homography_matrix : np.ndarray
        The matrix representing the homography
    """

    def __init__(self, homography_matrix: np.ndarray):
        self.homography_matrix = homography_matrix
        self.inverse_homography_matrix = np.linalg.inv(homography_matrix)

    def abs_to_rel(self, points: np.ndarray):
        ones = np.ones((len(points), 1))
        points_with_ones = np.hstack((points, ones))
        points_transformed = points_with_ones @ self.homography_matrix.T
        points_transformed = points_transformed / points_transformed[:, -1].reshape(
            -1, 1
        )
        return points_transformed[:, :2]

    def rel_to_abs(self, points: np.ndarray):
        ones = np.ones((len(points), 1))
        points_with_ones = np.hstack((points, ones))
        points_transformed = points_with_ones @ self.inverse_homography_matrix.T
        points_transformed = points_transformed / points_transformed[:, -1].reshape(
            -1, 1
        )
        return points_transformed[:, :2]
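
Points are augmented with a homogeneous coordinate, multiplied by the matrix, and divided by the resulting scale; rel_to_abs applies the inverse matrix, so the two methods undo each other. A minimal sketch with a made-up matrix:

```python
import numpy as np

from norfair.camera_motion import HomographyTransformation

# Hypothetical homography: a 2-degree rotation plus a small translation
theta = np.deg2rad(2)
homography = np.array(
    [
        [np.cos(theta), -np.sin(theta), 4.0],
        [np.sin(theta), np.cos(theta), -1.5],
        [0.0, 0.0, 1.0],
    ]
)
transformation = HomographyTransformation(homography)

points = np.array([[100.0, 50.0], [320.0, 240.0]])
relative = transformation.abs_to_rel(points)
assert np.allclose(points, transformation.rel_to_abs(relative))
```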

HomographyTransformationGetter #

Bases: TransformationGetter

Calculates HomographyTransformation between points.

The camera movement is represented as a homography that matches the optical flow between the previous reference frame and the current one.

Comparing consecutive frames can make differences too small to correctly estimate the homography, often resulting in the identity. For this reason, the reference frame is kept fixed as we progress through the video. Eventually, if the transformation is no longer able to match enough points, the reference frame is updated.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| method | Optional[int], optional | One of OpenCV's methods for finding homographies. Valid options are: [0, cv.RANSAC, cv.LMEDS, cv.RHO], by default cv.RANSAC. | None |
| ransac_reproj_threshold | int, optional | Maximum allowed reprojection error to treat a point pair as an inlier. More info in links below. | 3 |
| max_iters | int, optional | The maximum number of RANSAC iterations. More info in links below. | 2000 |
| confidence | float, optional | Confidence level, must be between 0 and 1. More info in links below. | 0.995 |
| proportion_points_used_threshold | float, optional | Proportion of points that must be matched; otherwise the reference frame must be updated. | 0.9 |

See Also#

opencv.findHomography

Source code in norfair/camera_motion.py
class HomographyTransformationGetter(TransformationGetter):
    """
    Calculates HomographyTransformation between points.

    The camera movement is represented as a homography that matches the optical flow between the previous reference frame
    and the current one.

    Comparing consecutive frames can make differences too small to correctly estimate the homography, often resulting in the identity.
    For this reason the reference frame is kept fixed as we progress through the video.
    Eventually, if the transformation is no longer able to match enough points, the reference frame is updated.

    Parameters
    ----------
    method : Optional[int], optional
        One of OpenCV's methods for finding homographies.
        Valid options are: `[0, cv.RANSAC, cv.LMEDS, cv.RHO]`, by default `cv.RANSAC`.
    ransac_reproj_threshold : int, optional
        Maximum allowed reprojection error to treat a point pair as an inlier. More info in links below.
    max_iters : int, optional
        The maximum number of RANSAC iterations.  More info in links below.
    confidence : float, optional
        Confidence level, must be between 0 and 1. More info in links below.
    proportion_points_used_threshold : float, optional
        Proportion of points that must be matched, otherwise the reference frame must be updated.

    See Also
    --------
    [opencv.findHomography](https://docs.opencv.org/3.4/d9/d0c/group__calib3d.html#ga4abc2ece9fab9398f2e560d53c8c9780)
    """

    def __init__(
        self,
        method: Optional[int] = None,
        ransac_reproj_threshold: int = 3,
        max_iters: int = 2000,
        confidence: float = 0.995,
        proportion_points_used_threshold: float = 0.9,
    ) -> None:
        self.data = None
        if method is None:
            method = cv2.RANSAC
        self.method = method
        self.ransac_reproj_threshold = ransac_reproj_threshold
        self.max_iters = max_iters
        self.confidence = confidence
        self.proportion_points_used_threshold = proportion_points_used_threshold

    def __call__(
        self, curr_pts: np.ndarray, prev_pts: np.ndarray
    ) -> Tuple[bool, HomographyTransformation]:
        homography_matrix, points_used = cv2.findHomography(
            prev_pts,
            curr_pts,
            method=self.method,
            ransacReprojThreshold=self.ransac_reproj_threshold,
            maxIters=self.max_iters,
            confidence=self.confidence,
        )

        proportion_points_used = np.sum(points_used) / len(points_used)

        update_prvs = proportion_points_used < self.proportion_points_used_threshold

        try:
            # Compose with the homography stored from the previous reference frame;
            # on the first call self.data is None, which raises TypeError
            homography_matrix = homography_matrix @ self.data
        except (TypeError, ValueError):
            pass

        if update_prvs:
            self.data = homography_matrix

        return update_prvs, HomographyTransformation(homography_matrix)
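
A hedged sketch with synthetic correspondences (random corners plus a made-up shift); for a pure translation, the recovered homography should be close to the corresponding translation matrix:

```python
import numpy as np

from norfair.camera_motion import HomographyTransformationGetter

getter = HomographyTransformationGetter()

# Synthetic correspondences: 50 random points shifted by (5, -3)
rng = np.random.default_rng(0)
prev_pts = rng.uniform(0, 640, size=(50, 2)).astype(np.float32)
curr_pts = prev_pts + np.array([5.0, -3.0], dtype=np.float32)

update_prvs, transformation = getter(curr_pts, prev_pts)
# Expect roughly [[1, 0, 5], [0, 1, -3], [0, 0, 1]]
print(np.round(transformation.homography_matrix, 2))
print(update_prvs)  # False: every point was matched as an inlier
```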

MotionEstimator #

Estimator of the motion of the camera.

Uses optical flow to estimate the motion of the camera from frame to frame. The optical flow is calculated on a sample of strong points (corners).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| max_points | int, optional | Maximum amount of points sampled. More points make the estimation process slower but more precise. | 200 |
| min_distance | int, optional | Minimum distance between the sample points. | 15 |
| block_size | int, optional | Size of an average block when finding the corners. More info in links below. | 3 |
| transformations_getter | TransformationGetter, optional | An instance of TransformationGetter. By default HomographyTransformationGetter. | None |
| draw_flow | bool, optional | Draws the optical flow on the frame for debugging. | False |
| flow_color | Optional[Tuple[int, int, int]], optional | Color of the drawing, by default blue. | None |
| quality_level | float, optional | Parameter characterizing the minimal accepted quality of image corners. | 0.01 |

Examples:

>>> from norfair import Tracker, Video
>>> from norfair.camera_motion import MotionEstimator
>>> video = Video("video.mp4")
>>> tracker = Tracker(...)
>>> motion_estimator = MotionEstimator()
>>> for frame in video:
>>>    detections = get_detections(frame)  # runs detector and returns Detections
>>>    coord_transformation = motion_estimator.update(frame)
>>>    tracked_objects = tracker.update(detections, coord_transformations=coord_transformation)

See Also#

For more info on how the points are sampled: OpenCV.goodFeaturesToTrack
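
The sampling and flow computation are internal to the module and not shown on this page; below is a plausible sketch with OpenCV, using parameters analogous to max_points, quality_level, min_distance and block_size (an illustration under stated assumptions, not norfair's actual implementation):

```python
import cv2
import numpy as np

def sparse_flow(gray_prvs: np.ndarray, gray_next: np.ndarray, mask: np.ndarray = None):
    # Illustrative sketch only; norfair's internal helper may differ.
    prev_pts = cv2.goodFeaturesToTrack(
        gray_prvs,
        maxCorners=200,     # max_points
        qualityLevel=0.01,  # quality_level
        minDistance=15,     # min_distance
        blockSize=3,        # block_size
        mask=mask,
    )
    # Track the sampled corners into the next frame with Lucas-Kanade optical flow
    curr_pts, status, _ = cv2.calcOpticalFlowPyrLK(gray_prvs, gray_next, prev_pts, None)
    idx = status.ravel().astype(bool)
    return curr_pts[idx].reshape(-1, 2), prev_pts[idx].reshape(-1, 2)
```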

Source code in norfair/camera_motion.py
class MotionEstimator:
    """
    Estimator of the motion of the camera.

    Uses optical flow to estimate the motion of the camera from frame to frame.
    The optical flow is calculated on a sample of strong points (corners).

    Parameters
    ----------
    max_points : int, optional
        Maximum amount of points sampled.
        More points make the estimation process slower but more precise
    min_distance : int, optional
        Minimum distance between the sample points.
    block_size : int, optional
        Size of an average block when finding the corners. More info in links below.
    transformations_getter : TransformationGetter, optional
        An instance of TransformationGetter. By default [`HomographyTransformationGetter`][norfair.camera_motion.HomographyTransformationGetter]
    draw_flow : bool, optional
        Draws the optical flow on the frame for debugging.
    flow_color : Optional[Tuple[int, int, int]], optional
        Color of the drawing, by default blue.
    quality_level : float, optional
        Parameter characterizing the minimal accepted quality of image corners.

    Examples
    --------
    >>> from norfair import Tracker, Video
    >>> from norfair.camera_motion import MotionEstimator
    >>> video = Video("video.mp4")
    >>> tracker = Tracker(...)
    >>> motion_estimator = MotionEstimator()
    >>> for frame in video:
    >>>    detections = get_detections(frame)  # runs detector and returns Detections
    >>>    coord_transformation = motion_estimator.update(frame)
    >>>    tracked_objects = tracker.update(detections, coord_transformations=coord_transformation)

    See Also
    --------
    For more info on how the points are sampled: [OpenCV.goodFeaturesToTrack](https://docs.opencv.org/3.4/dd/d1a/group__imgproc__feature.html#ga1d6bb77486c8f92d79c8793ad995d541)
    """

    def __init__(
        self,
        max_points: int = 200,
        min_distance: int = 15,
        block_size: int = 3,
        transformations_getter: TransformationGetter = None,
        draw_flow: bool = False,
        flow_color: Optional[Tuple[int, int, int]] = None,
        quality_level: float = 0.01,
    ):

        self.max_points = max_points
        self.min_distance = min_distance
        self.block_size = block_size

        self.draw_flow = draw_flow
        if self.draw_flow and flow_color is None:
            flow_color = [0, 0, 100]
        self.flow_color = flow_color

        self.gray_prvs = None
        self.prev_pts = None
        if transformations_getter is None:
            transformations_getter = HomographyTransformationGetter()

        self.transformations_getter = transformations_getter
        self.prev_mask = None
        self.gray_next = None
        self.quality_level = quality_level

    def update(
        self, frame: np.ndarray, mask: np.ndarray = None
    ) -> CoordinatesTransformation:
        """
        Estimate camera motion for each frame

        Parameters
        ----------
        frame : np.ndarray
            The frame.
        mask : np.ndarray, optional
            An optional mask to avoid areas of the frame when sampling the corners.
            Must be an array of shape `(frame.shape[0], frame.shape[1])`, dtype same as frame,
            and values in {0, 1}.

            In general, the estimation will work best when it samples many points from the background;
            with that intention, this parameter is useful for masking out the detections/tracked objects,
            forcing the MotionEstimator to ignore the moving objects.
            It can also be used to mask static areas of the image, such as score overlays in sports
            transmissions or timestamps in security cameras.

        Returns
        -------
        CoordinatesTransformation
            The CoordinatesTransformation that can transform coordinates on this frame to absolute coordinates
            or vice versa.
        """
        self.gray_next = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if self.gray_prvs is None:
            self.gray_prvs = self.gray_next
            self.prev_mask = mask

        curr_pts, self.prev_pts = _get_sparse_flow(
            self.gray_next,
            self.gray_prvs,
            self.prev_pts,
            self.max_points,
            self.min_distance,
            self.block_size,
            self.prev_mask,
            quality_level=self.quality_level,
        )
        if self.draw_flow:
            for (curr, prev) in zip(curr_pts, self.prev_pts):
                c = tuple(curr.astype(int).ravel())
                p = tuple(prev.astype(int).ravel())
                cv2.line(frame, c, p, self.flow_color, 2)
                cv2.circle(frame, c, 3, self.flow_color, -1)

        update_prvs, coord_transformations = self.transformations_getter(
            curr_pts,
            self.prev_pts,
        )

        if update_prvs:
            self.gray_prvs = self.gray_next
            self.prev_pts = None
            self.prev_mask = mask

        return coord_transformations

update(frame, mask=None) #

Estimate camera motion for each frame

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| frame | np.ndarray | The frame. | required |
| mask | np.ndarray, optional | An optional mask to avoid areas of the frame when sampling the corners. Must be an array of shape (frame.shape[0], frame.shape[1]), dtype same as frame, and values in {0, 1}. In general, the estimation will work best when it samples many points from the background; with that intention, this parameter is useful for masking out the detections/tracked objects, forcing the MotionEstimator to ignore the moving objects. It can also be used to mask static areas of the image, such as score overlays in sports transmissions or timestamps in security cameras. | None |

Returns:

| Type | Description |
| --- | --- |
| CoordinatesTransformation | The CoordinatesTransformation that can transform coordinates on this frame to absolute coordinates or vice versa. |
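
Following the description above, a mask can be built by starting from ones and zeroing out the regions to ignore. A hedged sketch (the frame is a stand-in and the box coordinates are made up):

```python
import numpy as np

from norfair.camera_motion import MotionEstimator

motion_estimator = MotionEstimator()

# Stand-in frame: random noise so that corners exist to sample
rng = np.random.default_rng(0)
frame = rng.integers(0, 256, size=(480, 640, 3), dtype=np.uint8)

# 1 = may be sampled, 0 = ignored
mask = np.ones(frame.shape[:2], dtype=frame.dtype)
mask[40:200, 120:220] = 0  # hypothetical tracked object's bounding box
mask[0:60, 0:300] = 0      # hypothetical static score overlay

coord_transformations = motion_estimator.update(frame, mask=mask)
```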

Source code in norfair/camera_motion.py
def update(
    self, frame: np.ndarray, mask: np.ndarray = None
) -> CoordinatesTransformation:
    """
    Estimate camera motion for each frame

    Parameters
    ----------
    frame : np.ndarray
        The frame.
    mask : np.ndarray, optional
        An optional mask to avoid areas of the frame when sampling the corners.
        Must be an array of shape `(frame.shape[0], frame.shape[1])`, dtype same as frame,
        and values in {0, 1}.

        In general, the estimation will work best when it samples many points from the background;
        with that intention, this parameter is useful for masking out the detections/tracked objects,
        forcing the MotionEstimator to ignore the moving objects.
        It can also be used to mask static areas of the image, such as score overlays in sports
        transmissions or timestamps in security cameras.

    Returns
    -------
    CoordinatesTransformation
        The CoordinatesTransformation that can transform coordinates on this frame to absolute coordinates
        or vice versa.
    """
    self.gray_next = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if self.gray_prvs is None:
        self.gray_prvs = self.gray_next
        self.prev_mask = mask

    curr_pts, self.prev_pts = _get_sparse_flow(
        self.gray_next,
        self.gray_prvs,
        self.prev_pts,
        self.max_points,
        self.min_distance,
        self.block_size,
        self.prev_mask,
        quality_level=self.quality_level,
    )
    if self.draw_flow:
        for (curr, prev) in zip(curr_pts, self.prev_pts):
            c = tuple(curr.astype(int).ravel())
            p = tuple(prev.astype(int).ravel())
            cv2.line(frame, c, p, self.flow_color, 2)
            cv2.circle(frame, c, 3, self.flow_color, -1)

    update_prvs, coord_transformations = self.transformations_getter(
        curr_pts,
        self.prev_pts,
    )

    if update_prvs:
        self.gray_prvs = self.gray_next
        self.prev_pts = None
        self.prev_mask = mask

    return coord_transformations