Source code for cvkit.pose_estimation.config

import os

import numpy as np
import yaml as yml

#: Default likelihood threshold value. Used as the fallback when the
#: ``Reconstruction`` section of a configuration file has no ``threshold`` key.
DEFAULT_THRESHOLD = 0.6


class CameraViews:
    """
    Holds the calibration metadata describing one camera of the recording setup.

    Every key of ``data_dictionary`` is optional; a placeholder default is
    stored for each key that is absent.

    :param data_dictionary: Dictionary containing the metadata for the recording setup.
    :type data_dictionary: dict
    :param framerate: Project level framerate. (Assumes equal framerate for all views)
    :type framerate: float
    """

    def __init__(self, data_dictionary, framerate):
        fetch = data_dictionary.get
        self.framerate = framerate

        #: Contains 2D x_max, y_max, and origin. This can be used to create a
        #: coordinate system for the reconstructed data.
        self.axes = fetch('axes', {})
        #: DLT coefficients generated by the EasyWand package.
        self.dlt_coefficients = np.array(fetch('dlt_coefficients', []))
        #: Extrinsic data: Position of the camera in world coordinates.
        self.pos = np.array(fetch('pos', []))
        #: Intrinsic data: Resolution of the captured video.
        self.resolution = np.array(fetch('resolution', [-1, -1]))
        #: Principal point of the camera lens.
        self.principal_point = np.array(fetch('principal_point', [-1, -1]))

        # Distortion is always stored 2-D: a flat list from the config gets a
        # leading axis so that it matches the (1, 5) shape of the default.
        lens_distortion = np.array(fetch('distortion', np.zeros((1, 5))))
        if lens_distortion.ndim == 1:
            lens_distortion = np.expand_dims(lens_distortion, 0)
        self.distortion = lens_distortion

        #: Focal length in pixels
        self.f_px = fetch('f_px', -1)

    def is_dlt_valid(self):
        """
        Report whether the stored DLT coefficients form a flat vector of 12 values.

        :return: True when ``dlt_coefficients`` has shape ``(12,)``.
        :rtype: bool
        """
        expected_shape = (12,)
        return self.dlt_coefficients.shape == expected_shape

    def export_dict(self):
        """
        Convert the camera metadata to plain Python containers.

        The project level framerate is not part of the exported dictionary.

        :return: Dictionary with every array attribute converted to a list.
        :rtype: dict
        """
        exported = {'axes': self.axes}
        for attribute in ('dlt_coefficients', 'pos', 'resolution'):
            exported[attribute] = getattr(self, attribute).tolist()
        exported['f_px'] = self.f_px
        for attribute in ('principal_point', 'distortion'):
            exported[attribute] = getattr(self, attribute).tolist()
        return exported
class AnnotationConfig:
    """
    Stores information about data files for each view of the project.

    :param name: Name of the annotated view.
    :type name: str
    :param data_dictionary: dictionary containing annotation meta-data. The keys
        ``annotation_file``, ``annotation_file_flavor``, ``video_file`` and
        ``video_reader`` are required.
    :type data_dictionary: dict
    :raises KeyError: If one of the required keys is missing.
    :raises SystemExit: If ``video_file`` does not point to an existing file.
    """

    def __init__(self, name, data_dictionary):
        self.view = name  #: Name of the camera
        self.annotation_file = data_dictionary['annotation_file']  #: Path of the annotation data file.
        #: Flavor of the data file. Refer
        #: :py:attr:`cvkit.pose_estimation.data_readers.datastore_interface.DataStoreInterface.FLAVOR`
        self.annotation_file_flavor = data_dictionary['annotation_file_flavor']
        self.video_file = data_dictionary['video_file']  #: Path to the video file

        # Explicit check instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would silently disable the validation.
        try:
            video_found = os.path.isfile(self.video_file)
        except (TypeError, ValueError):
            # e.g. ``video_file`` left empty in the YAML file (loaded as None).
            video_found = False
        if not video_found:
            print(self.video_file, "not found!")
            # Still terminates through SystemExit as before, but without relying
            # on the ``exit`` helper injected by the ``site`` module, and with a
            # non-zero status so the failure is visible to the calling process.
            raise SystemExit(1)

        #: Flavor of the video file. Refer
        #: :py:attr:`cvkit.video_readers.video_reader_interface.BaseVideoReaderInterface.FLAVOR`
        self.video_reader = data_dictionary['video_reader']

    def export_dict(self):
        """
        Convert the annotation entry back to a plain dictionary.

        The view name is not included; only the four data-file keys are exported.

        :return: Dictionary with the same keys that the constructor reads.
        :rtype: dict
        """
        return {
            'annotation_file': self.annotation_file,
            'annotation_file_flavor': self.annotation_file_flavor,
            'video_file': self.video_file,
            'video_reader': self.video_reader,
        }
class PoseEstimationConfig:
    """
    This class is used to read and write pose estimation configuration files. It contains basic information about
    the experiments such as the body parts of the tracked subject, their connectivity, camera setup, data folder,
    data files, reconstruction parameters, and so on.

    .. highlight:: YAML
    .. code-block:: YAML

        # Project Name
        name: unnamed_project
        # Valid path to output folder
        output_folder: ''
        # List of body parts
        body_parts:
          - part_1
          - part_2
          - part_3
          - part_4
        # List of lists defining body part connections
        skeleton:
          - - part_1
            - part_3
          - - part_2
            - part_3
          - - part_3
            - part_4
        # List of colors (R,G,B). If enough colors are not provided, others will be randomly generated.
        colors: # Optional
          - [ 230, 25, 75 ]  # Color for part_1
          - [ 60, 180, 75 ]  # Color for part_2
          - [ 255, 225, 25 ] # Color for part_3
        # Reconstruction configuration parameters
        Reconstruction:
          # Project level framerate. We currently only support videos with equal
          # framerate
          framerate: 60
          # Unscaled length of the x-axis
          x_len: <length>
          # Unscaled length of the y-axis
          y_len: <length>
          # Reconstruction algorithm, accepts 'default' or 'auto_subset'
          # default: Reconstructs if likelihood is higher than the threshold for all
          # views.
          # auto_subset: Automatically creates a subset of 'accurate' viewpoints
          # based on the threshold value. The reconstruction is performed if the
          # number of viewpoints is more than 2.
          reconstruction_algorithm: default # Optional
          # Rotation Matrix to align 3D reconstructed data. It will be multiplied
          # after initial reconstruction.
          rotation_matrix: # Optional
            - [ 1.0, 0.0, 0.0 ]
            - [ 0.0, 1.0, 0.0 ]
            - [ 0.0, 0.0, 1.0 ]
          # The desired scale factor for converting reconstructed data's units.
          # Example: If reconstructed data is in meters, scale can be set to 1000
          # to generate data in millimeters.
          scale: 1.0
          # Scale factor that can be computed through update_alignment_matrices.
          # This uses pre-known distances on the arena to adjust the desired scaling
          # factor for mitigating reconstruction noise.
          computed_scale: [1.0,1.0,1.0] # Optional, defaults to scale
          # Project level likelihood threshold value.
          threshold: 0.75
          # Static translation vector. Added after scaling.
          # Used for moving origin to desired location.
          # Note: The translation vector has to be scaled before adding
          translation_vector: [ 0, 0, 0 ]
          # Axis Alignment vector. Used to flip targeted axis.
          # Only accepts either 1 or -1, indicating whether the corresponding axis
          # will be flipped.
          # [-1,1,-1] Flips x and z axes.
          axis_rotation_3D: [1,1, 1]
        annotation:
          VIEW_NAME_1:
            annotation_file: '' # Path to datastore containing pose data for the view
            annotation_file_flavor: <flavor> # DataStoreInterface Flavor
            video_file: '' # Path to the video file for the view
            video_reader: <flavor> # BaseVideoReaderInterface Flavor
            # Corresponding Camera ID. Use None for importing video data not
            # corresponding to any cameras.
            view: None
          # Repeat for each annotated views
        views:
          Cam_id_1:
            axes:
              origin: [-1, -1 ] # 2D position of the origin for this camera view
              x_max: [ -1, -1 ] # 2D position of the x max location for this camera view
              y_max: [ -1, -1 ] # 2D position of the y max location for this camera view
            dlt_coefficients: <list of 12 numbers representing the DLT co-efficients for this camera view>
            f_px: -1 # Focal length of the camera in px
            pos: [ ] # Position of the camera in world coordinates.
            principal_point: [ ] # Principal point of the camera.
            resolution: [ ] # Resolution of the captured frames.
          # Repeat for each camera.

    :param path: The path of the yaml file
    :type path: str
    :raises KeyError: If ``output_folder``, ``body_parts``, ``skeleton``, ``Reconstruction`` or
        ``Reconstruction.framerate`` is missing from the file.
    :raises AssertionError: If the output folder does not exist, an annotation view uses a reserved name,
        or the rotation matrix is not 3x3.
    """
    # Top-level keys that save_config dumps with yml.dump's default flow-style setting.
    ENABLE_FLOW_STYLE = ['name', 'output_folder', 'threshold', 'reprojection_toolbox', 'behaviours', 'body_parts',
                         'skeleton']

    def __init__(self, path):
        self.path = path
        # Context manager so the file handle is closed even when parsing fails.
        with open(path, 'r') as config_file:
            self.data_dictionary = yml.safe_load(config_file)
        assert 0 <= DEFAULT_THRESHOLD < 1.0  # sanity check on the module constant, not on user input
        self.project_name = self.data_dictionary.get('name', 'unnamed')  #: The name of the Project
        self.output_folder = self.data_dictionary['output_folder']  #: Path for the output directory
        # Input validation raises AssertionError explicitly (same exception type
        # as before) so that it is not stripped when Python runs with -O.
        if not os.path.exists(self.output_folder):
            raise AssertionError(f"Output folder {self.output_folder!r} does not exist")

        reconstruction = self.data_dictionary['Reconstruction']
        #: Threshold value for the project.
        self.threshold = float(reconstruction.get('threshold', DEFAULT_THRESHOLD))
        #: 3 dimensional list where all the elements are either 1 or -1. This can be used to flip desired axis.
        self.axis_rotation_3D = np.array(reconstruction.get('axis_rotation_3D', np.array([1, 1, 1])))
        if np.any(np.abs(self.axis_rotation_3D) != 1):
            # Any entry other than 1 / -1 invalidates the whole vector.
            print(f"Resetting {self.axis_rotation_3D} to [1,1,1]")
            self.axis_rotation_3D = np.array([1, 1, 1])

        self.body_parts = self.data_dictionary['body_parts']  #: List of body parts
        self.num_parts = len(self.body_parts)  #: Number of body parts
        self.skeleton = self.data_dictionary['skeleton']  #: Defines connectivity among the body parts.
        #: Custom colors for each body part. Colors are randomly generated if not explicitly provided.
        #: A ``colors`` key that is present but empty is treated like a missing one.
        self.colors = list(self.data_dictionary.get('colors') or [])
        self.framerate = reconstruction['framerate']  #: Project level framerate

        #: A dictionary mapping views to its corresponding data files -
        #: :py:class:`~cvkit.pose_estimation.config.AnnotationConfig`.
        self.annotation_views = {}
        annotation_section = self.data_dictionary.get('annotation') or {}
        for annotation_view in annotation_section:
            if annotation_view in ('Reconstruction', 'Sync'):
                raise AssertionError(f"{annotation_view!r} cannot be used as an annotation view name")
            self.annotation_views[annotation_view] = AnnotationConfig(annotation_view,
                                                                      annotation_section[annotation_view])

        #: A dictionary mapping view names to camera information -
        #: :py:class:`~cvkit.pose_estimation.config.CameraViews`.
        self.views = {}
        views_section = self.data_dictionary.get('views') or {}
        for view in views_section:
            self.views[view] = CameraViews(views_section[view], self.framerate)

        #: 3x3 Rotation matrix for aligning reconstructed data.
        self.rotation_matrix = np.array(reconstruction.get('rotation_matrix', np.identity(3)), dtype=np.float32)
        if self.rotation_matrix.shape != (3, 3):
            raise AssertionError(f"rotation_matrix must be 3x3, got shape {self.rotation_matrix.shape}")
        self.x_len = float(reconstruction.get('x_len', -1))  #: Unscaled length of the x-axis (-1 when not set)
        self.y_len = float(reconstruction.get('y_len', -1))  #: Unscaled length of the y-axis (-1 when not set)
        #: Project level scale factor for reconstructed data.
        self.scale = float(reconstruction.get('scale', 1.0))
        #: Computed scale factor based on pre-known distances to reduce reconstruction noise.
        #: Kept exactly as loaded: either a single number or a list of numbers. Defaults to ``scale``.
        self.computed_scale = reconstruction.get('computed_scale', self.scale)
        #: Fixed 3-D translational vector for reconstructed data.
        self.translation_vector = np.array(reconstruction.get('translation_vector', [0, 0, 0]), dtype=np.float32)
        #: Reconstruction algorithm. ``auto_subset``: picks at least 2 views based on likelihood values.
        #: ``default``: only reconstructs if all views have likelihood higher than the threshold.
        self.reconstruction_algorithm = reconstruction.get('reconstruction_algorithm', 'default')

    def export_dict(self):
        """
        Convert the configuration to a dictionary of plain Python containers.

        :return: Project configuration with the same layout as the YAML file read by the constructor.
        :rtype: dict
        """
        # computed_scale may be a scalar or a per-axis list (see the class
        # docstring); float() alone raises TypeError for the list form.
        # A scalar still exports as a float, a list as a list of floats.
        computed_scale = np.asarray(self.computed_scale, dtype=float).tolist()
        return {
            'name': self.project_name,
            'output_folder': self.output_folder,
            'body_parts': self.body_parts,
            'skeleton': self.skeleton,
            'annotation': {view: self.annotation_views[view].export_dict() for view in self.annotation_views},
            'colors': self.colors,
            'views': {view: self.views[view].export_dict() for view in self.views},
            'Reconstruction': {
                'threshold': self.threshold,
                'x_len': self.x_len,
                'y_len': self.y_len,
                'scale': self.scale,
                'framerate': self.framerate,
                'rotation_matrix': self.rotation_matrix.tolist(),
                'translation_vector': self.translation_vector.tolist(),
                'reconstruction_algorithm': self.reconstruction_algorithm,
                'computed_scale': computed_scale,
                'axis_rotation_3D': self.axis_rotation_3D.tolist(),
            },
        }
def save_config(path, data_dict):
    """
    Saves given data dictionary to Yaml file

    Every top-level key is dumped as its own YAML document fragment and the
    fragments are separated by a blank line. Keys listed in
    ``PoseEstimationConfig.ENABLE_FLOW_STYLE`` are dumped with ``yml.dump``'s
    default flow-style setting; all other keys are dumped with
    ``default_flow_style=None``.

    :param path: Output File Path
    :type path: str
    :param data_dict: dictionary containing the project configuration
    :type data_dict: dict
    """
    # Serialise everything before touching the file, so that a failing dump
    # does not leave a truncated configuration behind.
    sections = []
    for key in data_dict:
        if key in PoseEstimationConfig.ENABLE_FLOW_STYLE:
            sections.append(yml.dump({key: data_dict[key]}))
        else:
            sections.append(yml.dump({key: data_dict[key]}, default_flow_style=None))
    # The context manager flushes and closes the file deterministically; the
    # handle was previously left open until garbage collection.
    with open(path, 'w') as save_file:
        save_file.write(''.join(section + '\n' for section in sections))