# Android camera ITS scene4 multi-camera alignment test (exported page header removed)
      1 # Copyright 2018 The Android Open Source Project
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #      http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 
     15 import math
     16 import os.path
     17 import re
     18 import sys
     19 import cv2
     20 
     21 import its.caps
     22 import its.device
     23 import its.image
     24 import its.objects
     25 
     26 import numpy as np
     27 
ALIGN_TOL_MM = 4.0E-3  # mm; max center-location difference between cameras
ALIGN_TOL = 0.01  # multiplied by sensor diagonal to convert to pixels
CHART_DISTANCE_CM = 22  # cm; default chart distance, overridable via 'dist='
CIRCLE_RTOL = 0.1  # relative tolerance when comparing scaled circle radii
GYRO_REFERENCE = 1  # android.lens.poseReference value meaning gyro-referenced
NAME = os.path.basename(__file__).split('.')[0]  # test name for output files
TRANS_REF_MATRIX = np.array([0, 0, 0])  # poseTranslation of the reference cam
     35 
     36 def convert_to_world_coordinates(x, y, r, t, k, z_w):
     37     """Convert x,y coordinates to world coordinates.
     38 
     39     Conversion equation is:
     40     A = [[x*r[2][0] - dot(k_row0, r_col0), x*r_[2][1] - dot(k_row0, r_col1)],
     41          [y*r[2][0] - dot(k_row1, r_col0), y*r_[2][1] - dot(k_row1, r_col1)]]
     42     b = [[z_w*dot(k_row0, r_col2) + dot(k_row0, t) - x*(r[2][2]*z_w + t[2])],
     43          [z_w*dot(k_row1, r_col2) + dot(k_row1, t) - y*(r[2][2]*z_w + t[2])]]
     44 
     45     [[x_w], [y_w]] = inv(A) * b
     46 
     47     Args:
     48         x:      x location in pixel space
     49         y:      y location in pixel space
     50         r:      rotation matrix
     51         t:      translation matrix
     52         k:      intrinsic matrix
     53         z_w:    z distance in world space
     54 
     55     Returns:
     56         x_w:    x in meters in world space
     57         y_w:    y in meters in world space
     58     """
     59     c_1 = r[2, 2] * z_w + t[2]
     60     k_x1 = np.dot(k[0, :], r[:, 0])
     61     k_x2 = np.dot(k[0, :], r[:, 1])
     62     k_x3 = z_w * np.dot(k[0, :], r[:, 2]) + np.dot(k[0, :], t)
     63     k_y1 = np.dot(k[1, :], r[:, 0])
     64     k_y2 = np.dot(k[1, :], r[:, 1])
     65     k_y3 = z_w * np.dot(k[1, :], r[:, 2]) + np.dot(k[1, :], t)
     66 
     67     a = np.array([[x*r[2][0]-k_x1, x*r[2][1]-k_x2],
     68                   [y*r[2][0]-k_y1, y*r[2][1]-k_y2]])
     69     b = np.array([[k_x3-x*c_1], [k_y3-y*c_1]])
     70     return np.dot(np.linalg.inv(a), b)
     71 
     72 
     73 def convert_to_image_coordinates(p_w, r, t, k):
     74     p_c = np.dot(r, p_w) + t
     75     p_h = np.dot(k, p_c)
     76     return p_h[0] / p_h[2], p_h[1] / p_h[2]
     77 
     78 
     79 def rotation_matrix(rotation):
     80     """Convert the rotation parameters to 3-axis data.
     81 
     82     Args:
     83         rotation:   android.lens.Rotation vector
     84     Returns:
     85         3x3 matrix w/ rotation parameters
     86     """
     87     x = rotation[0]
     88     y = rotation[1]
     89     z = rotation[2]
     90     w = rotation[3]
     91     return np.array([[1-2*y**2-2*z**2, 2*x*y-2*z*w, 2*x*z+2*y*w],
     92                      [2*x*y+2*z*w, 1-2*x**2-2*z**2, 2*y*z-2*x*w],
     93                      [2*x*z-2*y*w, 2*y*z+2*x*w, 1-2*x**2-2*y**2]])
     94 
     95 
     96 # TODO: merge find_circle() & test_aspect_ratio_and_crop.measure_aspect_ratio()
     97 # for a unified circle script that is and in pymodules/image.py
     98 def find_circle(gray, name):
     99     """Find the black circle in the image.
    100 
    101     Args:
    102         gray:           numpy grayscale array with pixel values in [0,255].
    103         name:           string of file name.
    104     Returns:
    105         circle:         (circle_center_x, circle_center_y, radius)
    106     """
    107     size = gray.shape
    108     # otsu threshold to binarize the image
    109     _, img_bw = cv2.threshold(np.uint8(gray), 0, 255,
    110                               cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    111 
    112     # connected component
    113     cv2_version = cv2.__version__
    114     if cv2_version.startswith('2.4.'):
    115         contours, hierarchy = cv2.findContours(255-img_bw, cv2.RETR_TREE,
    116                                                cv2.CHAIN_APPROX_SIMPLE)
    117     elif cv2_version.startswith('3.2.'):
    118         _, contours, hierarchy = cv2.findContours(255-img_bw, cv2.RETR_TREE,
    119                                                   cv2.CHAIN_APPROX_SIMPLE)
    120 
    121     # Check each component and find the black circle
    122     min_cmpt = size[0] * size[1] * 0.005
    123     max_cmpt = size[0] * size[1] * 0.35
    124     num_circle = 0
    125     for ct, hrch in zip(contours, hierarchy[0]):
    126         # The radius of the circle is 1/3 of the length of the square, meaning
    127         # around 1/3 of the area of the square
    128         # Parental component should exist and the area is acceptable.
    129         # The contour of a circle should have at least 5 points
    130         child_area = cv2.contourArea(ct)
    131         if (hrch[3] == -1 or child_area < min_cmpt or child_area > max_cmpt
    132                     or len(ct) < 15):
    133             continue
    134         # Check the shapes of current component and its parent
    135         child_shape = component_shape(ct)
    136         parent = hrch[3]
    137         prt_shape = component_shape(contours[parent])
    138         prt_area = cv2.contourArea(contours[parent])
    139         dist_x = abs(child_shape['ctx']-prt_shape['ctx'])
    140         dist_y = abs(child_shape['cty']-prt_shape['cty'])
    141         # 1. 0.56*Parent's width < Child's width < 0.76*Parent's width.
    142         # 2. 0.56*Parent's height < Child's height < 0.76*Parent's height.
    143         # 3. Child's width > 0.1*Image width
    144         # 4. Child's height > 0.1*Image height
    145         # 5. 0.25*Parent's area < Child's area < 0.45*Parent's area
    146         # 6. Child is a black, and Parent is white
    147         # 7. Center of Child and center of parent should overlap
    148         if (prt_shape['width'] * 0.56 < child_shape['width']
    149                     < prt_shape['width'] * 0.76
    150                     and prt_shape['height'] * 0.56 < child_shape['height']
    151                     < prt_shape['height'] * 0.76
    152                     and child_shape['width'] > 0.1 * size[1]
    153                     and child_shape['height'] > 0.1 * size[0]
    154                     and 0.30 * prt_area < child_area < 0.50 * prt_area
    155                     and img_bw[child_shape['cty']][child_shape['ctx']] == 0
    156                     and img_bw[child_shape['top']][child_shape['left']] == 255
    157                     and dist_x < 0.1 * child_shape['width']
    158                     and dist_y < 0.1 * child_shape['height']):
    159             # Calculate circle center and size
    160             circle_ctx = float(child_shape['ctx'])
    161             circle_cty = float(child_shape['cty'])
    162             circle_w = float(child_shape['width'])
    163             circle_h = float(child_shape['height'])
    164             num_circle += 1
    165             # If more than one circle found, break
    166             if num_circle == 2:
    167                 break
    168     its.image.write_image(gray[..., np.newaxis]/255.0, name)
    169 
    170     if num_circle == 0:
    171         print 'No black circle was detected. Please take pictures according',
    172         print 'to instruction carefully!\n'
    173         assert num_circle == 1
    174 
    175     if num_circle > 1:
    176         print 'More than one black circle was detected. Background of scene',
    177         print 'may be too complex.\n'
    178         assert num_circle == 1
    179     return [circle_ctx, circle_cty, (circle_w+circle_h)/4.0]
    180 
    181 
    182 def component_shape(contour):
    183     """Measure the shape of a connected component.
    184 
    185     Args:
    186         contour: return from cv2.findContours. A list of pixel coordinates of
    187         the contour.
    188 
    189     Returns:
    190         The most left, right, top, bottom pixel location, height, width, and
    191         the center pixel location of the contour.
    192     """
    193     shape = {'left': np.inf, 'right': 0, 'top': np.inf, 'bottom': 0,
    194              'width': 0, 'height': 0, 'ctx': 0, 'cty': 0}
    195     for pt in contour:
    196         if pt[0][0] < shape['left']:
    197             shape['left'] = pt[0][0]
    198         if pt[0][0] > shape['right']:
    199             shape['right'] = pt[0][0]
    200         if pt[0][1] < shape['top']:
    201             shape['top'] = pt[0][1]
    202         if pt[0][1] > shape['bottom']:
    203             shape['bottom'] = pt[0][1]
    204     shape['width'] = shape['right'] - shape['left'] + 1
    205     shape['height'] = shape['bottom'] - shape['top'] + 1
    206     shape['ctx'] = (shape['left']+shape['right'])/2
    207     shape['cty'] = (shape['top']+shape['bottom'])/2
    208     return shape
    209 
    210 
    211 def define_reference_camera(pose_reference, cam_reference):
    212     """Determine the reference camera.
    213 
    214     Args:
    215         pose_reference: 0 for cameras, 1 for gyro
    216         cam_reference:  dict with key of physical camera and value True/False
    217     Returns:
    218         i_ref:          physical id of reference camera
    219         i_2nd:          physical id of secondary camera
    220     """
    221 
    222     if pose_reference == GYRO_REFERENCE:
    223         print 'pose_reference is GYRO'
    224         i_ref = list(cam_reference.keys())[0]  # pick first camera as ref
    225         i_2nd = list(cam_reference.keys())[1]
    226     else:
    227         print 'pose_reference is CAMERA'
    228         i_ref = (k for (k, v) in cam_reference.iteritems() if v).next()
    229         i_2nd = (k for (k, v) in cam_reference.iteritems() if not v).next()
    230     return i_ref, i_2nd
    231 
    232 
def main():
    """Test the multi camera system parameters related to camera spacing.

    Using the multi-camera physical cameras, take a picture of scene4
    (a black circle and surrounding square on a white background) with
    one of the physical cameras. Then find the circle center. Using the
    parameters:
        android.lens.poseReference
        android.lens.poseTranslation
        android.lens.poseRotation
        android.lens.intrinsicCalibration
        android.lens.distortion (if available)
    project the circle center to the world coordinates for each camera.
    Compare the difference between the two cameras' circle centers in
    world coordinates.

    Reproject the world coordinates back to pixel coordinates and compare
    against originals as a sanity check.

    Compare the circle sizes if the focal lengths of the cameras are
    different using
        android.lens.availableFocalLengths.
    """
    # Chart distance may be overridden on the command line with 'dist=<cm>'.
    chart_distance = CHART_DISTANCE_CM
    for s in sys.argv[1:]:
        if s[:5] == 'dist=' and len(s) > 5:
            chart_distance = float(re.sub('cm', '', s[5:]))
            print 'Using chart distance: %.1fcm' % chart_distance
    chart_distance *= 1.0E-2  # cm -> m

    with its.device.ItsSession() as cam:
        props = cam.get_camera_properties()
        its.caps.skip_unless(its.caps.read_3a(props) and
                             its.caps.per_frame_control(props) and
                             its.caps.logical_multi_camera(props))

        # Find physical camera IDs that support raw, and skip if less than 2
        ids = its.caps.logical_multi_camera_physical_ids(props)
        props_physical = {}
        physical_ids = []
        for i in ids:
            prop = cam.get_camera_properties_by_id(i)
            if its.caps.raw16(prop) and len(physical_ids) < 2:
                physical_ids.append(i)
                props_physical[i] = cam.get_camera_properties_by_id(i)

        debug = its.caps.debug_mode()
        avail_fls = props['android.lens.info.availableFocalLengths']
        pose_reference = props['android.lens.poseReference']

        # Find highest resolution image and determine formats
        fmts = ['yuv']
        if len(physical_ids) == 2:
            fmts.insert(0, 'raw')  # insert in first location in list
        else:
            # fewer than 2 RAW-capable cameras: fall back to YUV on the
            # first physical camera pair reported by the logical camera
            physical_ids = ids[0:1]

        w, h = its.objects.get_available_output_sizes('yuv', props)[0]

        # do captures on 2 cameras
        caps = {}
        for i, fmt in enumerate(fmts):
            # one logical YUV stream plus one physical stream per camera
            out_surfaces = [{'format': 'yuv', 'width': w, 'height': h},
                            {'format': fmt, 'physicalCamera': physical_ids[0]},
                            {'format': fmt, 'physicalCamera': physical_ids[1]}]

            out_surfaces_supported = cam.is_stream_combination_supported(out_surfaces)
            its.caps.skip_unless(out_surfaces_supported)

            # Do 3A and get the values
            s, e, _, _, fd = cam.do_3a(get_results=True,
                                       lock_ae=True, lock_awb=True)
            if fmt == 'raw':
                e_corrected = e * 2  # brighten RAW images
            else:
                e_corrected = e
            print 'out_surfaces:', out_surfaces
            req = its.objects.manual_capture_request(s, e_corrected, fd)
            # keyed by (format, physical camera id); logical capture discarded
            _, caps[(fmt, physical_ids[0])], caps[(fmt, physical_ids[1])] = cam.do_capture(
                    req, out_surfaces);

    # Analysis runs outside the device session; one pass per capture format.
    for j, fmt in enumerate(fmts):
        size = {}
        k = {}  # per-camera 3x3 intrinsic matrix
        cam_reference = {}  # True for the camera at the translation origin
        r = {}  # per-camera rotation matrix
        t = {}  # per-camera translation (world->camera after conversion below)
        circle = {}  # per-camera [center_x, center_y, radius] in pixels
        fl = {}  # per-camera focal length
        sensor_diag = {}  # per-camera sensor diagonal in pixels
        print '\nFormat:', fmt
        for i in physical_ids:
            # process image
            img = its.image.convert_capture_to_rgb_image(
                    caps[(fmt, i)], props=props_physical[i])
            size[i] = (caps[fmt, i]['width'], caps[fmt, i]['height'])

            # save images if debug
            if debug:
                its.image.write_image(img, '%s_%s_%s.jpg' % (NAME, fmt, i))

            # convert to [0, 255] images
            img *= 255

            # scale to match calibration data if RAW
            # NOTE(review): fixed fx=fy=2 assumes calibration was done at 2x
            # the RAW capture resolution — confirm against calibration setup.
            if fmt == 'raw':
                img = cv2.resize(img.astype(np.uint8), None, fx=2, fy=2)
            else:
                img = img.astype(np.uint8)

            # load parameters for each physical camera
            # intrinsicCalibration order: [f_x, f_y, c_x, c_y, skew]
            ical = props_physical[i]['android.lens.intrinsicCalibration']
            assert len(ical) == 5, 'android.lens.instrisicCalibration incorrect.'
            k[i] = np.array([[ical[0], ical[4], ical[2]],
                             [0, ical[1], ical[3]],
                             [0, 0, 1]])
            if j == 0:
                print 'Camera %s' % i
                print ' k:', k[i]

            rotation = np.array(props_physical[i]['android.lens.poseRotation'])
            if j == 0:
                print ' rotation:', rotation
            assert len(rotation) == 4, 'poseRotation has wrong # of params.'
            r[i] = rotation_matrix(rotation)

            t[i] = np.array(props_physical[i]['android.lens.poseTranslation'])
            if j == 0:
                print ' translation:', t[i]
            assert len(t[i]) == 3, 'poseTranslation has wrong # of params.'
            # zero translation marks the reference camera
            if (t[i] == TRANS_REF_MATRIX).all():
                cam_reference[i] = True
            else:
                cam_reference[i] = False

            # API spec defines poseTranslation as the world coordinate p_w_cam of
            # optics center. When applying [R|t] to go from world coordinates to
            # camera coordinates, we need -R*p_w_cam of the coordinate reported in
            # metadata.
            # ie. for a camera with optical center at world coordinate (5, 4, 3)
            # and identity rotation, to convert a world coordinate into the
            # camera's coordinate, we need a translation vector of [-5, -4, -3]
            # so that: [I|[-5, -4, -3]^T] * [5, 4, 3]^T = [0,0,0]^T
            t[i] = -1.0 * np.dot(r[i], t[i])
            if debug and j == 1:
                print 't:', t[i]
                print 'r:', r[i]

            # Correct lens distortion to image (if available)
            if its.caps.distortion_correction(props_physical[i]) and fmt == 'raw':
                distort = np.array(props_physical[i]['android.lens.distortion'])
                assert len(distort) == 5, 'distortion has wrong # of params.'
                # reorder android.lens.distortion [k1,k2,k3,p1,p2] into
                # OpenCV's expected [k1,k2,p1,p2,k3]
                cv2_distort = np.array([distort[0], distort[1],
                                        distort[3], distort[4],
                                        distort[2]])
                print ' cv2 distortion params:', cv2_distort
                its.image.write_image(img/255.0, '%s_%s_%s.jpg' % (
                        NAME, fmt, i))
                img = cv2.undistort(img, k[i], cv2_distort)
                its.image.write_image(img/255.0, '%s_%s_correct_%s.jpg' % (
                        NAME, fmt, i))

            # Find the circles in grayscale image
            circle[i] = find_circle(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY),
                                    '%s_%s_gray_%s.jpg' % (NAME, fmt, i))
            print "Circle radius ", i, ": ", circle[i][2]

            # Undo zoom to image (if applicable). Assume that the maximum
            # physical YUV image size is close to active array size.
            if fmt == 'yuv':
                ar = props_physical[i]['android.sensor.info.activeArraySize']
                arw = ar['right'] - ar['left']
                arh = ar['bottom'] - ar['top']
                cr = caps[(fmt, i)]['metadata']['android.scaler.cropRegion'];
                crw = cr['right'] - cr['left']
                crh = cr['bottom'] - cr['top']
                # Assume pixels remain square after zoom, so use same zoom
                # ratios for x and y.
                zoom_ratio = min(1.0 * arw / crw, 1.0 * arh / crh);
                circle[i][0] = cr['left'] + circle[i][0] / zoom_ratio
                circle[i][1] = cr['top'] + circle[i][1] / zoom_ratio
                circle[i][2] = circle[i][2] / zoom_ratio

            # Find focal length & sensor size
            fl[i] = props_physical[i]['android.lens.info.availableFocalLengths'][0]
            sensor_diag[i] = math.sqrt(size[i][0] ** 2 + size[i][1] ** 2)

        i_ref, i_2nd = define_reference_camera(pose_reference, cam_reference)
        print 'reference camera: %s, secondary camera: %s' % (i_ref, i_2nd)

        # Convert circle centers to real world coordinates
        x_w = {}
        y_w = {}
        if props['android.lens.facing']:
            # API spec defines +z is pointing out from screen
            # NOTE(review): truthy facing value is treated as BACK here —
            # confirm against android.lens.facing enum values.
            print 'lens facing BACK'
            chart_distance *= -1
        for i in [i_ref, i_2nd]:
            x_w[i], y_w[i] = convert_to_world_coordinates(
                    circle[i][0], circle[i][1], r[i], t[i], k[i], chart_distance)

        # Back convert to image coordinates for sanity check
        # (each camera's world point is reprojected through the OTHER camera)
        x_p = {}
        y_p = {}
        x_p[i_2nd], y_p[i_2nd] = convert_to_image_coordinates(
                [x_w[i_ref], y_w[i_ref], chart_distance],
                r[i_2nd], t[i_2nd], k[i_2nd])
        x_p[i_ref], y_p[i_ref] = convert_to_image_coordinates(
                [x_w[i_2nd], y_w[i_2nd], chart_distance],
                r[i_ref], t[i_ref], k[i_ref])

        # Summarize results
        for i in [i_ref, i_2nd]:
            print ' Camera: %s' % i
            print ' x, y (pixels): %.1f, %.1f' % (circle[i][0], circle[i][1])
            print ' x_w, y_w (mm): %.2f, %.2f' % (x_w[i]*1.0E3, y_w[i]*1.0E3)
            print ' x_p, y_p (pixels): %.1f, %.1f' % (x_p[i], y_p[i])

        # Check center locations
        # NOTE(review): err is in meters but the assert compares against
        # ALIGN_TOL (0.01, documented as a pixel-diagonal multiplier) while
        # the message quotes ALIGN_TOL_MM — confirm the intended threshold.
        err = np.linalg.norm(np.array([x_w[i_ref], y_w[i_ref]]) -
                             np.array([x_w[i_2nd], y_w[i_2nd]]))
        print 'Center location err (mm): %.2f' % (err*1E3)
        msg = 'Center locations %s <-> %s too different!' % (i_ref, i_2nd)
        msg += ' val=%.2fmm, THRESH=%.fmm' % (err*1E3, ALIGN_TOL_MM*1E3)
        assert err < ALIGN_TOL, msg

        # Check projections back into pixel space
        for i in [i_ref, i_2nd]:
            err = np.linalg.norm(np.array([circle[i][0], circle[i][1]]) -
                                 np.array([x_p[i], y_p[i]]))
            print 'Camera %s projection error (pixels): %.1f' % (i, err)
            tol = ALIGN_TOL * sensor_diag[i]
            msg = 'Camera %s project locations too different!' % i
            msg += ' diff=%.2f, TOL=%.2f' % (err, tol)
            assert err < tol, msg

        # Check focal length and circle size if more than 1 focal length
        if len(avail_fls) > 1:
            print 'Circle radii (pixels); ref: %.1f, 2nd: %.1f' % (
                    circle[i_ref][2], circle[i_2nd][2])
            print 'Focal lengths (diopters); ref: %.2f, 2nd: %.2f' % (
                    fl[i_ref], fl[i_2nd])
            print 'Sensor diagonals (pixels); ref: %.2f, 2nd: %.2f' % (
                    sensor_diag[i_ref], sensor_diag[i_2nd])
            msg = 'Circle size scales improperly! RTOL=%.1f' % CIRCLE_RTOL
            msg += '\nMetric: radius/focal_length*sensor_diag should be equal.'
            # radius scales with focal length; normalize by sensor diagonal
            assert np.isclose(circle[i_ref][2]/fl[i_ref]*sensor_diag[i_ref],
                              circle[i_2nd][2]/fl[i_2nd]*sensor_diag[i_2nd],
                              rtol=CIRCLE_RTOL), msg

if __name__ == '__main__':
    main()
    485