Sunday, 4 October 2015

Some Sample outputs

Here are a couple of sample point clouds, derived using the defaults described below, from images taken with a Sony DSLR-A850.

This is "the building across the road from the Fox in Shipley", and the image set used has 34 images with 6048*4032 pixels apiece.

There are two runs here; one at default and one at high resolution. Processing these as a background task on my desktop (i5-2300@2.8G) takes ~10-15 minutes per image at the default settings, with feature matching taking four or five hours and bundle adjustment a bit over four and a half hours. For the high resolution version it's about 20-25 minutes per image, with the matcher taking 12 hours and the bundle adjustment another 13 hours or so.

Default Resolution

High Resolution
The higher resolution run shows improved detail tracking in the lower contrast image regions, with many more features resolved on the roof in particular and on the lamp post body.

A working example...

Here's a quick overview of the basic code structure, which should give a functional reconstruction (well "it works for me"(tm)).

This is a little terser than the detailed code in the following posts, and some error checking we'd have in a live application has been stripped, but it should work as proof of concept code; the next few posts go into a bit more detail. It reloads existing image features where possible (since feature generation takes the bulk of the processing time on my setup), and the explicit namespacing has been dropped to keep the code terse:


#include <iostream>
#include "openMVG/sfm/sfm.hpp"
#include "openMVG/exif/exif_IO_EasyExif.hpp"
#include "openMVG/image/image.hpp"
#include "openMVG/stl/split.hpp"

#include "openMVG/matching_image_collection/Matcher_Regions_AllInMemory.hpp"
#include "openMVG/matching_image_collection/GeometricFilter.hpp"
#include "openMVG/matching_image_collection/F_ACRobust.hpp"
#include "openMVG/sfm/sfm.hpp"

using namespace std;
using namespace openMVG::sfm;
using namespace openMVG::features;
using namespace openMVG::image;
using namespace openMVG::cameras;
using namespace openMVG::exif;
using namespace stlplus;

class ImageList {
public:
  ImageList();
  ~ImageList();
  void setDirectory(const char* nm);
  int countFiles();
  void loadAllImages();
  void loadImage(std::string s);
  void generateFeatures();
  void computeMatches();
  void sequentialReconstruct();

private:
  string _directory;
  string _matches_full;
  string _matches_filtered;
  vector<string> _fnames;
  SfM_Data _sfm_data;
  unique_ptr<Regions> _regionsType;
};


ImageList::ImageList() {
}

ImageList::~ImageList() {
}

void ImageList::setDirectory(const char* nm) {
vector<string> file_list;
  _directory = nm;
  _sfm_data.s_root_path = nm;
  file_list = stlplus::folder_files(_directory);

  sort(file_list.begin(), file_list.end());

  for (vector<string>::const_iterator it = file_list.begin(); it != file_list.end(); it++)  {
    string which = *it;
    string imnm = stlplus::create_filespec(_directory, which);
    _fnames.push_back(imnm);
  }
  _matches_full = string(_directory + "/matches.putative.txt");
  _matches_filtered = string(_directory + "/matches.f.txt");
}

int ImageList::countFiles() {
return _fnames.size();
}

void ImageList::loadAllImages() {
  for ( vector<string>::const_iterator iter_image = _fnames.begin(); iter_image != _fnames.end(); iter_image++ ) {
    string which = *iter_image;
    loadImage(which);
  }
return;
}

void ImageList::loadImage(string which) {
double width, height, image_focal;
double focal, ppx, ppy;

Views& views = _sfm_data.views;
Intrinsics& intrinsics = _sfm_data.intrinsics;
shared_ptr<IntrinsicBase> intrinsic (NULL);

  if (GetFormat(which.c_str()) == openMVG::image::Unknown)
    return;

  unique_ptr<Exif_IO_EasyExif> exifReader(new Exif_IO_EasyExif());
  if (!exifReader->open(which))
    return;

  image_focal = static_cast<double>(exifReader->getFocal());
  width = static_cast<double>(exifReader->getWidth());
  height = static_cast<double>(exifReader->getHeight());

  ppx = width / 2.0;
  ppy = height / 2.0;

  if (image_focal == 0)
    return;

  printf("Image %s: %f x %f, Focal Length %f\n", which.c_str(), width, height, image_focal);

  const double ccdw = 5.75; // Cheap Samsung S890
  //const double ccdw = 4.62;// Also Cheap Canon SX410-IS
  //const double ccdw = 7.39; // Nicer Canon G9
  //const double ccdw = 35.9; // Very Nice Sony DSLR/A850
  focal = std::max (width, height) * image_focal / ccdw;

  View v(which, views.size(), views.size(), views.size(), width, height);

  intrinsic = make_shared<Pinhole_Intrinsic_Radial_K3> (width, height, focal,
                                                        ppx, ppy, 0.0, 0.0, 0.0);
  intrinsics[v.id_intrinsic] = intrinsic;
  views[v.id_view] = std::make_shared<openMVG::sfm::View>(v);
}


void ImageList::generateFeatures(void) {
AKAZEParams params(AKAZEConfig(), AKAZE_MSURF);
unique_ptr<AKAZE_Image_describer> image_describer(new AKAZE_Image_describer(params, true));

  image_describer->Allocate(_regionsType);
  image_describer->Set_configuration_preset(NORMAL_PRESET);

  for(Views::const_iterator iterViews = _sfm_data.views.begin(); iterViews != _sfm_data.views.end(); ++iterViews) {
  const View * view = iterViews->second.get();
  const string sFeat = stlplus::create_filespec(_directory, stlplus::basename_part(view->s_Img_path), "feat");
  const string sDesc = stlplus::create_filespec(_directory, stlplus::basename_part(view->s_Img_path), "desc");

    if (!stlplus::file_exists(sFeat)) {
      Image<unsigned char> imageGray;
      printf("Creating %s\n", sFeat.c_str());
      ReadImage(view->s_Img_path.c_str(), &imageGray);
      unique_ptr<Regions> regions;
      image_describer->Describe(imageGray, regions);
      image_describer->Save(regions.get(), sFeat, sDesc);
    }
    else {
      printf("Using existing features from %s\n", sFeat.c_str());
    }
  }
}

void ImageList::computeMatches() {
float fDistRatio = 0.6f; // Higher is stricter
openMVG::matching::PairWiseMatches map_PutativesMatches;
vector<string> vec_fileNames;
vector<pair<size_t, size_t> > vec_imagesSize;

  for (Views::const_iterator iter = _sfm_data.GetViews().begin(); iter != _sfm_data.GetViews().end(); ++iter) {
    const View * v = iter->second.get();
    vec_fileNames.push_back(stlplus::create_filespec(_sfm_data.s_root_path, v->s_Img_path));
    vec_imagesSize.push_back(make_pair( v->ui_width, v->ui_height) );
  }

  unique_ptr<openMVG::Matcher_Regions_AllInMemory> collectionMatcher;
  collectionMatcher.reset(new openMVG::Matcher_Regions_AllInMemory(fDistRatio, openMVG::ANN_L2));

  if (collectionMatcher->loadData(_regionsType, vec_fileNames, _directory)) {
    openMVG::Pair_Set pairs;
    pairs = openMVG::exhaustivePairs(_sfm_data.GetViews().size());
    collectionMatcher->Match(vec_fileNames, pairs, map_PutativesMatches);
    ofstream file(_matches_full);
    if (file.is_open())
      PairedIndMatchToStream(map_PutativesMatches, file);
    file.close();
  }

  shared_ptr<Features_Provider> feats_provider = make_shared<Features_Provider>();

  if (!feats_provider->load(_sfm_data, _directory, _regionsType))
    return;

  openMVG::PairWiseMatches map_GeometricMatches;

  ImageCollectionGeometricFilter collectionGeomFilter(feats_provider.get());

  const double maxResidualError = 1.0; // dflt 1.0; // Lower is stricter

  collectionGeomFilter.Filter(
            GeometricFilter_FMatrix_AC(maxResidualError),
            map_PutativesMatches,
            map_GeometricMatches,
            vec_imagesSize);

  ofstream file (_matches_filtered);
  if (file.is_open())
    PairedIndMatchToStream(map_GeometricMatches, file);
  file.close();

}


void ImageList::sequentialReconstruct() {
string output_directory = "sequential";
string sfm_data = stlplus::create_filespec(output_directory, "sfm_data", ".json");
string cloud_data = stlplus::create_filespec(output_directory, "cloud_and_poses", ".ply");
string report_name = stlplus::create_filespec(output_directory, "Reconstruction_Report", ".html");

  if (!stlplus::folder_exists(output_directory))
    stlplus::folder_create(output_directory);

  SequentialSfMReconstructionEngine sfmEngine(_sfm_data, output_directory, report_name);
  shared_ptr<Features_Provider> feats_provider = std::make_shared<Features_Provider>();
  shared_ptr<Matches_Provider> matches_provider = std::make_shared<Matches_Provider>();

  feats_provider->load(_sfm_data, _directory, _regionsType);
  matches_provider->load(_sfm_data, _matches_filtered);

  sfmEngine.SetFeaturesProvider(feats_provider.get());
  sfmEngine.SetMatchesProvider(matches_provider.get());

  openMVG::Pair initialPairIndex;
  Views::const_iterator it;

  it = _sfm_data.GetViews().begin();
  const View *v1 = it->second.get();
  it++;
  const View *v2 = it->second.get();

  initialPairIndex.first = v1->id_view;
  initialPairIndex.second = v2->id_view;

  sfmEngine.setInitialPair(initialPairIndex);
  sfmEngine.Set_bFixedIntrinsics(false);
  sfmEngine.SetUnknownCameraType(EINTRINSIC(PINHOLE_CAMERA_RADIAL3));

  sfmEngine.Process();
  Save(sfmEngine.Get_SfM_Data(), sfm_data, ESfM_Data(openMVG::sfm::ALL));
  Save(sfmEngine.Get_SfM_Data(), cloud_data, ESfM_Data(openMVG::sfm::ALL));
}

int LoadImageListing(ImageList* iml, const char* imagedir) {
  iml->setDirectory(imagedir);

  if (iml->countFiles() > 0) {
    iml->loadAllImages();
  }
return 0;
}



int main(int argc, char* argv[])
{
ImageList iml;

  if (argc != 2) {
    printf("supply a target directory\n");
    return 0;
  }
  LoadImageListing(&iml, argv[1]);
  iml.generateFeatures();
  iml.computeMatches() ;
  iml.sequentialReconstruct();
}

Bundle adjust and generate model


For sequential reconstruction this is a fairly simple process.

We:
  • Create a Sequential SfM Reconstruction Engine instance
  • Create a Features Provider for it, from our Features (Regions) information
  • Create a Matches Provider for it, from our Filtered list of matches
  • Set the initial image pair
  • Run the SfM Engine
  • Save the output



We have already made most of the configuration decisions ahead of this step, so we don't have many options.
In the boilerplate example here I use the first two images unconditionally. This is OK for my image sets, but may need adjusting for some source data sets if the first two images don't have a good overlap.
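If the first two views aren't suitable, one option is to look the initial pair up by image name instead. A minimal sketch, using only the View fields already present in our SfM_Data (the helper function name and file names here are made up for illustration):

openMVG::IndexT findViewIdByName(const openMVG::sfm::SfM_Data& sfm_data, const std::string& name)
{
  for (openMVG::sfm::Views::const_iterator it = sfm_data.GetViews().begin();
                                 it != sfm_data.GetViews().end(); ++it)
  {
    const openMVG::sfm::View *v = it->second.get();
    // s_Img_path holds the full filespec, so compare on the base name
    if (stlplus::basename_part(v->s_Img_path) == name)
      return v->id_view;
  }
  return openMVG::UndefinedIndexT; // "not found" marker
}

  // e.g., picking a pair known to overlap well (hypothetical file names):
  initialPairIndex.first  = findViewIdByName(_sfm_data, "DSC01234");
  initialPairIndex.second = findViewIdByName(_sfm_data, "DSC01238");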

void ImageList::sequentialReconstruct()
{
std::string output_directory = "sequential";
std::string sfm_data = stlplus::create_filespec(output_directory, "sfm_data", ".json");
std::string cloud_data = stlplus::create_filespec(output_directory, "cloud_and_poses", ".ply");
std::string report_name = stlplus::create_filespec(output_directory, "Reconstruction_Report", ".html");

  if (!stlplus::folder_exists(output_directory))
    stlplus::folder_create(output_directory);

  openMVG::sfm::SequentialSfMReconstructionEngine sfmEngine(_sfm_data, output_directory, report_name);
  std::shared_ptr<openMVG::sfm::Features_Provider> feats_provider = std::make_shared<openMVG::sfm::Features_Provider>();
  std::shared_ptr<openMVG::sfm::Matches_Provider> matches_provider = std::make_shared<openMVG::sfm::Matches_Provider>();

  feats_provider->load(_sfm_data, getDirectory(), _regionsType);
  matches_provider->load(_sfm_data, _matches_filtered);


  sfmEngine.SetFeaturesProvider(feats_provider.get());
  sfmEngine.SetMatchesProvider(matches_provider.get());

  openMVG::Pair initialPairIndex;
  openMVG::sfm::Views::const_iterator it;

  it = _sfm_data.GetViews().begin();
  const openMVG::sfm::View *v1 = it->second.get();
  it++;
  const openMVG::sfm::View *v2 = it->second.get();

  initialPairIndex.first = v1->id_view;
  initialPairIndex.second = v2->id_view;

  sfmEngine.setInitialPair(initialPairIndex);

  sfmEngine.Set_bFixedIntrinsics(false);
  sfmEngine.SetUnknownCameraType(openMVG::cameras::EINTRINSIC(openMVG::cameras::PINHOLE_CAMERA_RADIAL3));

  sfmEngine.Process();
  Save(sfmEngine.Get_SfM_Data(), sfm_data, openMVG::sfm::ESfM_Data(openMVG::sfm::ALL));
  Save(sfmEngine.Get_SfM_Data(), cloud_data, openMVG::sfm::ESfM_Data(openMVG::sfm::ALL));
}

Filtering the Features


This is basically an image matching process, which looks for corresponding points across the image set.

It's actually a two step process – an initial run through looks for likely matches (the "putative" matches) using one of a set of possible matching algorithms. Following this, the putative matches are run through a geometric filter which should discard outlier (unreliable) matches.

Once again this isn't something you need to understand in behind-the-scenes detail in order to use – there are a couple of default values which set the thresholds for flagging a match and discarding outliers, but otherwise the API is fairly opaque.

However the Region information must be used to determine which of the Scalar or Binary matcher options are available.

First – Load the image information

Run through the view list, get the file names and push to a list of names and sizes

 
Next – The matchers

Then we create a matcher to process the image information. This is of the base type openMVG::Matcher_Regions_AllInMemory, backed by whichever derived matcher type matches the region type we used (scalar or binary).

We pass in a distance ratio, used to determine the threshold for discarding spurious points: a higher value is stricter (from the Nearest Neighbour distance matching code, the "Ratio between best and second best matches must be superior to [the] given threshold").

Since we used the default AKAZE earlier we can simply assign an ANN_L2 matcher to it.
 
On a successful load we use openMVG::exhaustivePairs() to generate all the pair values (type openMVG::Pair_Set), then run the Match function; the output is a set of matches which we write to a file using the PairedIndMatchToStream() call.

Next: Geometric Filter

The geometric filter process takes a maximum residual error value, the complete list of potential (putative) matches alongside a geometric model fitting function. It then generates an output set of matches that fit the model to within the residual error.

There are three available solvers, which are used to generate a target model to filter points against:

  • GeometricFilter_FMatrix_AC AContrario Fundamental matrix solver
  • GeometricFilter_EMatrix_AC AContrario Essential matrix solver
  • GeometricFilter_HMatrix_AC AContrario Homography matrix solver

So, for the Fundamental Matrix solver we pass it the list of putative match pairs; using these points the geometric filter estimates the fundamental matrix and removes outliers from the set of matches. Following this we save the final matches to a file using PairedIndMatchToStream().

Although there's some fiddly detail hiding behind this model, we can simply pass in the filter reference and the maximum residual error that is used to discard outliers; a lower value here discards more points (but too high a value will include outliers and cause problems when bundle adjusting).
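As a sketch of what swapping the solver looks like, only the functor passed to Filter() changes; this assumes the homography filter takes the same residual error constructor argument as the fundamental matrix one (check the corresponding H_ACRobust header for the exact signature):

  // Hypothetical variant: filter against a homography model instead,
  // e.g. for largely planar scenes.
  collectionGeomFilter.Filter(
            GeometricFilter_HMatrix_AC(maxResidualError),
            map_PutativesMatches,
            map_GeometricMatches,
            vec_imagesSize);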

 
There's some more information on using fundamental matrix and a-contrario estimation at http://www.loria.fr/~sur/articles/noury07fundamental.pdf

A simple MWE using some default values from the sample code is:
void ImageList::computeMatches()
{
float fDistRatio = 0.6f;
openMVG::matching::PairWiseMatches map_PutativesMatches;
std::vector<std::string> vec_fileNames;
std::vector<std::pair<size_t, size_t> > vec_imagesSize;

  for (openMVG::sfm::Views::const_iterator iter = _sfm_data.GetViews().begin(); iter != _sfm_data.GetViews().end(); ++iter)
  {
    const openMVG::sfm::View * v = iter->second.get();
    vec_fileNames.push_back(stlplus::create_filespec(_sfm_data.s_root_path, v->s_Img_path));
    vec_imagesSize.push_back( std::make_pair( v->ui_width, v->ui_height) );
  }

  std::unique_ptr<openMVG::Matcher_Regions_AllInMemory> collectionMatcher;
  collectionMatcher.reset(new openMVG::Matcher_Regions_AllInMemory(fDistRatio, openMVG::ANN_L2));

  if (collectionMatcher->loadData(_regionsType, vec_fileNames, getDirectory()))
  {
    openMVG::Pair_Set pairs;
    pairs = openMVG::exhaustivePairs(_sfm_data.GetViews().size());
    collectionMatcher->Match(vec_fileNames, pairs, map_PutativesMatches);
    std::ofstream file(_matches_full);
    if (file.is_open())
      PairedIndMatchToStream(map_PutativesMatches, file);
    file.close();
  }

  std::shared_ptr<openMVG::sfm::Features_Provider> feats_provider = std::make_shared<openMVG::sfm::Features_Provider>();

  if (!feats_provider->load(_sfm_data, getDirectory(), _regionsType))
    return;

   openMVG::PairWiseMatches map_GeometricMatches;

  ImageCollectionGeometricFilter collectionGeomFilter(feats_provider.get());

  const double maxResidualError = 1.0;

  collectionGeomFilter.Filter(
            GeometricFilter_FMatrix_AC(maxResidualError),
            map_PutativesMatches,
            map_GeometricMatches,
            vec_imagesSize);

  std::ofstream file (_matches_filtered);
  if (file.is_open())
    PairedIndMatchToStream(map_GeometricMatches, file);
  file.close();

}

The Feature Detection Algorithms Available


Algorithms Available

The choice of which descriptor to use is a tradeoff between computation time and data set – it's a decision between:

SIFT
The Lowe SIFT (Scale-Invariant Feature Transform) Algorithm. This is non-free (patented in the US), which may restrict its application, but it's a good general purpose algorithm and one for which GPU accelerated implementations are available.

The SIFT algorithm supports a number of “standard” SIFT parameters (original vs upscale, Max octaves count, Scales per octave, Max ratio of Hessian eigenvalues and Min contrast) defined in the SiftParams structure.

The SIFT_Image_describer class also supports three presets: Normal, High and Ultra, which tune the minimum contrast value as well as introducing upscaling at the Ultra preset.
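As a sketch, creating the SIFT describer follows the same pattern as the AKAZE one used later in these posts; this assumes the SIFT_Image_describer constructor takes a SiftParams and an orientation flag (check the nonFree SIFT describer header in your openMVG version for the exact signature):

  std::unique_ptr<openMVG::features::Image_describer> image_describer;
  image_describer.reset(new openMVG::features::SIFT_Image_describer(
        openMVG::features::SiftParams(), true));
  image_describer->Set_configuration_preset(openMVG::features::HIGH_PRESET);
  image_describer->Allocate(_regionsType);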


AKAZE
(or A-KAZE, if you prefer), “a novel 2D feature detection and description method that operates completely in a nonlinear scale space”. This uses the AKAZEConfig config structure.

Here we can choose one of three variants
  • MSURF (the default “Modified Speeded Up Robust Features”)
  • LIOP (“Local Intensity Order Pattern”)
  • MLDB (“Modified-Local Difference Binary”)

There are also three preset thresholds (Normal, High & Ultra) which tune the parameters used by the feature recognition algorithm.
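As a sketch, switching to the binary MLDB variant and the High preset only changes the describer construction; AKAZE_MLDB is assumed to be the enum value for the MLDB variant, alongside the AKAZE_MSURF value used elsewhere in these posts:

  openMVG::features::AKAZEParams params(
        openMVG::features::AKAZEConfig(), openMVG::features::AKAZE_MLDB);
  std::unique_ptr<openMVG::features::AKAZE_Image_describer> image_describer(
        new openMVG::features::AKAZE_Image_describer(params, true));
  image_describer->Set_configuration_preset(openMVG::features::HIGH_PRESET);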

There's also AKAZE OCV: This is the OpenCV supplied AKAZE.

The variants are described in the KAZE paper.

And the AKAZE paper http://www.robesafe.com/personal/pablo.alcantarilla/papers/Alcantarilla13bmvc.pdf has comparisons for AKAZE, SIFT and several other feature extraction methods.

And LIOP is explained in more detail here http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.371.519


The gory details

You don't need to know any of this, if you're simply using the stock methods, but...

Each region is a set of 2D features (the PointFeature class) describing a position, scale and orientation, and stored in the ".feat" file. A descriptor is essentially a container of raw data values (the Descriptor class) which corresponds to the description of a single feature. Since the descriptors depend on the feature algorithm used, they're treated as opaque chunks of data at this point, so the descriptor is constructed with a "type, count" template.
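As a rough illustration of the "type, count" idea (the element type and count here are illustrative values only, not what any particular describer actually uses):

  // A single 2D feature position (the PointFeature class).
  openMVG::features::PointFeature feat(120.5f, 64.0f);

  // An opaque fixed-size block of raw values: the "type, count" template.
  openMVG::features::Descriptor<float, 64> desc; // 64 floats per feature, purely illustrative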

For an example of the mapping see the example file main_ComputeFeatures_OpenCV.cpp, which demonstrates this working with off-the-shelf descriptors from OpenCV. In this case we see the detectAndCompute() method of the AKAZE extractor, which produces a vector of Keypoints and a Descriptor output array. The keypoints are pushed to the feature list, and each Descriptor is serialised as raw data and pushed onto the Descriptors list.

Feature Generation

The background

The next step in the process is to examine the image data and extract features which can be tracked across multiple images.

OpenMVG offers a selection of feature extraction algorithms. The two primary feature extraction methods are SIFT and AKAZE.

Both offer some tuning of parameters for feature recognition, and the selection of the feature extraction method has some impact on the later processing stages.

Generate the initial features

To generate the list of features in an image we use the base class openMVG::features::Image_describer.

This provides the base class for a number of implementations which are used to extract features from an image, specifically AKAZE, AKAZE_OCV (The OpenCV AKAZE) and SIFT.
The actual points in the image are stored in a regions object openMVG::features::Regions, which provides a simple mechanism to extract the features into a region, and then serialise it to a file. Each region object represents a set of regions as “2D position” and “attributes”. The region attributes are referred to as Descriptors.

It's actually very simple to use, without getting involved in the details. The basic workflow is (using AKAZE as an example):
  • Create a unique pointer to an openMVG::features::Image_describer
  • Create a new openMVG::features::AKAZE_Image_describer and assign it to the generic image describer pointer
  • Call the image describer Allocate method, passing a reference to a openMVG::features::Regions pointer
  • Then, for each view
    • Create an openMVG::features::Regions object to represent a set of regions
    • Read the image data as a greyscale, using openMVG::image::ReadImage()
    • Call the image describer Describe method on the image
    • Store the results to a file using the image describer Save method

This results in a set of feature and description files (".feat" and ".desc") for each image. The openMVG::features::Regions pointer reference is used later to pass to functions which need to know how to interpret the feature data.
Note that we must use the .feat and .desc name convention. This is because when we use the library functions to reload this information for later processing it will use this convention to associate the image files with feature and description information.

void ImageList::generateFeatures(void)
{
std::unique_ptr<openMVG::features::Image_describer> image_describer;
openMVG::image::Image<unsigned char> imageGray;

  image_describer.reset(new openMVG::features::AKAZE_Image_describer(
        openMVG::features::AKAZEParams(openMVG::features::AKAZEConfig(), openMVG::features::AKAZE_MSURF),
        true));
  image_describer->Allocate(_regionsType);
  //image_describer->Set_configuration_preset(openMVG::features::HIGH_PRESET);

  for(openMVG::sfm::Views::const_iterator iterViews = _sfm_data.views.begin();
                                 iterViews != _sfm_data.views.end(); 
                                 ++iterViews)
  {
  const openMVG::sfm::View * view = iterViews->second.get();
  const std::string sFeat = stlplus::create_filespec(getDirectory(), 
                                                     stlplus::basename_part(view->s_Img_path), "feat");
  const std::string sDesc = stlplus::create_filespec(getDirectory(), 
                                                     stlplus::basename_part(view->s_Img_path), "desc");

    if (!stlplus::file_exists(sFeat))
    {
      openMVG::image::ReadImage(view->s_Img_path.c_str(), &imageGray);
      std::unique_ptr<openMVG::features::Regions> regions;
      image_describer->Describe(imageGray, regions);
      image_describer->Save(regions.get(), sFeat, sDesc);
    }
  }
}



In working code it's generally worth reloading the feature and descriptor files, rather than going through the lengthy process of generating each time, so the processing should check for the existence of the file in question before recalculating.

(also note the C++11 use of ".get()", ".reset()", etc. to operate on the smart pointer itself rather than the referenced object)

One wrinkle is the use of openMVG::features::Regions: This type is a list of Region descriptors, used to save, load and manipulate the feature/descriptor lists. This basically falls into one of two types – either Scalar or Binary regions based on the type of the descriptor.

Depending on the type of descriptor chosen then there are some limits on which filters we can apply to the features. So we have:
  • SIFT, AKAZE Float & AKAZE LIOP: Scalar Regions
  • AKAZE Binary (MLDB): Binary Regions

And when we run the matcher (at the next step) we can choose
  • Binary Regions: Brute Force Hamming
  • Scalar Regions: Brute Force & ANN L2 matchers

So when we put together this section of code we must initially select the image description algorithm and its configuration, and this determines which matcher implementations we can choose later.
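So, as a sketch, the only line that changes in computeMatches() is the matcher construction; for binary (MLDB) regions we'd pass a Hamming matcher rather than the ANN_L2 one used here (BRUTE_FORCE_HAMMING is assumed to be the corresponding enum value):

  // Scalar regions (SIFT, AKAZE MSURF/LIOP):
  collectionMatcher.reset(new openMVG::Matcher_Regions_AllInMemory(fDistRatio, openMVG::ANN_L2));

  // Binary regions (AKAZE MLDB) - hypothetical equivalent:
  collectionMatcher.reset(new openMVG::Matcher_Regions_AllInMemory(fDistRatio, openMVG::BRUTE_FORCE_HAMMING));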

More on this later...

Saturday, 26 September 2015

Loading the images

Overview

The core abstraction here is a view, represented by openMVG::sfm::View.

This represents a single image, and contains a reference to the image path as well as the camera information associated with it.

The top level SfM_Data object holds a list of View objects, openMVG::sfm::Views, which is a Hash Map indexed through the IndexT Id's of each View.

We also need to consider the camera intrinsic structure mentioned earlier, openMVG::cameras::IntrinsicBase. The SfM_Data object also holds an openMVG::sfm::Intrinsics list - another hash map which contains an Id indexed map of camera intrinsics.
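For reference, both containers boil down to ID-indexed hash maps of shared pointers, roughly along these lines (the real typedefs live in the SfM_Data headers, so treat this as a sketch):

  // Approximately what SfM_Data exposes (see sfm_data.hpp for the real definitions):
  typedef openMVG::Hash_Map<openMVG::IndexT, std::shared_ptr<openMVG::sfm::View> > Views;
  typedef openMVG::Hash_Map<openMVG::IndexT, std::shared_ptr<openMVG::cameras::IntrinsicBase> > Intrinsics;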

In theory we could share the intrinsics, since several of the images will come from a single camera (and the reference code does this), however it's simpler just to have an intrinsic structure per picture.

Using unique cameras per image does have a fairly major drawback though: During the bundle adjustment OpenMVG will refine the camera intrinsics for us, and dump these in the output. Obviously if we were sharing a camera model these distortion parameters should converge to a single model for a given camera, however since we're using separate intrinsics then the distortion parameters per view can be slightly different even if they should not be.

This problem isn't enough to break things completely, but it does mean our code will be sub-optimal here. For now let's run with it though, and fix it later.

The Image Load Process

Create a view, with an instance of openMVG::sfm::View

This simply takes the basic View constructor parameters, which include the image name, the view intrinsic and pose ID's associated with the view and the width and height of the image.  Note that Pose is the camera extrinsic – it's basically a 3D transform, but we don't need to get involved with it right now.

For the ID's we simply use the current view size to provide a unique integer value. We can use common ID's for the view, intrinsic & pose because they're stored in different lists. As a result we wind up with the view constructor:

openMVG::sfm::View v(which, views.size(), views.size(), views.size(), width, height);

We also create an Intrinsic, allocating and filling in a copy of openMVG::cameras::IntrinsicBase

For our example we're using the class: openMVG::cameras::Pinhole_Intrinsic_Radial_K3 and leaving the distortion parameters as 0.

We add the intrinsic & view to the top level hash map of SfM_Data, with the Id's from the view object that we allocated earlier using the array accessor notation:

  intrinsics[v.id_intrinsic] = intrinsic;
  views[v.id_view] = std::make_shared<openMVG::sfm::View>(v);

Putting this all together we get a final MWE code layout of:

void ImageList::loadImage(std::string which)
{
double width;
double height;
double image_focal;
double focal;
double ppx;
double ppy;

openMVG::sfm::Views& views = _sfm_data.views;
openMVG::sfm::Intrinsics& intrinsics = _sfm_data.intrinsics;
std::shared_ptr<openMVG::cameras::IntrinsicBase> intrinsic (NULL);

  if (openMVG::image::GetFormat(which.c_str()) == openMVG::image::Unknown)
    return;


  std::unique_ptr<openMVG::exif::Exif_IO_EasyExif> exifReader(new openMVG::exif::Exif_IO_EasyExif());
  if (!exifReader->open(which))
    return;

  image_focal = static_cast<double>(exifReader->getFocal());
  width = static_cast<double>(exifReader->getWidth());
  height = static_cast<double>(exifReader->getHeight());

  ppx = width / 2.0;
  ppy = height / 2.0;

  if (image_focal == 0)
    return;

  printf("Image %s: %f x %f, Focal Length %f\n", which.c_str(), width, height, image_focal);

  const double ccdw = 5.75;
  focal = std::max (width, height) * image_focal / ccdw;

  openMVG::sfm::View v(which, views.size(), views.size(), views.size(), width, height);

  intrinsic = std::make_shared<openMVG::cameras::Pinhole_Intrinsic_Radial_K3> (width, height, focal, ppx, ppy, 0.0, 0.0, 0.0);
  intrinsics[v.id_intrinsic] = intrinsic;
  views[v.id_view] = std::make_shared<openMVG::sfm::View>(v);
}

Processing: Image Processing

At this stage we load the image file information, including the width and height as well as the focal information embedded in the camera images.

At this stage we also decide how to handle the camera model and compensate for image distortion introduced by the camera itself; OpenMVG offers several different types of model to choose from.

Reading Image Parameters

OpenMVG provides a simple mechanism for extracting exif data, which can then be used to calculate focal length. Since we have to use the exif information to get a reliable focal length value, at this stage we may as well use the exif reader to extract the width and height information too.

Therefore our minimum working example to extract information from the image file is:

  std::unique_ptr<openMVG::exif::Exif_IO_EasyExif> exifReader(new openMVG::exif::Exif_IO_EasyExif());
  if (!exifReader->open(which))
    return;
  image_focal = static_cast<double>(exifReader->getFocal());
  width = static_cast<double>(exifReader->getWidth());
  height = static_cast<double>(exifReader->getHeight());

However the value of focal length we have retrieved is in mm, and the actual focal length value we want to use would be in pixels, so we calculate this value using information on our sensor sizes (in mm):

  const double ccdw = 5.75;
  focal = std::max (width, height) * image_focal / ccdw;

where ccdw is the CCD sensor width of the camera, which can be extracted from a camera information database, or hardcoded for a given camera.  It can also be retrieved from the exif image information in some cases.
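As a worked example, assuming (hypothetically) a 28mm focal length in the exif of a 6048 pixel wide image from the A850, with its 35.9mm sensor width, this gives focal = 6048 * 28 / 35.9 ≈ 4717 pixels.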

The OpenMVG sample code uses the database lookup to pick up the information based on the camera name, but I'm hardcoding the numbers for my camera ccd here.

There is some more background on the derivation of the focal length here: http://phototour.cs.washington.edu/focal.html


Note that OpenMVG provides simple methods to read the image data directly, and we can extract the width & height information from that process instead, i.e.:

openMVG::image::Image<unsigned char> _image_data;
  if (openMVG::image::ReadImage(_name.c_str(), &_image_data))
  {
    width = _image_data.Width();
    height = _image_data.Height();
  }

The example code does this in a few places. However this load is redundant in our case, since we need to run the exif parsing for focal lengths anyway. While we'll want to load the image data eventually, doing it now would simply incur a double load, and the load call itself is actually quite slow.

Filling in the camera details

The camera information associated with an image is held in a camera Intrinsics structure.

The intrinsic structure stores the optical model, covering such parameters as the camera focal length, the principal point (ideal image centre) and the associated image distortion parameters, etc.

There is also a camera Extrinsics model, referred to in OpenMVG as a Pose (of type openMVG::geometry::Pose3), which covers the camera position and rotation; however the details of this will actually be handled by OpenMVG itself.

The structure openMVG::cameras::IntrinsicBase is used as a base type when building lists, and it is ultimately derived into two common classes, openMVG::cameras::Pinhole_Intrinsic_Radial_K1 and openMVG::cameras::Pinhole_Intrinsic_Radial_K3.

The difference between these two classes is the number of parameters supplied (one for K1 and three for K3) which are used to remove image distortion.

The full documentation on this is held here:
http://openmvg.readthedocs.org/en/latest/openMVG/cameras/cameras/#pinhole-camera-model

To keep things simple we initially use a K3 model, but leave the distortion coefficients as 0.

So, to build the intrinsic model from an image:

  intrinsic = std::make_shared<openMVG::cameras::Pinhole_Intrinsic_Radial_K3> (
        width, height, focal,
        ppx, ppy,
        0.0, 0.0, 0.0);
and now we can pass the intrinsic into the SfM container objects.
How to do that next...

Overview of the processing pipeline

The sequence of operations

The basic sequence we follow is:
  • Examine the reference images
    • Extract data on the camera in the process
    • We have to select how we'll handle the camera model here
  • Process the images for feature points – these are points we can track across pictures
    • We have to pick one of the available feature detection algorithms here
  • Process the image point sets, filtering out “outlier” points (i.e. those which don't match between images)
    • We have to select a filtering mechanism, and set thresholds for discarding outliers
  • Incremental SFM, performing Bundle Adjustment
    • Pick an initial pair of images, 
    • Run the processing
    • Output the point cloud

While the details of the underlying Incremental SfM & filtering algorithms are interesting, and actually rather clever, it's not essential to understand them completely in order to use the library – from a hacky programmer's point of view there are a couple of fairly simple APIs and abstractions that can be plugged in to perform the reconstruction work, and a handful of parameters to tune.

Top Level Data Structures & Concepts

The top level abstraction is that of an SFM container, openMVG::sfm::SfM_Data.

This class contains all of the data and configuration that is used to generate the output as a set of 3d points. We're going to be operating on a single instance of this throughout.

One other key point is the use of reference ID's to track things like camera information, image information, etc. Each ID (of type IndexT) is basically a 32 bit integer, and a number of internal data structures are lists which associate an ID value to a given instance.

The codebase also makes extensive use of C++11's shared_ptr and unique_ptr smart pointer types to handle automatic object destruction, and as a consequence so will our application.

Finally – the objects all contain serialisation methods, which use the cereal library to save and reload information, and many of the standard use cases involve disk storage and reload to recover the state of processing. This minimal application mostly avoids this, but the feature detection is slow enough that we do want to cache and reload its output.
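As a sketch of that serialisation in use: Save() and Load() take the SfM_Data, a file name (the extension picks the storage format) and a flag saying which parts to process. The ALL flag is the same ESfM_Data value used when saving the final reconstruction; the VIEWS and INTRINSICS values here are assumed from the same enum:

  // Cache the view/intrinsic setup to disk...
  openMVG::sfm::Save(_sfm_data, "sfm_data.json",
        openMVG::sfm::ESfM_Data(openMVG::sfm::VIEWS | openMVG::sfm::INTRINSICS));

  // ...and reload it in a later run rather than re-parsing the images.
  openMVG::sfm::SfM_Data reloaded;
  openMVG::sfm::Load(reloaded, "sfm_data.json", openMVG::sfm::ESfM_Data(openMVG::sfm::ALL));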

Sunday, 20 September 2015

The Basic Build

Install OpenMVG

The openMVG library contains and rebuilds almost all of the dependencies required during a source install, so simply clone it using:

git clone --recursive https://github.com/openMVG/openMVG.git 
cd openMVG 
git submodule init
git submodule update
cd .. 
Then build it in a separate directory with:

mkdir openMVG_Build
cd openMVG_Build/ 
cmake \ 
  -DCMAKE_BUILD_TYPE=DEBUG \
  -DOpenMVG_BUILD_TESTS=ON \
  -DopenMVG_BUILD_EXAMPLES=ON \
  -DCMAKE_INSTALL_PREFIX:STRING="/usr/local/openMVG_install" \
  -G "CodeBlocks - Unix Makefiles" . ../openMVG/src/ 

make clean 
make -j4 
sudo make install 
Note the custom install directory – this can be changed fairly easily, since the application will use the supplied cmake files in the install to locate the headers & libraries.

Adding the Library to an Application

This is all driven through cmake, with the target CMakeLists.txt containing the following fragment to locate the openMVG libraries and headers.

set(OpenMVG_DIR "/usr/local/openMVG_install/share/openMVG/cmake") 
set(CMAKE_BUILD_TYPE "Debug") 
ADD_DEFINITIONS(
    -std=c++11
)
It's actually slightly painful to use anything other than cmake here; the library include and link paths are rather verbose.
Note that C++11 is required for the smart pointers used by the openMVG library.

Using the OpenMVG Library

The next few posts look at an Incremental Structure from Motion (SfM) pipeline, where we use the openMVG library to produce a point cloud from some input images, i.e. to generate a 3D model.

Using the openMVG library simplifies the construction process immensely, and hides some of the algorithm details from us.

As a library it's not perfect, and it can be annoyingly verbose at times, but it beats doing it from scratch. There are a few sample applications in the OpenMVG tree, which most of this work is based on, but no good overview of how to tie all the pieces together when using the library directly.

The Very Top Level

We're processing a set of 2D images, and the output will be a 3D model of the objects in the image, as a point cloud.

So we start with a set of photographs:


 And generate a point cloud:





Using an incremental pipeline means that the reconstruction will start with a model reconstruction based on two initial images, and then add new images and 3D points to generate the complete model.

The process is described at: http://imagine.enpc.fr/~marletr/publi/ACCV-2012-Moulon-et-al.pdf.

This approach is based around feature detection within the images and Bundle Adjustment (https://en.wikipedia.org/wiki/Bundle_adjustment) to synthesise the 3D data. The “a contrario model estimation” means that we're going to use the input data itself to identify and remove outlying data samples, however this processing will be hidden within the library itself.

Using an incremental pipeline is slightly simpler, from the perspective of a library client. An alternative approach is Global SfM, which would process all the images simultaneously: Global SfM is potentially more accurate, since sequential processing can introduce errors, and the global version is more efficient and simpler to parallelise. But the library usage in the sequential case is easier, so let's start with that.






Sunday, 12 April 2015

Printing On the Panel

This is actually pretty simple, given the work to date. It could be a lot faster, but for simple scrolling messages this should be plenty fast enough.

To plot out bitmaps we take three steps:
  • Generate a main bitmap
  • Break down the four grid regions of each panel (one per HT1632C)
  • Render each grid region bitmap

Doing this in reverse (bottom up)

A single HT1632C Renderer

We can define a single handler for a 16 * 8 LED grid, controlled by a single HT1632C chip, as:
#define DISPLAY_W (16)
#define DISPLAY_H (8)
#define DISPLAY_SZ (DISPLAY_W*DISPLAY_H)

class Grid
{

    public:
        Grid();
        virtual ~Grid();
...

    private:
        char _pixels_green[DISPLAY_SZ];
        char _pixels_red[DISPLAY_SZ];
        DisplayInterface* _interface;
        int _display_chip;
};
Where _pixels_green[] and _pixels_red[] are simply bitmaps for the LED states. We can set a single pixel value with
void Grid::SetPixel(int x, int y, char value_g, char value_r)
{
int offset;

    if ((x >= DISPLAY_W) || (y >= DISPLAY_H))
    {
        printf("Display data set is out of range\n");
        return;
    }
    offset = (y * DISPLAY_W) + x;
    _pixels_green[offset] = value_g;
    _pixels_red[offset] = value_r;
}


and the writeout for green looks something like:
    reg = 0;
    dcursor = 0;
    for (int x = 0 ; x < DISPLAY_W; x++)
    {
        for (int y=0; y < DISPLAY_H; y++)
        {
        int offset = (y * DISPLAY_W) + x;

            if (_pixels_green[offset] != 0)
            {
                reg |=1;
            }
            else
            {
                reg &=0xfe;
            }
            if ((dcursor+1) %4 == 0)
            {
               _interface->Command(_display_chip, 0x5, dcursor/4, reg);
            }
            else
            {
                reg = reg <<1;
            }
            dcursor++;
        }
    }

This loads the registers for each column from the incoming bitmap, writing out the register value every four pixels. X=Y=0 is in the top left hand corner of the display in this case. Other than this the class is simply getters and setters for the chip select on this panel (_display_chip) and the appropriate hardware interface controller, as well as grid clear operations (just memset the pixel arrays).

The red LED writeout operates using the same logic, only 32 registers higher.
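As a sketch, the only change for the red plane is the register address (offset by 32, i.e. 0x20) and the source bitmap:

            // Same loop body as the green writeout, but 32 registers higher,
            // driven from _pixels_red rather than _pixels_green:
            if ((dcursor+1) %4 == 0)
            {
               _interface->Command(_display_chip, 0x5, 0x20 + (dcursor/4), reg);
            }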

A Four Grid (Panel) Renderer

This is actually very simple - we create a master "bitmap" class for our main 32 * 16 display and it aggregates the grid class above:

#define BITMAP_WIDTH (32)
#define BITMAP_HEIGHT (16)
#define BITMAP_SZ (BITMAP_WIDTH*BITMAP_HEIGHT)

class BitMapper
{

    public:
        BitMapper();
        virtual ~BitMapper();
...
    private:
        char _pixels_green[BITMAP_SZ];
        char _pixels_red[BITMAP_SZ];

        Grid _grid;
        DisplayInterface _display;
};

This has almost identical pixel setup logic to the Grid code from earlier. So now we can load and write out a single bitmap to the underlying panels with logic like:

    _grid.SetPanel(0);
    _grid.ClearPixels();
    for (x=0; x <16; x++)
    {
        for (y=0; y < 8; y++)
        {
        int offset = (y * BITMAP_WIDTH) + x;
            _grid.SetPixel(x, y, _pixels_green[offset], _pixels_red[offset]);
        }
    }
    _grid.Writeout();

    _grid.SetPanel(1);
    _grid.ClearPixels();
    for (x=16; x <32; x++)
    {
        for (y=0; y < 8; y++)
        {
        int offset = (y * BITMAP_WIDTH) + x;

            _grid.SetPixel(x-16, y, _pixels_green[offset], _pixels_red[offset]);
        }
    }
    _grid.Writeout();

and so on for all four panels – i.e. we're simply looping over sections of the source bitmap, and setting the appropriate sub-panel select and pixel value.
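The per-panel copies can also be folded into one loop; a sketch, assuming panels 0 and 1 cover the top half of the bitmap left to right and panels 2 and 3 the bottom half:

    for (int panel = 0; panel < 4; panel++)
    {
    int x0 = (panel % 2) * DISPLAY_W;   // 0 or 16
    int y0 = (panel / 2) * DISPLAY_H;   // 0 or 8

        _grid.SetPanel(panel);
        _grid.ClearPixels();
        for (x = 0; x < DISPLAY_W; x++)
        {
            for (y = 0; y < DISPLAY_H; y++)
            {
            int offset = ((y + y0) * BITMAP_WIDTH) + (x + x0);
                _grid.SetPixel(x, y, _pixels_green[offset], _pixels_red[offset]);
            }
        }
        _grid.Writeout();
    }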



The top level renderer

This is also fairly trivial - we create a pixel map for rendering characters and simply drop this into the BitMapper data, then invoke a write and flush it to the display. There's a discussion of bitmap fonts here and I'll just grab the Font Image here as an example of a font grid made up of 10*10 pixel characters.

To get this into our source code we can just use The Gimp and load up the image, then autocrop it (to ensure the characters are on a grid from 0,0) and export as "C Source Code".

This produces the image data as a simple C-Style array:

/*
 * GIMP RGB C-Source image dump (rexpaint_cp437_10x10.c)
 * From http://www.gridsagegames.com/blog/2014/09/font-creation/
*/

#define GIMP_IMAGE_WIDTH (160)
#define GIMP_IMAGE_HEIGHT (160)
#define GIMP_IMAGE_BYTES_PER_PIXEL (3) /* 3:RGB, 4:RGBA */
#define GIMP_IMAGE_PIXEL_DATA ((unsigned char*) GIMP_IMAGE_pixel_data)

static const unsigned char GIMP_IMAGE_pixel_data[160 * 160 * 3 + 1] =
("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\377"
...


In this code we have defines for the image width, height and pixel depth, as well as the image data array so we can reference a single pixel with code such as:
void GimpImport::SampleMap(int x, int y, char* red, char* green)
{
int index;

    index = y*GIMP_IMAGE_WIDTH*GIMP_IMAGE_BYTES_PER_PIXEL;
    index += x*GIMP_IMAGE_BYTES_PER_PIXEL;

   *red = GIMP_IMAGE_pixel_data[index++];
   *green = GIMP_IMAGE_pixel_data[index++];
return;
}

And we can copy single character glyphs out of the source bitmap and into a destination bitmap:

void GimpImport::RenderGlyph(int which, int x, int y)
{
int xin, yin;


    xin = ((which*_glyph_w) % GIMP_IMAGE_WIDTH);
    yin = (which/_glyph_per_line)* _glyph_h;

    for (int ln =0; ln < _glyph_h; ln++)
    {
        for (int xg =0; xg < _glyph_w; xg++)
        {
        char g,r;
            SampleMap(xin+xg, yin, &r, &g);
            PutPixel(x+xg, y, r, g);
        }
        y++;
        yin++;
    }
}

By combining this with a couple of simple strings we can then simply dump to the output panel:
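A sketch of the string dump itself, assuming the glyph index in this CP437 font image is just the character code (16 glyphs per row over 16 rows), and that the caller handles any wrapping or scrolling:

void GimpImport::RenderString(const char* text, int x, int y)
{
    while (*text != '\0')
    {
        // Assumes glyph index == character code for this CP437 font layout.
        RenderGlyph((unsigned char)*text, x, y);
        x += _glyph_w;
        text++;
    }
}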

Section of Character Map

Scrolling date text


Tuesday, 7 April 2015

CI20 and an LED Panel

The Application


So, wired up to the 4 GPIO pins from last time is a Sure DE-DP14111 32*16 LED Dot matrix Display. It's a smaller version of this display here, and although the LED's are smaller the physical HW is still very similar to the "3216 Bicolor LED 3mm Dot Matrix Display Information Board" from that page.

Hardware Interface

The interface to this board is a 16 pin IDC cable and two power rails. However the connector is fairly sparsely populated, and the power rails show as continuous.

Although the board is labelled as requiring a 5V supply I'm actually using a (separate) 3V3 supply to the Sure board, with a common ground to the CI20 and driving the inputs directly from the CI20 expansion header. This "works for me"(tm) with a dimmed display, however for a full 5V system I'd want to build a 3v3 to 5V driver board up, which I can skip for now.

The board really has 6 pins it requires:
  • Ground
  • Power - Nominally 5V, but I'm using 3v3 to match the CI20 expansion header levels
  • Two chip select controls
  • Two Data line controls
The chip selects are worth taking a second to describe.

The front of the board consists of 8 LED panels, each of which contains an 8x8 LED Array, with 64 (bicolour) Red/Green LEDs.

Logically speaking the display is actually broken up into four different areas; a pair of 8x8 panels (128 LEDs) is driven by a single HT1632C driver chip.

The two chip select inputs on the header are used to pick which of the HT1632C chips the data lines are communicating with, by controlling the chip select line of the HT1632C devices.

The chip select controls are a clock and a data line; these actually feed a 74HC164 shift register. Each of the device CS lines is fed from a separate stage of the shift register, so when the chip select line is pulsed and clocked the shift register moves the "active low" along the outputs, through CS1 to CS2 to CS3 and so on.

Using this mechanism the CS can be selected individually by pulsing the CS_IN line low, and clocking along to the specific chip, or multiple devices can be selected by holding the input low and clocking. In addition the boards can be chained together, since the final CS (CS4) is passed through to the output header.

This does make selecting a device slightly more complex however. A simple piece of code to do this, using the GPIO routines from the previous post, can be:
int DisplayInterface::ClockSelect(int val)
{
    _pins[SELECT_ACTIV].SetData(val);
    _pins[SELECT_CLOCK].SetData(1);
    usleep(USLEEP_DELAY);
    _pins[SELECT_CLOCK].SetData(0);

    usleep(USLEEP_DELAY);
return 0;
}

int DisplayInterface::SelectSegment(int which)
{
    for (int i=0 ; i < 4; i++)
    {// Clock all CS lines to high
        ClockSelect(1);
    }
    ClockSelect(0);
    while (which-- > 0)
    {
        ClockSelect(1);
    }
return 0;
}
i.e. we have two pins SELECT_ACTIV (the CS In, pin 1) and SELECT_CLOCK (CS Clock, pin 2). The ClockSelect() method takes the CS value in and makes a clock transition (low to high, then back to low). The usleep() means we can see the display fill as it happens by tuning the delay.


The SelectSegment() simply takes a chip offset then clocks the CS lines through so that all are high, then pushes in a low pulse and clocks this along to the target device.

This gives us a very simple "pick one" solution for the CS mechanism.
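For completeness, a "select everything" variant is just as simple: hold the CS input low while clocking all four stages, so every HT1632C sees its chip select active. A sketch using the same ClockSelect() helper:

int DisplayInterface::SelectAll()
{
    for (int i = 0; i < 4; i++)
    {// Clock an active low into every CS stage
        ClockSelect(0);
    }
return 0;
}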

The Data

The data control of the device is likewise a serial bus, with a clock and data input. These fan out to all four of the HT1632C devices, and the chip select determines which one is active.

So a single bit write across the data interface would be of the form
int DisplayInterface::ClockWrite(int val)
{
    _pins[WRITE_CLOCK].SetData(0);
    _pins[WRITE_DATA].SetData(val);
    usleep(USLEEP_DELAY);
    _pins[WRITE_CLOCK].SetData(1);
    usleep(USLEEP_DELAY);
return 0;
}
Where WRITE_CLOCK and WRITE_DATA are pins 5 and 7 of the input header.

To simplify the write I'm only going to deal with simple command sequences here, where we enable the CS for a device, write a single command and then de-select it. There are continuous write modes, but for my application (simple, largely static, display) then I don't care about frequent updates, so I won't bother with that for now.

There are actually two kinds of data we can send to the devices: one is simple and one is not.

Configuration

The "not simple" part is the control sequences which set up the HT1632C devices. These configure the output stages, system oscillator, etc. However the good news is that the data sheet contains some simple "good" configurations we can use.

The configuration sequences are 12 bits each, and are shown in the Command Summary section of the user guide (Figure 2-14, and on p21 of the HT1632C data sheet); the first three bits indicate the Command ID, and the following bits are configuration information.

For our application we can just use the following canned configuration strings

uint32_t sysen = 0x802;
uint32_t ledon = 0x806;
uint32_t nmos = 0x840;
uint32_t rcmaster = 0x830;
uint32_t pwm10 = 0x962;

These are:
  • SYS_EN - Turn the oscillator on
  • LED On - Turn on the LED Duty cycle generator
  • COM Option - N-MOS open drain output and 8 COM option
  • RC Master mode - set clock from on chip oscillator
  • PWM Duty - 10/16 Duty cycle

Setting them is fairly simple: we can push out a 12 bit configuration packet with the function:

int DisplayInterface::Issue12(uint32_t data)
{
int pos = 11;

     while (pos >= 0)
    {
    uint32_t out;
        out = data >> pos;
        ClockWrite(out &0x01);
        pos--;
    }
return 0;
}


Which simply shifts the correct bits into the output position and uses ClockWrite() to issue it to the board.

We can therefore configure a single chip with:

int DisplayInterface::Enable(int which)
{
uint32_t sysen = 0x802;
uint32_t ledon = 0x806;
uint32_t nmos = 0x840;
uint32_t rcmaster = 0x830;
uint32_t pwm10 = 0x962;

    SelectSegment(which);
    Issue12(sysen);
    SelectSegment(which);
    Issue12(ledon);
    SelectSegment(which);
    Issue12(rcmaster);
    SelectSegment(which);
    Issue12(nmos);
    SelectSegment(which);
    Issue12(pwm10);
return 0;
}

Data

Data is much simpler. All data sequences are a three bit ID (101 - write), then a 7 bit address and 4 bits of data.

The output LEDs are simply memory mapped; each 4 bit data register maps to four LED status settings. The first 32 registers (0x00-0x1F) control the green LEDs - (4 * 32 = 128 LED's from each panel pair) and the next 32 (0x20-0x3F) control the corresponding red LEDs.

So we can write a data sequence with:
int DisplayInterface::Command(int which, unsigned char id, unsigned char addr, unsigned char data)
{
uint32_t wire;
int pos = 13;

    SelectSegment(which);

    wire = id & 0x07;
    wire = wire << 7;
    wire |= addr&0x7F;
    wire = wire << 4;
    wire |= data&0x0F;
    //printf("Write Command 0x%x\n", wire);
    while (pos >= 0)
    {
    uint32_t out;
        out = wire >> pos;
        ClockWrite(out &0x01);
        pos--;
    }
return 0;
}

(although we form up the id from the parameter, we don't really need to, since it's always 0x5 for a write)

And therefore using the DisplayInterface class we've defined to date we can do something like this:

DisplayInterface display;
...
    display.Enable(0);
    display.Enable(1);
    display.Enable(2);
    display.Enable(3);

    for (int j=0; j < 100; j++ )
    {

        for (int i =0 ; i < 64; i++)
        {
            display.Command(0, 0x5, i, 0x0f);
            display.Command(1, 0x5, i, 0x0f);
            display.Command(2, 0x5, i, 0x0f);
            display.Command(3, 0x5, i, 0x0f);
        }

        for (int i =0 ; i < 64; i++)
        {
            display.Command(0, 0x5, i, 0x00);
            display.Command(1, 0x5, i, 0x00);
            display.Command(2, 0x5, i, 0x00);
            display.Command(3, 0x5, i, 0x00);
        }
    }

Which turns all the LEDs on and off again on all the panels in a (more or less) parallel sequence.