Sunday, 4 October 2015

Some Sample outputs

Here are a couple of sample point clouds derived using the default settings, with images from a Sony DSLR-A850.

This is "the building across the road from the Fox in Shipley", and the image set used has 34 images at 6048x4032 pixels apiece.

There are two runs here, one at the default resolution and one at high resolution. Processing these as a background task on my desktop (i5-2300 @ 2.8 GHz) takes ~10-15 minutes per image at the default preset, with feature matching taking four to five hours and bundle adjustment a bit over four and a half hours. For the high resolution version it's about 20-25 minutes per image, with the matcher taking 12 hours and the bundle adjustment another 13 hours or so.

Default Resolution

High Resolution
The higher resolution run shows improved detail tracking in the lower contrast image regions, with many more features resolved on the roof and the lamp post body in particular.

A working example...

Here's a quick overview of the basic code structure, which should give a functional reconstruction (well, "it works for me"(tm)).

This is a little terser than the detailed code walkthroughs, and we strip some error checking we'd have in a live application, but it should work as proof-of-concept code; the next few posts will go into a bit more detail. It reloads saved image features where possible (since these take the bulk of the processing time on my setup), and I've stripped the explicit namespacing to keep the code terse:


#include <iostream>
#include "openMVG/sfm/sfm.hpp"
#include "openMVG/exif/exif_IO_EasyExif.hpp"
#include "openMVG/image/image.hpp"
#include "openMVG/stl/split.hpp"

#include "openMVG/matching_image_collection/Matcher_Regions_AllInMemory.hpp"
#include "openMVG/matching_image_collection/GeometricFilter.hpp"
#include "openMVG/matching_image_collection/F_ACRobust.hpp"
#include "openMVG/sfm/sfm.hpp"

using namespace std;
using namespace openMVG::sfm;
using namespace openMVG::features;
using namespace openMVG::image;
using namespace openMVG::cameras;
using namespace openMVG::exif;
using namespace stlplus;

class ImageList {
public:
  ImageList();
  ~ImageList();
  void setDirectory(const char* nm);
  int countFiles();
  void loadAllImages();
  void loadImage(std::string s);
  void generateFeatures();
  void computeMatches();
  void sequentialReconstruct();

private:
  string _directory;
  string _matches_full;
  string _matches_filtered;
  vector<string> _fnames;
  SfM_Data _sfm_data;
  unique_ptr<Regions> _regionsType;
};


ImageList::ImageList() {
}

ImageList::~ImageList() {
}

void ImageList::setDirectory(const char* nm) {
vector<string> file_list;
  _directory = nm;
  _sfm_data.s_root_path = nm;
  file_list = stlplus::folder_files(_directory);

  sort(file_list.begin(), file_list.end());

  for (vector<string>::const_iterator it = file_list.begin(); it != file_list.end(); it++)  {
    string which = *it;
    string imnm = stlplus::create_filespec(_directory, which);
    _fnames.push_back(imnm);
  }
  _matches_full = string(_directory + "/matches.putative.txt");
  _matches_filtered = string(_directory + "/matches.f.txt");
}

int ImageList::countFiles() {
return _fnames.size();
}

void ImageList::loadAllImages() {
  for ( vector<string>::const_iterator iter_image = _fnames.begin(); iter_image != _fnames.end(); iter_image++ ) {
    string which = *iter_image;
    loadImage(which);
  }
return;
}

void ImageList::loadImage(string which) {
double width, height, image_focal;
double focal, ppx, ppy;

Views& views = _sfm_data.views;
Intrinsics& intrinsics = _sfm_data.intrinsics;
shared_ptr<IntrinsicBase> intrinsic(nullptr);

  if (GetFormat(which.c_str()) == openMVG::image::Unknown)
    return;

  unique_ptr<Exif_IO_EasyExif> exifReader(new Exif_IO_EasyExif());
  if (!exifReader->open(which))
    return;

  image_focal = static_cast<double>(exifReader->getFocal());
  width = static_cast<double>(exifReader->getWidth());
  height = static_cast<double>(exifReader->getHeight());

  ppx = width / 2.0;
  ppy = height / 2.0;

  if (image_focal == 0)
    return;

  printf("Image %s: %f x %f, Focal Length %f\n", which.c_str(), width, height, image_focal);

  // Sensor (CCD) width in mm for the source camera: the focal length in
  // pixels is focal(mm) * largest_image_dimension(px) / sensor_width(mm).
  const double ccdw = 5.75;  // Cheap Samsung S890
  //const double ccdw = 4.62; // Also cheap Canon SX410-IS
  //const double ccdw = 7.39; // Nicer Canon G9
  //const double ccdw = 35.9; // Very nice Sony DSLR-A850
  focal = std::max(width, height) * image_focal / ccdw;

  View v(which, views.size(), views.size(), views.size(), width, height); // view, intrinsic and pose ids all track the view count

  intrinsic = make_shared<Pinhole_Intrinsic_Radial_K3> (width, height, focal,
                                                        ppx, ppy, 0.0, 0.0, 0.0);
  intrinsics[v.id_intrinsic] = intrinsic;
  views[v.id_view] = std::make_shared<openMVG::sfm::View>(v);
}


void ImageList::generateFeatures(void) {
AKAZEParams params(AKAZEConfig(), AKAZE_MSURF);
unique_ptr<AKAZE_Image_describer> image_describer(new AKAZE_Image_describer(params, true));

  image_describer->Allocate(_regionsType);
  image_describer->Set_configuration_preset(NORMAL_PRESET);

  for(Views::const_iterator iterViews = _sfm_data.views.begin(); iterViews != _sfm_data.views.end(); ++iterViews) {
  const View * view = iterViews->second.get();
  const string sFeat = stlplus::create_filespec(_directory, stlplus::basename_part(view->s_Img_path), "feat");
  const string sDesc = stlplus::create_filespec(_directory, stlplus::basename_part(view->s_Img_path), "desc");

    if (!stlplus::file_exists(sFeat)) {
      Image<unsigned char> imageGray;
      printf("Creating %s\n", sFeat.c_str());
      ReadImage(view->s_Img_path.c_str(), &imageGray);
      unique_ptr<Regions> regions;
      image_describer->Describe(imageGray, regions);
      image_describer->Save(regions.get(), sFeat, sDesc);
    }
    else {
      printf("Using existing features from %s\n", sFeat.c_str());
    }
  }
}

void ImageList::computeMatches() {
float fDistRatio = 0.6f; // Nearest-neighbour distance ratio; lower is stricter
openMVG::matching::PairWiseMatches map_PutativesMatches;
vector<string> vec_fileNames;
vector<pair<size_t, size_t> > vec_imagesSize;

  for (Views::const_iterator iter = _sfm_data.GetViews().begin(); iter != _sfm_data.GetViews().end(); ++iter) {
    const View * v = iter->second.get();
    vec_fileNames.push_back(stlplus::create_filespec(_sfm_data.s_root_path, v->s_Img_path));
    vec_imagesSize.push_back(make_pair( v->ui_width, v->ui_height) );
  }

  unique_ptr<openMVG::Matcher_Regions_AllInMemory> collectionMatcher;
  collectionMatcher.reset(new openMVG::Matcher_Regions_AllInMemory(fDistRatio, openMVG::ANN_L2));

  if (collectionMatcher->loadData(_regionsType, vec_fileNames, _directory)) {
    openMVG::Pair_Set pairs;
    pairs = openMVG::exhaustivePairs(_sfm_data.GetViews().size());
    collectionMatcher->Match(vec_fileNames, pairs, map_PutativesMatches);
    ofstream file(_matches_full);
    if (file.is_open())
      PairedIndMatchToStream(map_PutativesMatches, file);
    file.close();
  }

  shared_ptr<Features_Provider> feats_provider = make_shared<Features_Provider>();

  if (!feats_provider->load(_sfm_data, _directory, _regionsType))
    return;

  openMVG::PairWiseMatches map_GeometricMatches;

  ImageCollectionGeometricFilter collectionGeomFilter(feats_provider.get());

  const double maxResidualError = 1.0; // default 1.0; lower is stricter

  collectionGeomFilter.Filter(
            GeometricFilter_FMatrix_AC(maxResidualError),
            map_PutativesMatches,
            map_GeometricMatches,
            vec_imagesSize);

  ofstream file (_matches_filtered);
  if (file.is_open())
    PairedIndMatchToStream(map_GeometricMatches, file);
  file.close();

}


void ImageList::sequentialReconstruct() {
string output_directory = "sequential";
string sfm_data = stlplus::create_filespec(output_directory, "sfm_data", ".json");
string cloud_data = stlplus::create_filespec(output_directory, "cloud_and_poses", ".ply");
string report_name = stlplus::create_filespec(output_directory, "Reconstruction_Report", ".html");

  if (!stlplus::folder_exists(output_directory))
    stlplus::folder_create(output_directory);

  SequentialSfMReconstructionEngine sfmEngine(_sfm_data, output_directory, report_name);
  shared_ptr<Features_Provider> feats_provider = std::make_shared<Features_Provider>();
  shared_ptr<Matches_Provider> matches_provider = std::make_shared<Matches_Provider>();

  feats_provider->load(_sfm_data, _directory, _regionsType);
  matches_provider->load(_sfm_data, _matches_filtered);

  sfmEngine.SetFeaturesProvider(feats_provider.get());
  sfmEngine.SetMatchesProvider(matches_provider.get());

  openMVG::Pair initialPairIndex;
  Views::const_iterator it;

  it = _sfm_data.GetViews().begin();
  const View *v1 = it->second.get();
  it++;
  const View *v2 = it->second.get();

  initialPairIndex.first = v1->id_view;
  initialPairIndex.second = v2->id_view;

  sfmEngine.setInitialPair(initialPairIndex);
  sfmEngine.Set_bFixedIntrinsics(false);
  sfmEngine.SetUnknownCameraType(EINTRINSIC(PINHOLE_CAMERA_RADIAL3));

  sfmEngine.Process();
  Save(sfmEngine.Get_SfM_Data(), sfm_data, ESfM_Data(openMVG::sfm::ALL));
  Save(sfmEngine.Get_SfM_Data(), cloud_data, ESfM_Data(openMVG::sfm::ALL));
}

int LoadImageListing(ImageList* iml, const char* imagedir) {
  iml->setDirectory(imagedir);

  if (iml->countFiles() > 0) {
    iml->loadAllImages();
  }
return 0;
}



int main(int argc, char* argv[])
{
ImageList iml;

  if (argc != 2) {
    printf("supply a target directory\n");
    return 0;
  }
  LoadImageListing(&iml, argv[1]);
  iml.generateFeatures();
  iml.computeMatches() ;
  iml.sequentialReconstruct();
}

Bundle adjust and generate model


For sequential reconstruction this is a fairly simple process.

We:
  • Create a Sequential SfM Reconstruction Engine instance
  • Create a Features Provider for it, from our Features (Regions) information
  • Create a Matches Provider for it, from our Filtered list of matches
  • Set the initial image pair
  • Run the SfM Engine
  • Save the output



We have already made most of the configuration decisions ahead of this step, so we don't have many options.
In the boilerplate example here I use the first two images unconditionally. This is OK for my image sets, but may need adjusting for source data sets where the first two images don't have a good overlap (see the sketch after the listing below).

void ImageList::sequentialReconstruct()
{
std::string output_directory = "sequential";
std::string sfm_data = stlplus::create_filespec(output_directory, "sfm_data", ".json");
std::string cloud_data = stlplus::create_filespec(output_directory, "cloud_and_poses", ".ply");
std::string report_name = stlplus::create_filespec(output_directory, "Reconstruction_Report", ".html");

  if (!stlplus::folder_exists(output_directory))
    stlplus::folder_create(output_directory);

  openMVG::sfm::SequentialSfMReconstructionEngine sfmEngine(_sfm_data, output_directory, report_name);
  std::shared_ptr<openMVG::sfm::Features_Provider> feats_provider = std::make_shared<openMVG::sfm::Features_Provider>();
  std::shared_ptr<openMVG::sfm::Matches_Provider> matches_provider = std::make_shared<openMVG::sfm::Matches_Provider>();

  feats_provider->load(_sfm_data, getDirectory(), _regionsType);
  matches_provider->load(_sfm_data, _matches_filtered);


  sfmEngine.SetFeaturesProvider(feats_provider.get());
  sfmEngine.SetMatchesProvider(matches_provider.get());

  openMVG::Pair initialPairIndex;
  openMVG::sfm::Views::const_iterator it;

  it = _sfm_data.GetViews().begin();
  const openMVG::sfm::View *v1 = it->second.get();
  it++;
  const openMVG::sfm::View *v2 = it->second.get();

  initialPairIndex.first = v1->id_view;
  initialPairIndex.second = v2->id_view;

  sfmEngine.setInitialPair(initialPairIndex);

  sfmEngine.Set_bFixedIntrinsics(false);
  sfmEngine.SetUnknownCameraType(openMVG::cameras::EINTRINSIC(openMVG::cameras::PINHOLE_CAMERA_RADIAL3));

  sfmEngine.Process();
  Save(sfmEngine.Get_SfM_Data(), sfm_data, openMVG::sfm::ESfM_Data(openMVG::sfm::ALL));
  Save(sfmEngine.Get_SfM_Data(), cloud_data, openMVG::sfm::ESfM_Data(openMVG::sfm::ALL));
}
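Since the initial pair is just a pair of view ids, it's easy to drive this from image names instead of blindly taking the first two views. Here's a hedged sketch: findViewByName() is a hypothetical helper of my own (not an openMVG call), and the image names in the usage comment are examples only.

#include <string>
#include "openMVG/sfm/sfm.hpp"
#include "third_party/stlplus3/filesystemSimplified/file_system.hpp" // path as in the openMVG tree

// Hypothetical helper: scan the view list for an image whose filename
// matches, and return its view id.
openMVG::IndexT findViewByName(const openMVG::sfm::SfM_Data& sfm_data,
                               const std::string& name) {
  for (openMVG::sfm::Views::const_iterator it = sfm_data.GetViews().begin();
       it != sfm_data.GetViews().end(); ++it) {
    const openMVG::sfm::View* v = it->second.get();
    if (stlplus::filename_part(v->s_Img_path) == name)
      return v->id_view;
  }
  return openMVG::UndefinedIndexT; // not found
}

// Usage (image names are examples only):
//   openMVG::Pair initialPair(findViewByName(_sfm_data, "DSC01234.JPG"),
//                             findViewByName(_sfm_data, "DSC01235.JPG"));
//   sfmEngine.setInitialPair(initialPair);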

Filtering the Features


This is basically an image matching process, which looks for corresponding points across the image set.

It's actually a two-step process: an initial run looks for likely matches (the "putative" matches) using one of a set of possible matching algorithms. The possible matches are then run through a geometric filter which should discard outlier (unreliable) matches.

Once again this isn't something where you really need to understand the behind-the-scenes detail in order to use it – there are a couple of default values which determine the thresholds for flagging a match and determining the outliers, but otherwise the API is fairly opaque.

However, the Region information must be used to determine which of the Scalar or Binary geometric matcher options are available.

First – load the image information

Run through the view list, get the file names, and push them to a list of names and sizes.

 
Next – the matchers

Then we create a matcher to process the image information. This is of the base type openMVG::Matcher_Regions_AllInMemory, and the concrete matcher selected must suit the region type we used (scalar or binary).

We pass in a distance ratio, used to determine the threshold for discarding spurious points. The comment in the Nearest Neighbour distance matching code ("Ratio between best and second best matches must be superior to [the] given threshold") is easy to misread: a match is kept when the best distance is less than fDistRatio times the second-best distance, so a lower value is stricter.
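To make the ratio test concrete, here's a minimal standalone sketch of the filtering logic (illustrative only: NNResult and passesRatioTest are hypothetical names, not the openMVG internals).

#include <cstdio>

struct NNResult { float bestDist; float secondDist; };

// Keep a candidate match only when the best distance is clearly smaller
// than the second best; ambiguous matches are discarded.
bool passesRatioTest(const NNResult& r, float fDistRatio /* e.g. 0.6f */) {
  return r.bestDist < fDistRatio * r.secondDist;
}

int main() {
  NNResult unambiguous {10.f, 40.f}; // best much closer than the runner-up
  NNResult ambiguous   {10.f, 12.f}; // two near-identical candidates
  printf("unambiguous kept: %d\n", passesRatioTest(unambiguous, 0.6f));
  printf("ambiguous kept:   %d\n", passesRatioTest(ambiguous, 0.6f));
}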

Since we used the default AKAZE (MSURF, a float/scalar descriptor) earlier, we can simply assign an ANN_L2 matcher to it.
 
On a successful load we use openMVG::exhaustivePairs() to generate all the pair values (type openMVG::Pair_Set), then run the Match function; the output is a set of matches which we write to a file using the PairedIndMatchToStream() call.
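One scaling note: exhaustivePairs() generates every possible pairing, which grows quadratically with the image count (the 34-image set above already means 561 pairs to match). As a hedged sketch, the openMVG release I'm using also ships contiguousWithOverlap() alongside exhaustivePairs() for ordered captures; verify the name and header location in your version.

#include <cstddef>
#include "openMVG/matching_image_collection/Pair_Builder.hpp" // header location may vary by release

// Pick a pairing strategy: neighbours-only for an ordered sequence,
// otherwise all N*(N-1)/2 pairs.
openMVG::Pair_Set buildPairs(std::size_t nViews, bool orderedSequence) {
  if (orderedSequence)
    return openMVG::contiguousWithOverlap(nViews, 3); // each image vs its next 3 neighbours
  return openMVG::exhaustivePairs(nViews);
}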

Next: Geometric Filter

The geometric filter process takes a maximum residual error value and the complete list of potential (putative) matches, alongside a geometric model fitting function. It then generates an output set of matches that fit the model to within the residual error.

There are three available solvers, which are used to generate a target model to filter points against:

  • GeometricFilter_FMatrix_AC – AContrario Fundamental matrix solver
  • GeometricFilter_EMatrix_AC – AContrario Essential matrix solver
  • GeometricFilter_HMatrix_AC – AContrario Homography matrix solver

So, for the Fundamental Matrix solver, we pass it the list of putative match pairs; using these points the geometric filter estimates the fundamental matrix and removes outliers from the set of matches. Following this we save the final matches to a file using PairedIndMatchToStream().

Although there's some fiddly detail hiding behind this model, we can simply pass in the filter reference and the maximum residual error used to discard outliers; a lower value here discards more points (but too high a value will include outliers and cause problems when bundle adjusting).
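For a mostly planar subject (a flat facade, say) the homography solver from the list above could be swapped in instead. This fragment is a drop-in replacement for the Filter() call in the MWE below, assuming the matching header (H_ACRobust.hpp in my source tree, by analogy with F_ACRobust.hpp) is included:

#include "openMVG/matching_image_collection/H_ACRobust.hpp" // header name as in my source tree

  // Same call shape, different model: matches must now be consistent
  // with a single plane-to-plane mapping.
  collectionGeomFilter.Filter(
            GeometricFilter_HMatrix_AC(maxResidualError),
            map_PutativesMatches,
            map_GeometricMatches,
            vec_imagesSize);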

 
There's some more information on using the fundamental matrix and a-contrario estimation at http://www.loria.fr/~sur/articles/noury07fundamental.pdf

A simple MWE using some default values from the sample code is:
void ImageList::computeMatches()
{
float fDistRatio = 0.6f;
openMVG::matching::PairWiseMatches map_PutativesMatches;
std::vector<std::string> vec_fileNames;
std::vector<std::pair<size_t, size_t> > vec_imagesSize;

  for (openMVG::sfm::Views::const_iterator iter = _sfm_data.GetViews().begin(); iter != _sfm_data.GetViews().end(); ++iter)
  {
    const openMVG::sfm::View * v = iter->second.get();
    vec_fileNames.push_back(stlplus::create_filespec(_sfm_data.s_root_path, v->s_Img_path));
    vec_imagesSize.push_back( std::make_pair( v->ui_width, v->ui_height) );
  }

  std::unique_ptr<openMVG::Matcher_Regions_AllInMemory> collectionMatcher;
  collectionMatcher.reset(new openMVG::Matcher_Regions_AllInMemory(fDistRatio, openMVG::ANN_L2));

  if (collectionMatcher->loadData(_regionsType, vec_fileNames, getDirectory()))
  {
    openMVG::Pair_Set pairs;
    pairs = openMVG::exhaustivePairs(_sfm_data.GetViews().size());
    collectionMatcher->Match(vec_fileNames, pairs, map_PutativesMatches);
    std::ofstream file(_matches_full);
    if (file.is_open())
      PairedIndMatchToStream(map_PutativesMatches, file);
    file.close();
  }

  std::shared_ptr<openMVG::sfm::Features_Provider> feats_provider = std::make_shared<openMVG::sfm::Features_Provider>();

  if (!feats_provider->load(_sfm_data, getDirectory(), _regionsType))
    return;

  openMVG::PairWiseMatches map_GeometricMatches;

  ImageCollectionGeometricFilter collectionGeomFilter(feats_provider.get());

  const double maxResidualError = 1.0;

  collectionGeomFilter.Filter(
            GeometricFilter_FMatrix_AC(maxResidualError),
            map_PutativesMatches,
            map_GeometricMatches,
            vec_imagesSize);

  std::ofstream file (_matches_filtered);
  if (file.is_open())
    PairedIndMatchToStream(map_GeometricMatches, file);
  file.close();

}

The Feature Detection Algorithms Available


Algorithms Available

The choice of which descriptor to use is a tradeoff between computation time and the data set – it's a decision between:

SIFT
The Lowe SIFT (Scale-Invariant Feature Transform) algorithm. This is non-free (patented in the US), which may restrict your application, but it's a good general-purpose algorithm and one for which GPU-accelerated implementations are available.

The SIFT algorithm supports a number of "standard" SIFT parameters (original vs upscale, max octave count, scales per octave, max ratio of Hessian eigenvalues and min contrast), defined in the SiftParams structure.

The SIFT_Image_describer class also supports three presets – Normal, High and Ultra – which tune the minimum contrast value, with the Ultra preset also introducing upscaling.
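As a hedged sketch, constructing the SIFT describer looks much like the AKAZE construction used elsewhere in these posts; the (params, orientation) constructor and the preset values follow the openMVG release I'm building against, so check yours (SIFT lives in the nonFree tree there).

#include <memory>
#include "nonFree/sift/SIFT_describer.hpp" // location in my openMVG checkout

std::unique_ptr<openMVG::features::Image_describer> makeSiftDescriber() {
  using namespace openMVG::features;
  SiftParams params; // octave count, scales per octave, contrast threshold...
  std::unique_ptr<Image_describer> describer(
      new SIFT_Image_describer(params, true /* compute orientation */));
  describer->Set_configuration_preset(HIGH_PRESET); // Normal/High/Ultra
  return describer;
}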


AKAZE
(or A-KAZE, if you prefer) is "a novel 2D feature detection and description method that operates completely in a nonlinear scale space". This uses the AKAZEConfig configuration structure.

Here we can choose one of three variants:
  • MSURF (the default “Modified Speeded Up Robust Features”)
  • LIOP (“Local Intensity Order Pattern”)
  • MLDB (“Modified-Local Difference Binary”)

There are also three preset thresholds (Normal, High & Ultra) which tune the parameters used by the feature recognition algorithm.
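As a hedged sketch, switching variant is just a different enum value in the AKAZEParams construction used in the listings (AKAZE_MLDB here, by analogy with the AKAZE_MSURF default; verify the enum names in your release). Note that MLDB produces binary regions, which constrains the matcher choice later on.

#include <memory>
#include "openMVG/features/features.hpp"

std::unique_ptr<openMVG::features::AKAZE_Image_describer> makeMldbDescriber() {
  using namespace openMVG::features;
  AKAZEParams params(AKAZEConfig(), AKAZE_MLDB); // binary MLDB descriptors
  std::unique_ptr<AKAZE_Image_describer> describer(
      new AKAZE_Image_describer(params, true));
  describer->Set_configuration_preset(HIGH_PRESET); // Normal/High/Ultra
  return describer;
}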

There's also AKAZE_OCV: this is the OpenCV-supplied AKAZE implementation.

The variants are described in the KAZE paper, and the AKAZE paper at http://www.robesafe.com/personal/pablo.alcantarilla/papers/Alcantarilla13bmvc.pdf has comparisons of AKAZE, SIFT and several other feature extraction methods.

And LIOP is explained in more detail at http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.371.519


The gory details

You don't need to know any of this if you're simply using the stock methods, but...

A region set is a collection of 2D features (the PointFeature class), each describing a position, scale and orientation, and stored in the ".feat" file. A descriptor is essentially a container of raw data values (the Descriptor class) corresponding to the description of a single feature. Since the descriptors depend on the feature algorithm used, they're treated as opaque chunks of data at this point, so the descriptor is constructed with a "type, count" template.
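To make the "type, count" idea concrete, here's a minimal standalone mimic (illustrative only; openMVG's real PointFeature and Descriptor classes add serialisation and I/O on top of this).

#include <array>
#include <cstddef>
#include <cstdio>

// A descriptor is just an opaque fixed-size block of algorithm-specific data.
template <typename T, std::size_t N>
struct Descriptor {
  std::array<T, N> data;
};

using SiftDescriptor = Descriptor<unsigned char, 128>; // 128 bytes per SIFT feature

// A 2D feature: position, scale and orientation.
struct PointFeature {
  float x, y, scale, orientation;
};

int main() {
  PointFeature f {120.5f, 64.2f, 2.1f, 0.3f};
  SiftDescriptor d {}; // opaque payload paired with the feature
  printf("feature at (%.1f, %.1f), descriptor of %zu bytes\n",
         f.x, f.y, d.data.size());
}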

For an example of the mapping see the sample file main_ComputeFeatures_OpenCV.cpp, which demonstrates this working with off-the-shelf descriptors from OpenCV. In this case we see the detectAndCompute() method of the OpenCV AKAZE extractor, which produces a vector of keypoints and a descriptor output array. The keypoints are pushed to the feature list, and each descriptor is serialised as raw data and pushed onto the descriptors list.

Feature Generation

The background

The next step in the process is to examine the image data and extract features which can be tracked across multiple images.

OpenMVG offers a selection of feature extraction algorithms. The two primary feature extraction methods are SIFT and AKAZE, described above.

Both offer some tuning of parameters for feature recognition, and the selection of the feature extraction method has some impact on the later processing stages.

Generate the initial features

To generate the list of features in an image we use the base class openMVG::features::Image_describer.

This provides the base class for a number of implementations which are used to extract features from an image: specifically AKAZE, AKAZE_OCV (the OpenCV AKAZE) and SIFT.
The actual points in the image are stored in a regions object, openMVG::features::Regions, which provides a simple mechanism to extract the features into a region set and then serialise it to a file. Each regions object represents a set of regions as "2D position" plus "attributes"; the region attributes are referred to as descriptors.

It's actually very simple to use, without getting involved in the details. The basic workflow is (using AKAZE as an example):
  • Create a unique pointer to an openMVG::features::Image_describer
  • Create a new openMVG::features::AKAZE_Image_describer and assign it to the generic image describer pointer
  • Call the image describer's Allocate method, passing a reference to an openMVG::features::Regions pointer
  • Then, for each view:
    • Create an openMVG::features::Regions to represent a set of regions
    • Read the image data as greyscale, using openMVG::image::ReadImage()
    • Call the image describer's Describe method on the image
    • Store the results to a file using the image describer's Save method

This results in a set of feature and descriptor files (".feat" and ".desc") for each image. The openMVG::features::Regions pointer reference is kept to pass to later functions which need to know how to interpret the feature data.
Note that we must use the .feat and .desc naming convention: when we use the library functions to reload this information for later processing, this convention is used to associate the image files with their feature and descriptor information.
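As a quick standalone illustration of that convention, using the same stlplus helpers as the listings (the image path here is an example only):

#include <cstdio>
#include <string>
#include "third_party/stlplus3/filesystemSimplified/file_system.hpp" // path as in the openMVG tree

int main() {
  // For <dir>/IMG_0001.JPG the loaders expect <dir>/IMG_0001.feat
  // and <dir>/IMG_0001.desc alongside the matches files.
  const std::string image = "/data/shoot/IMG_0001.JPG";
  const std::string dir  = stlplus::folder_part(image);
  const std::string base = stlplus::basename_part(image); // "IMG_0001"
  printf("%s\n", stlplus::create_filespec(dir, base, "feat").c_str());
  printf("%s\n", stlplus::create_filespec(dir, base, "desc").c_str());
}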

void ImageList::generateFeatures(void)
{
std::unique_ptr<openMVG::features::Image_describer> image_describer;
openMVG::image::Image<unsigned char> imageGray;

  image_describer.reset(new openMVG::features::AKAZE_Image_describer(
        openMVG::features::AKAZEParams(openMVG::features::AKAZEConfig(), openMVG::features::AKAZE_MSURF),
        true));
  image_describer->Allocate(_regionsType);
  //image_describer->Set_configuration_preset(openMVG::features::HIGH_PRESET);

  for(openMVG::sfm::Views::const_iterator iterViews = _sfm_data.views.begin();
                                 iterViews != _sfm_data.views.end(); 
                                 ++iterViews)
  {
  const openMVG::sfm::View * view = iterViews->second.get();
  const std::string sFeat = stlplus::create_filespec(getDirectory(), 
                                                     stlplus::basename_part(view->s_Img_path), "feat");
  const std::string sDesc = stlplus::create_filespec(getDirectory(), 
                                                     stlplus::basename_part(view->s_Img_path), "desc");

    if (!stlplus::file_exists(sFeat))
    {
      openMVG::image::ReadImage(view->s_Img_path.c_str(), &imageGray);
      std::unique_ptr<openMVG::features::Regions> regions;
      image_describer->Describe(imageGray, regions);
      image_describer->Save(regions.get(), sFeat, sDesc);
    }
  }
}



In working code it's generally worth reloading the feature and descriptor files rather than going through the lengthy generation process each time, so the processing should check for the existence of the files in question before recalculating.

(also note the C++11 use of ".get()", ".reset()" and so on, which operate on the smart pointer itself – returning the raw pointer, or replacing the managed object – rather than on the referenced object)

One wrinkle is the use of openMVG::features::Regions: this type holds the set of region descriptors, and is used to save, load and manipulate the feature/descriptor lists. It falls into one of two variants – Scalar or Binary regions – based on the type of the descriptor.

Depending on the type of descriptor chosen there are some limits on which matchers we can apply to the features. So we have:
  • SIFT, AKAZE Float & AKAZE LIOP: Scalar Regions
  • AKAZE Binary (MLDB): Binary Regions

And when we run the matcher (at the next step) we can choose:
  • Binary Regions: Brute Force Hamming
  • Scalar Regions: Brute Force & ANN L2 matchers
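So, as a hedged sketch, an MLDB (binary) run would swap the matcher construction in computeMatches() for something like the following; the BRUTE_FORCE_HAMMING value mirrors the ANN_L2 naming used earlier, but verify it against your openMVG release.

#include <memory>
#include "openMVG/matching_image_collection/Matcher_Regions_AllInMemory.hpp"

// A matcher suited to binary (MLDB) regions.
std::unique_ptr<openMVG::Matcher_Regions_AllInMemory>
makeBinaryMatcher(float fDistRatio) {
  return std::unique_ptr<openMVG::Matcher_Regions_AllInMemory>(
      new openMVG::Matcher_Regions_AllInMemory(fDistRatio,
                                               openMVG::BRUTE_FORCE_HAMMING));
}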

When we put together this section of code, then, the initial choice of image description algorithm and configuration determines the matcher implementations we can choose later.

More on this later...