pktools  2.6.4
Processing Kernel for geospatial data
pkstat.cc
1 /**********************************************************************
2 pkstat.cc: program to calculate basic statistics from raster dataset
3 Copyright (C) 2008-2015 Pieter Kempeneers
4 
5 This file is part of pktools
6 
7 pktools is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11 
12 pktools is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with pktools. If not, see <http://www.gnu.org/licenses/>.
19 ***********************************************************************/
20 #include <iostream>
21 #include <fstream>
22 #include <math.h>
23 #include "base/Optionpk.h"
24 #include "algorithms/StatFactory.h"
25 #include "algorithms/ImgRegression.h"
26 /******************************************************************************/
78 using namespace std;
79 
80 int main(int argc, char *argv[])
81 {
82  Optionpk<string> input_opt("i","input","name of the input raster dataset");
83  Optionpk<unsigned short> band_opt("b","band","band(s) on which to calculate statistics",0);
84  Optionpk<bool> filename_opt("f", "filename", "Shows image filename ", false);
85  Optionpk<bool> stat_opt("stats", "statistics", "Shows basic statistics (min,max, mean and stdDev of the raster datasets)", false);
86  Optionpk<double> ulx_opt("ulx", "ulx", "Upper left x value bounding box");
87  Optionpk<double> uly_opt("uly", "uly", "Upper left y value bounding box");
88  Optionpk<double> lrx_opt("lrx", "lrx", "Lower right x value bounding box");
89  Optionpk<double> lry_opt("lry", "lry", "Lower right y value bounding box");
90  Optionpk<double> nodata_opt("nodata","nodata","Set nodata value(s)");
91  Optionpk<short> down_opt("down", "down", "Down sampling factor (for raster sample datasets only). Can be used to create grid points", 1);
92  Optionpk<unsigned int> random_opt("rnd", "rnd", "generate random numbers", 0);
93  Optionpk<double> scale_opt("scale", "scale", "Scale(s) for reading input image(s)");
94  Optionpk<double> offset_opt("offset", "offset", "Offset(s) for reading input image(s)");
95 
96  // Optionpk<bool> transpose_opt("t","transpose","transpose output",false);
97  // Optionpk<std::string> randdist_opt("dist", "dist", "distribution for generating random numbers, see http://www.gnu.org/software/gsl/manual/gsl-ref_toc.html#TOC320 (only uniform and Gaussian supported yet)", "gaussian");
98  // Optionpk<double> randa_opt("rnda", "rnda", "first parameter for random distribution (mean value in case of Gaussian)", 0);
99  // Optionpk<double> randb_opt("rndb", "rndb", "second parameter for random distribution (standard deviation in case of Gaussian)", 1);
100  Optionpk<bool> mean_opt("mean","mean","calculate mean",false);
101  Optionpk<bool> median_opt("median","median","calculate median",false);
102  Optionpk<bool> var_opt("var","var","calculate variance",false);
103  Optionpk<bool> skewness_opt("skew","skewness","calculate skewness",false);
104  Optionpk<bool> kurtosis_opt("kurt","kurtosis","calculate kurtosis",false);
105  Optionpk<bool> stdev_opt("stdev","stdev","calculate standard deviation",false);
106  Optionpk<bool> sum_opt("sum","sum","calculate sum of column",false);
107  Optionpk<bool> minmax_opt("mm","minmax","calculate minimum and maximum value",false);
108  Optionpk<bool> min_opt("min","min","calculate minimum value",false);
109  Optionpk<bool> max_opt("max","max","calculate maximum value",false);
110  Optionpk<double> src_min_opt("src_min","src_min","start reading source from this minimum value");
111  Optionpk<double> src_max_opt("src_max","src_max","stop reading source from this maximum value");
112  Optionpk<bool> histogram_opt("hist","hist","calculate histogram",false);
113  Optionpk<bool> histogram2d_opt("hist2d","hist2d","calculate 2-dimensional histogram based on two images",false);
114  Optionpk<short> nbin_opt("nbin","nbin","number of bins to calculate histogram");
115  Optionpk<bool> relative_opt("rel","relative","use percentiles for histogram to calculate histogram",false);
116  Optionpk<bool> kde_opt("kde","kde","Use Kernel density estimation when producing histogram. The standard deviation is estimated based on Silverman's rule of thumb",false);
117  Optionpk<bool> rmse_opt("rmse","rmse","calculate root mean square error between two raster datasets",false);
118  Optionpk<bool> reg_opt("reg","regression","calculate linear regression between two raster datasets and get correlation coefficient",false);
119  Optionpk<bool> regerr_opt("regerr","regerr","calculate linear regression between two raster datasets and get root mean square error",false);
120  Optionpk<bool> preg_opt("preg","preg","calculate perpendicular regression between two raster datasets and get correlation coefficient",false);
121  Optionpk<short> verbose_opt("v", "verbose", "verbose mode when positive", 0,2);
122  ulx_opt.setHide(1);
123  uly_opt.setHide(1);
124  lrx_opt.setHide(1);
125  lry_opt.setHide(1);
126  down_opt.setHide(1);
127  random_opt.setHide(1);
128  scale_opt.setHide(1);
129  offset_opt.setHide(1);
130  src_min_opt.setHide(1);
131  src_max_opt.setHide(1);
132  kde_opt.setHide(1);
133 
134  // range_opt.setHide(1);
135  // transpose_opt.setHide(1);
136 
137  bool doProcess;//stop process when program was invoked with help option (-h --help)
138  try{
139  //mandatory options
140  doProcess=input_opt.retrieveOption(argc,argv);
141  //optional options
142  band_opt.retrieveOption(argc,argv);
143  filename_opt.retrieveOption(argc,argv);
144  stat_opt.retrieveOption(argc,argv);
145  nodata_opt.retrieveOption(argc,argv);
146  mean_opt.retrieveOption(argc,argv);
147  median_opt.retrieveOption(argc,argv);
148  var_opt.retrieveOption(argc,argv);
149  stdev_opt.retrieveOption(argc,argv);
150  minmax_opt.retrieveOption(argc,argv);
151  min_opt.retrieveOption(argc,argv);
152  max_opt.retrieveOption(argc,argv);
153  histogram_opt.retrieveOption(argc,argv);
154  nbin_opt.retrieveOption(argc,argv);
155  relative_opt.retrieveOption(argc,argv);
156  histogram2d_opt.retrieveOption(argc,argv);
157  rmse_opt.retrieveOption(argc,argv);
158  reg_opt.retrieveOption(argc,argv);
159  regerr_opt.retrieveOption(argc,argv);
160  preg_opt.retrieveOption(argc,argv);
161  //advanced options
162  ulx_opt.retrieveOption(argc,argv);
163  uly_opt.retrieveOption(argc,argv);
164  lrx_opt.retrieveOption(argc,argv);
165  lry_opt.retrieveOption(argc,argv);
166  down_opt.retrieveOption(argc,argv);
167  random_opt.retrieveOption(argc,argv);
168  scale_opt.retrieveOption(argc,argv);
169  offset_opt.retrieveOption(argc,argv);
170  src_min_opt.retrieveOption(argc,argv);
171  src_max_opt.retrieveOption(argc,argv);
172  kde_opt.retrieveOption(argc,argv);
173  verbose_opt.retrieveOption(argc,argv);
174  }
175  catch(string predefinedString){
176  std::cout << predefinedString << std::endl;
177  exit(0);
178  }
179  if(!doProcess){
180  cout << endl;
181  cout << "Usage: pkstat -i input" << endl;
182  cout << endl;
183  std::cout << "short option -h shows basic options only, use long option --help to show all options" << std::endl;
184  exit(0);//help was invoked, stop processing
185  }
186 
187  if(src_min_opt.size()){
188  while(src_min_opt.size()<band_opt.size())
189  src_min_opt.push_back(src_min_opt[0]);
190  }
191  if(src_max_opt.size()){
192  while(src_max_opt.size()<band_opt.size())
193  src_max_opt.push_back(src_max_opt[0]);
194  }
195 
196  unsigned int nbin=0;
197  double minX=0;
198  double minY=0;
199  double maxX=0;
200  double maxY=0;
201  double minValue=0;
202  double maxValue=0;
203  double meanValue=0;
204  double stdDev=0;
205 
206  const char* pszMessage;
207  void* pProgressArg=NULL;
208  GDALProgressFunc pfnProgress=GDALTermProgress;
209  double progress=0;
210  srand(time(NULL));
211 
214  std::vector<double> histogramOutput;
215  double nsample=0;
216 
217  ImgReaderGdal imgReader;
218 
219  if(scale_opt.size()){
220  while(scale_opt.size()<input_opt.size())
221  scale_opt.push_back(scale_opt[0]);
222  }
223  if(offset_opt.size()){
224  while(offset_opt.size()<input_opt.size())
225  offset_opt.push_back(offset_opt[0]);
226  }
227  if(input_opt.empty()){
228  std::cerr << "No image dataset provided (use option -i). Use --help for help information";
229  exit(0);
230  }
231  for(int ifile=0;ifile<input_opt.size();++ifile){
232  try{
233  imgReader.open(input_opt[ifile]);
234  }
235  catch(std::string errorstring){
236  std::cout << errorstring << std::endl;
237  exit(0);
238  }
239 
240  if(filename_opt[0])
241  std::cout << " --input " << input_opt[ifile] << " ";
242 
243  for(int inodata=0;inodata<nodata_opt.size();++inodata)
244  imgReader.pushNoDataValue(nodata_opt[inodata]);
245 
246  int nband=band_opt.size();
247  for(int iband=0;iband<nband;++iband){
248 
249  for(int inodata=0;inodata<nodata_opt.size();++inodata){
250  if(!inodata)
251  imgReader.GDALSetNoDataValue(nodata_opt[0],band_opt[iband]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
252  }
253 
254  if(offset_opt.size()>ifile)
255  imgReader.setOffset(offset_opt[ifile],band_opt[iband]);
256  if(scale_opt.size()>ifile)
257  imgReader.setScale(scale_opt[ifile],band_opt[iband]);
258 
259  // if(stat_opt[0]||mean_opt[0]||var_opt[0]||stdev_opt[0]){
260  // assert(band_opt[iband]<imgReader.nrOfBand());
261  // GDALProgressFunc pfnProgress;
262  // void* pProgressData;
263  // GDALRasterBand* rasterBand;
264  // rasterBand=imgReader.getRasterBand(band_opt[iband]);
265  // rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev,pfnProgress,pProgressData);
266 
267  // if(mean_opt[0])
268  // std::cout << "--mean " << meanValue << " ";
269  // if(stdev_opt[0])
270  // std::cout << "--stdDev " << stdDev << " ";
271  // if(var_opt[0])
272  // std::cout << "--var " << stdDev*stdDev << " ";
273  // if(stat_opt[0])
274  // std::cout << "-min " << minValue << " -max " << maxValue << " --mean " << meanValue << " --stdDev " << stdDev << " ";
275  // }
276 
277  if(minmax_opt[0]||min_opt[0]||max_opt[0]){
278  assert(band_opt[iband]<imgReader.nrOfBand());
279 
280  if((ulx_opt.size()||uly_opt.size()||lrx_opt.size()||lry_opt.size())&&(imgReader.covers(ulx_opt[0],uly_opt[0],lrx_opt[0],lry_opt[0]))){
281  double uli,ulj,lri,lrj;
282  imgReader.geo2image(ulx_opt[0],uly_opt[0],uli,ulj);
283  imgReader.geo2image(lrx_opt[0],lry_opt[0],lri,lrj);
284  imgReader.getMinMax(static_cast<int>(uli),static_cast<int>(lri),static_cast<int>(ulj),static_cast<int>(lrj),band_opt[iband],minValue,maxValue);
285  }
286  else{
287  imgReader.getMinMax(minValue,maxValue,band_opt[iband],true);
288  }
289  if(minmax_opt[0])
290  std::cout << "-min " << minValue << " -max " << maxValue << " ";
291  else{
292  if(min_opt[0])
293  std::cout << "-min " << minValue << " ";
294  if(max_opt[0])
295  std::cout << "-max " << maxValue << " ";
296  }
297  }
298  }
299  if(histogram_opt[0]){//aggregate results from multiple inputs, but only calculate for first selected band
300  assert(band_opt[0]<imgReader.nrOfBand());
301  nbin=(nbin_opt.size())? nbin_opt[0]:0;
302 
303  imgReader.getMinMax(minValue,maxValue,band_opt[0]);
304  if(src_min_opt.size())
305  minValue=src_min_opt[0];
306  if(src_max_opt.size())
307  maxValue=src_max_opt[0];
308  if(minValue>=maxValue)
309  imgReader.getMinMax(minValue,maxValue,band_opt[0]);
310 
311  if(verbose_opt[0])
312  cout << "number of valid pixels in image: " << imgReader.getNvalid(band_opt[0]) << endl;
313 
314  nsample+=imgReader.getHistogram(histogramOutput,minValue,maxValue,nbin,band_opt[0],kde_opt[0]);
315 
316  //only output for last input file
317  if(ifile==input_opt.size()-1){
318  std::cout.precision(10);
319  for(int bin=0;bin<nbin;++bin){
320  double binValue=0;
321  if(nbin==maxValue-minValue+1)
322  binValue=minValue+bin;
323  else
324  binValue=minValue+static_cast<double>(maxValue-minValue)*(bin+0.5)/nbin;
325  std::cout << binValue << " ";
326  if(relative_opt[0]||kde_opt[0])
327  std::cout << 100.0*static_cast<double>(histogramOutput[bin])/static_cast<double>(nsample) << std::endl;
328  else
329  std::cout << static_cast<double>(histogramOutput[bin]) << std::endl;
330  }
331  }
332  }
333  if(histogram2d_opt[0]&&input_opt.size()<2){
334  assert(band_opt.size()>1);
335  imgReader.getMinMax(minX,maxX,band_opt[0]);
336  imgReader.getMinMax(minY,maxY,band_opt[1]);
337  if(src_min_opt.size()){
338  minX=src_min_opt[0];
339  minY=src_min_opt[1];
340  }
341  if(src_max_opt.size()){
342  maxX=src_max_opt[0];
343  maxY=src_max_opt[1];
344  }
345  nbin=(nbin_opt.size())? nbin_opt[0]:0;
346  if(nbin<=1){
347  std::cerr << "Warning: number of bins not defined, calculating bins from min and max value" << std::endl;
348  if(minX>=maxX)
349  imgReader.getMinMax(minX,maxX,band_opt[0]);
350  if(minY>=maxY)
351  imgReader.getMinMax(minY,maxY,band_opt[1]);
352 
353  minValue=(minX<minY)? minX:minY;
354  maxValue=(maxX>maxY)? maxX:maxY;
355  if(verbose_opt[0])
356  std::cout << "min and max values: " << minValue << ", " << maxValue << std::endl;
357  nbin=maxValue-minValue+1;
358  }
359  assert(nbin>1);
360  double sigma=0;
361  //kernel density estimation as in http://en.wikipedia.org/wiki/Kernel_density_estimation
362  if(kde_opt[0]){
363  assert(band_opt[0]<imgReader.nrOfBand());
364  assert(band_opt[1]<imgReader.nrOfBand());
365  GDALProgressFunc pfnProgress;
366  void* pProgressData;
367  GDALRasterBand* rasterBand;
368  double stdDev1=0;
369  double stdDev2=0;
370  rasterBand=imgReader.getRasterBand(band_opt[0]);
371  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev1,pfnProgress,pProgressData);
372  rasterBand=imgReader.getRasterBand(band_opt[1]);
373  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev2,pfnProgress,pProgressData);
374 
375  double estimatedSize=1.0*imgReader.getNvalid(band_opt[0])/down_opt[0]/down_opt[0];
376  if(random_opt[0]>0)
377  estimatedSize*=random_opt[0]/100.0;
378  sigma=1.06*sqrt(stdDev1*stdDev2)*pow(estimatedSize,-0.2);
379  }
380  assert(nbin);
381  if(verbose_opt[0]){
382  if(sigma>0)
383  std::cout << "calculating 2d kernel density estimate with sigma " << sigma << " for bands " << band_opt[0] << " and " << band_opt[1] << std::endl;
384  else
385  std::cout << "calculating 2d histogram for bands " << band_opt[0] << " and " << band_opt[1] << std::endl;
386  std::cout << "nbin: " << nbin << std::endl;
387  }
388 
389 
390  vector< vector<double> > output;
391 
392  if(maxX<=minX)
393  imgReader.getMinMax(minX,maxX,band_opt[0]);
394  if(maxY<=minY)
395  imgReader.getMinMax(minY,maxY,band_opt[1]);
396 
397  if(maxX<=minX){
398  std::ostringstream s;
399  s<<"Error: could not calculate distribution (minX>=maxX)";
400  throw(s.str());
401  }
402  if(maxY<=minY){
403  std::ostringstream s;
404  s<<"Error: could not calculate distribution (minY>=maxY)";
405  throw(s.str());
406  }
407  output.resize(nbin);
408  for(int i=0;i<nbin;++i){
409  output[i].resize(nbin);
410  for(int j=0;j<nbin;++j)
411  output[i][j]=0;
412  }
413  int binX=0;
414  int binY=0;
415  vector<double> inputX(imgReader.nrOfCol());
416  vector<double> inputY(imgReader.nrOfCol());
417  unsigned long int nvalid=0;
418  for(int irow=0;irow<imgReader.nrOfRow();++irow){
419  if(irow%down_opt[0])
420  continue;
421  imgReader.readData(inputX,GDT_Float64,irow,band_opt[0]);
422  imgReader.readData(inputY,GDT_Float64,irow,band_opt[1]);
423  for(int icol=0;icol<imgReader.nrOfCol();++icol){
424  if(icol%down_opt[0])
425  continue;
426  if(random_opt[0]>0){
427  double p=static_cast<double>(rand())/(RAND_MAX);
428  p*=100.0;
429  if(p>random_opt[0])
430  continue;//do not select for now, go to next column
431  }
432  if(imgReader.isNoData(inputX[icol]))
433  continue;
434  if(imgReader.isNoData(inputY[icol]))
435  continue;
436  ++nvalid;
437  if(inputX[icol]>=maxX)
438  binX=nbin-1;
439  else if(inputX[icol]<=minX)
440  binX=0;
441  else
442  binX=static_cast<int>(static_cast<double>(inputX[icol]-minX)/(maxX-minX)*nbin);
443  if(inputY[icol]>=maxY)
444  binY=nbin-1;
445  else if(inputY[icol]<=minX)
446  binY=0;
447  else
448  binY=static_cast<int>(static_cast<double>(inputY[icol]-minY)/(maxY-minY)*nbin);
449  assert(binX>=0);
450  assert(binX<output.size());
451  assert(binY>=0);
452  assert(binY<output[binX].size());
453  if(sigma>0){
454  //create kde for Gaussian basis function
455  //todo: speed up by calculating first and last bin with non-zero contribution...
456  for(int ibinX=0;ibinX<nbin;++ibinX){
457  double centerX=minX+static_cast<double>(maxX-minX)*ibinX/nbin;
458  double pdfX=gsl_ran_gaussian_pdf(inputX[icol]-centerX, sigma);
459  for(int ibinY=0;ibinY<nbin;++ibinY){
460  //calculate \integral_ibinX^(ibinX+1)
461  double centerY=minY+static_cast<double>(maxY-minY)*ibinY/nbin;
462  double pdfY=gsl_ran_gaussian_pdf(inputY[icol]-centerY, sigma);
463  output[ibinX][binY]+=pdfX*pdfY;
464  }
465  }
466  }
467  else
468  ++output[binX][binY];
469  }
470  }
471  if(verbose_opt[0])
472  cout << "number of valid pixels: " << nvalid << endl;
473 
474  for(int binX=0;binX<nbin;++binX){
475  cout << endl;
476  for(int binY=0;binY<nbin;++binY){
477  double binValueX=0;
478  if(nbin==maxX-minX+1)
479  binValueX=minX+binX;
480  else
481  binValueX=minX+static_cast<double>(maxX-minX)*(binX+0.5)/nbin;
482  double binValueY=0;
483  if(nbin==maxY-minY+1)
484  binValueY=minY+binY;
485  else
486  binValueY=minY+static_cast<double>(maxY-minY)*(binY+0.5)/nbin;
487 
488  double value=static_cast<double>(output[binX][binY]);
489 
490  if(relative_opt[0])
491  value*=100.0/nvalid;
492 
493  cout << binValueX << " " << binValueY << " " << value << std::endl;
494  // double value=static_cast<double>(output[binX][binY])/nvalid;
495  // cout << (maxX-minX)*bin/(nbin-1)+minX << " " << (maxY-minY)*bin/(nbin-1)+minY << " " << value << std::endl;
496  }
497  }
498  }
499  if(reg_opt[0]&&input_opt.size()<2){
500  if(band_opt.size()<2)
501  continue;
502  imgreg.setDown(down_opt[0]);
503  imgreg.setThreshold(random_opt[0]);
504  double c0=0;//offset
505  double c1=1;//scale
506  double r2=imgreg.getR2(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
507  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
508  }
509  if(regerr_opt[0]&&input_opt.size()<2){
510  if(band_opt.size()<2)
511  continue;
512  imgreg.setDown(down_opt[0]);
513  imgreg.setThreshold(random_opt[0]);
514  double c0=0;//offset
515  double c1=1;//scale
516  double err=imgreg.getRMSE(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
517  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -rmse " << err << std::endl;
518  }
519  if(rmse_opt[0]&&input_opt.size()<2){
520  if(band_opt.size()<2)
521  continue;
522  imgreg.setDown(down_opt[0]);
523  imgreg.setThreshold(random_opt[0]);
524  double c0=0;//offset
525  double c1=1;//scale
526  double err=imgreg.getRMSE(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
527  std::cout << " -rmse " << err << std::endl;
528  }
529  if(preg_opt[0]&&input_opt.size()<2){
530  if(band_opt.size()<2)
531  continue;
532  imgreg.setDown(down_opt[0]);
533  imgreg.setThreshold(random_opt[0]);
534  double c0=0;//offset
535  double c1=1;//scale
536  double r2=imgreg.pgetR2(imgReader,band_opt[0],band_opt[1],c0,c1,verbose_opt[0]);
537  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
538  }
539  imgReader.close();
540  }
541  if(reg_opt[0]&&(input_opt.size()>1)){
542  imgreg.setDown(down_opt[0]);
543  imgreg.setThreshold(random_opt[0]);
544  double c0=0;//offset
545  double c1=1;//scale
546  while(band_opt.size()<input_opt.size())
547  band_opt.push_back(band_opt[0]);
548  if(src_min_opt.size()){
549  while(src_min_opt.size()<input_opt.size())
550  src_min_opt.push_back(src_min_opt[0]);
551  }
552  if(src_max_opt.size()){
553  while(src_max_opt.size()<input_opt.size())
554  src_max_opt.push_back(src_max_opt[0]);
555  }
556  ImgReaderGdal imgReader1(input_opt[0]);
557  ImgReaderGdal imgReader2(input_opt[1]);
558 
559  if(offset_opt.size())
560  imgReader1.setOffset(offset_opt[0],band_opt[0]);
561  if(scale_opt.size())
562  imgReader1.setScale(scale_opt[0],band_opt[0]);
563  if(offset_opt.size()>1)
564  imgReader2.setOffset(offset_opt[1],band_opt[1]);
565  if(scale_opt.size()>1)
566  imgReader2.setScale(scale_opt[1],band_opt[1]);
567 
568  for(int inodata=0;inodata<nodata_opt.size();++inodata){
569  if(!inodata){
570  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
571  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
572  }
573  imgReader1.pushNoDataValue(nodata_opt[inodata]);
574  imgReader2.pushNoDataValue(nodata_opt[inodata]);
575  }
576 
577  double r2=imgreg.getR2(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
578  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
579  imgReader1.close();
580  imgReader2.close();
581  }
582  if(preg_opt[0]&&(input_opt.size()>1)){
583  imgreg.setDown(down_opt[0]);
584  imgreg.setThreshold(random_opt[0]);
585  double c0=0;//offset
586  double c1=1;//scale
587  while(band_opt.size()<input_opt.size())
588  band_opt.push_back(band_opt[0]);
589  if(src_min_opt.size()){
590  while(src_min_opt.size()<input_opt.size())
591  src_min_opt.push_back(src_min_opt[0]);
592  }
593  if(src_max_opt.size()){
594  while(src_max_opt.size()<input_opt.size())
595  src_max_opt.push_back(src_max_opt[0]);
596  }
597  ImgReaderGdal imgReader1(input_opt[0]);
598  ImgReaderGdal imgReader2(input_opt[1]);
599 
600  if(offset_opt.size())
601  imgReader1.setOffset(offset_opt[0],band_opt[0]);
602  if(scale_opt.size())
603  imgReader1.setScale(scale_opt[0],band_opt[0]);
604  if(offset_opt.size()>1)
605  imgReader2.setOffset(offset_opt[1],band_opt[1]);
606  if(scale_opt.size()>1)
607  imgReader2.setScale(scale_opt[1],band_opt[1]);
608 
609  for(int inodata=0;inodata<nodata_opt.size();++inodata){
610  if(!inodata){
611  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
612  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
613  }
614  imgReader1.pushNoDataValue(nodata_opt[inodata]);
615  imgReader2.pushNoDataValue(nodata_opt[inodata]);
616  }
617 
618  double r2=imgreg.pgetR2(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
619  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -r2 " << r2 << std::endl;
620  imgReader1.close();
621  imgReader2.close();
622  }
623  if(regerr_opt[0]&&(input_opt.size()>1)){
624  imgreg.setDown(down_opt[0]);
625  imgreg.setThreshold(random_opt[0]);
626  double c0=0;//offset
627  double c1=1;//scale
628  while(band_opt.size()<input_opt.size())
629  band_opt.push_back(band_opt[0]);
630  if(src_min_opt.size()){
631  while(src_min_opt.size()<input_opt.size())
632  src_min_opt.push_back(src_min_opt[0]);
633  }
634  if(src_max_opt.size()){
635  while(src_max_opt.size()<input_opt.size())
636  src_max_opt.push_back(src_max_opt[0]);
637  }
638  ImgReaderGdal imgReader1(input_opt[0]);
639  ImgReaderGdal imgReader2(input_opt[1]);
640 
641  if(offset_opt.size())
642  imgReader1.setOffset(offset_opt[0],band_opt[0]);
643  if(scale_opt.size())
644  imgReader1.setScale(scale_opt[0],band_opt[0]);
645  if(offset_opt.size()>1)
646  imgReader2.setOffset(offset_opt[1],band_opt[1]);
647  if(scale_opt.size()>1)
648  imgReader2.setScale(scale_opt[1],band_opt[1]);
649 
650  for(int inodata=0;inodata<nodata_opt.size();++inodata){
651  if(!inodata){
652  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
653  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
654  }
655  imgReader1.pushNoDataValue(nodata_opt[inodata]);
656  imgReader2.pushNoDataValue(nodata_opt[inodata]);
657  }
658 
659  double err=imgreg.getRMSE(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
660  std::cout << "-c0 " << c0 << " -c1 " << c1 << " -rmse " << err << std::endl;
661  imgReader1.close();
662  imgReader2.close();
663  }
664  if(rmse_opt[0]&&(input_opt.size()>1)){
665  imgreg.setDown(down_opt[0]);
666  imgreg.setThreshold(random_opt[0]);
667  double c0=0;//offset
668  double c1=1;//scale
669  while(band_opt.size()<input_opt.size())
670  band_opt.push_back(band_opt[0]);
671  if(src_min_opt.size()){
672  while(src_min_opt.size()<input_opt.size())
673  src_min_opt.push_back(src_min_opt[0]);
674  }
675  if(src_max_opt.size()){
676  while(src_max_opt.size()<input_opt.size())
677  src_max_opt.push_back(src_max_opt[0]);
678  }
679  ImgReaderGdal imgReader1(input_opt[0]);
680  ImgReaderGdal imgReader2(input_opt[1]);
681 
682  if(offset_opt.size())
683  imgReader1.setOffset(offset_opt[0],band_opt[0]);
684  if(scale_opt.size())
685  imgReader1.setScale(scale_opt[0],band_opt[0]);
686  if(offset_opt.size()>1)
687  imgReader2.setOffset(offset_opt[1],band_opt[1]);
688  if(scale_opt.size()>1)
689  imgReader2.setScale(scale_opt[1],band_opt[1]);
690 
691  for(int inodata=0;inodata<nodata_opt.size();++inodata){
692  if(!inodata){
693  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
694  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
695  }
696  imgReader1.pushNoDataValue(nodata_opt[inodata]);
697  imgReader2.pushNoDataValue(nodata_opt[inodata]);
698  }
699 
700  double err=imgreg.getRMSE(imgReader1,imgReader2,c0,c1,band_opt[0],band_opt[1],verbose_opt[0]);
701  std::cout << "-rmse " << err << std::endl;
702  imgReader1.close();
703  imgReader2.close();
704  }
705  if(histogram2d_opt[0]&&(input_opt.size()>1)){
706  while(band_opt.size()<input_opt.size())
707  band_opt.push_back(band_opt[0]);
708  if(src_min_opt.size()){
709  while(src_min_opt.size()<input_opt.size())
710  src_min_opt.push_back(src_min_opt[0]);
711  }
712  if(src_max_opt.size()){
713  while(src_max_opt.size()<input_opt.size())
714  src_max_opt.push_back(src_max_opt[0]);
715  }
716  ImgReaderGdal imgReader1(input_opt[0]);
717  ImgReaderGdal imgReader2(input_opt[1]);
718 
719  if(offset_opt.size())
720  imgReader1.setOffset(offset_opt[0],band_opt[0]);
721  if(scale_opt.size())
722  imgReader1.setScale(scale_opt[0],band_opt[0]);
723  if(offset_opt.size()>1)
724  imgReader2.setOffset(offset_opt[1],band_opt[1]);
725  if(scale_opt.size()>1)
726  imgReader2.setScale(scale_opt[1],band_opt[1]);
727 
728  for(int inodata=0;inodata<nodata_opt.size();++inodata){
729  if(!inodata){
730  imgReader1.GDALSetNoDataValue(nodata_opt[0],band_opt[0]);//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
731  imgReader2.GDALSetNoDataValue(nodata_opt[0]),band_opt[1];//only single no data can be set in GDALRasterBand (used for ComputeStatistics)
732  }
733  imgReader1.pushNoDataValue(nodata_opt[inodata]);
734  imgReader2.pushNoDataValue(nodata_opt[inodata]);
735  }
736 
737  imgReader1.getMinMax(minX,maxX,band_opt[0]);
738  imgReader2.getMinMax(minY,maxY,band_opt[1]);
739 
740  if(verbose_opt[0]){
741  cout << "minX: " << minX << endl;
742  cout << "maxX: " << maxX << endl;
743  cout << "minY: " << minY << endl;
744  cout << "maxY: " << maxY << endl;
745  }
746 
747  if(src_min_opt.size()){
748  minX=src_min_opt[0];
749  minY=src_min_opt[1];
750  }
751  if(src_max_opt.size()){
752  maxX=src_max_opt[0];
753  maxY=src_max_opt[1];
754  }
755 
756  nbin=(nbin_opt.size())? nbin_opt[0]:0;
757  if(nbin<=1){
758  std::cerr << "Warning: number of bins not defined, calculating bins from min and max value" << std::endl;
759  // imgReader1.getMinMax(minX,maxX,band_opt[0]);
760  // imgReader2.getMinMax(minY,maxY,band_opt[0]);
761  if(minX>=maxX)
762  imgReader1.getMinMax(minX,maxX,band_opt[0]);
763  if(minY>=maxY)
764  imgReader2.getMinMax(minY,maxY,band_opt[1]);
765 
766  minValue=(minX<minY)? minX:minY;
767  maxValue=(maxX>maxY)? maxX:maxY;
768  if(verbose_opt[0])
769  std::cout << "min and max values: " << minValue << ", " << maxValue << std::endl;
770  nbin=maxValue-minValue+1;
771  }
772  assert(nbin>1);
773  double sigma=0;
774  //kernel density estimation as in http://en.wikipedia.org/wiki/Kernel_density_estimation
775  if(kde_opt[0]){
776  GDALProgressFunc pfnProgress;
777  void* pProgressData;
778  GDALRasterBand* rasterBand;
779  double stdDev1=0;
780  double stdDev2=0;
781  rasterBand=imgReader1.getRasterBand(band_opt[0]);
782  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev1,pfnProgress,pProgressData);
783  rasterBand=imgReader2.getRasterBand(band_opt[0]);
784  rasterBand->ComputeStatistics(0,&minValue,&maxValue,&meanValue,&stdDev2,pfnProgress,pProgressData);
785 
786  //todo: think of smarter way how to estimate size (nodata!)
787  double estimatedSize=1.0*imgReader.getNvalid(band_opt[0])/down_opt[0]/down_opt[0];
788  if(random_opt[0]>0)
789  estimatedSize*=random_opt[0]/100.0;
790  sigma=1.06*sqrt(stdDev1*stdDev2)*pow(estimatedSize,-0.2);
791  }
792  assert(nbin);
793  if(verbose_opt[0]){
794  if(sigma>0)
795  std::cout << "calculating 2d kernel density estimate with sigma " << sigma << " for datasets " << input_opt[0] << " and " << input_opt[1] << std::endl;
796  else
797  std::cout << "calculating 2d histogram for datasets " << input_opt[0] << " and " << input_opt[1] << std::endl;
798  std::cout << "nbin: " << nbin << std::endl;
799  }
800 
801  vector< vector<double> > output;
802 
803  if(maxX<=minX)
804  imgReader1.getMinMax(minX,maxX,band_opt[0]);
805  if(maxY<=minY)
806  imgReader2.getMinMax(minY,maxY,band_opt[1]);
807 
808  if(maxX<=minX){
809  std::ostringstream s;
810  s<<"Error: could not calculate distribution (minX>=maxX)";
811  throw(s.str());
812  }
813  if(maxY<=minY){
814  std::ostringstream s;
815  s<<"Error: could not calculate distribution (minY>=maxY)";
816  throw(s.str());
817  }
818  if(verbose_opt[0]){
819  cout << "minX: " << minX << endl;
820  cout << "maxX: " << maxX << endl;
821  cout << "minY: " << minY << endl;
822  cout << "maxY: " << maxY << endl;
823  }
824  output.resize(nbin);
825  for(int i=0;i<nbin;++i){
826  output[i].resize(nbin);
827  for(int j=0;j<nbin;++j)
828  output[i][j]=0;
829  }
830  int binX=0;
831  int binY=0;
832  vector<double> inputX(imgReader1.nrOfCol());
833  vector<double> inputY(imgReader2.nrOfCol());
834  double nvalid=0;
835  double geoX=0;
836  double geoY=0;
837  double icol1=0;
838  double irow1=0;
839  double icol2=0;
840  double irow2=0;
841  for(int irow=0;irow<imgReader1.nrOfRow();++irow){
842  if(irow%down_opt[0])
843  continue;
844  irow1=irow;
845  imgReader1.image2geo(icol1,irow1,geoX,geoY);
846  imgReader2.geo2image(geoX,geoY,icol2,irow2);
847  irow2=static_cast<int>(irow2);
848  imgReader1.readData(inputX,GDT_Float64,irow1,band_opt[0]);
849  imgReader2.readData(inputY,GDT_Float64,irow2,band_opt[1]);
850  for(int icol=0;icol<imgReader.nrOfCol();++icol){
851  if(icol%down_opt[0])
852  continue;
853  icol1=icol;
854  if(random_opt[0]>0){
855  double p=static_cast<double>(rand())/(RAND_MAX);
856  p*=100.0;
857  if(p>random_opt[0])
858  continue;//do not select for now, go to next column
859  }
860  if(imgReader1.isNoData(inputX[icol]))
861  continue;
862  imgReader1.image2geo(icol1,irow1,geoX,geoY);
863  imgReader2.geo2image(geoX,geoY,icol2,irow2);
864  icol2=static_cast<int>(icol2);
865  if(imgReader2.isNoData(inputY[icol2]))
866  continue;
867  // ++nvalid;
868  if(inputX[icol1]>=maxX)
869  binX=nbin-1;
870  else if(inputX[icol]<=minX)
871  binX=0;
872  else
873  binX=static_cast<int>(static_cast<double>(inputX[icol1]-minX)/(maxX-minX)*nbin);
874  if(inputY[icol2]>=maxY)
875  binY=nbin-1;
876  else if(inputY[icol2]<=minY)
877  binY=0;
878  else
879  binY=static_cast<int>(static_cast<double>(inputY[icol2]-minY)/(maxY-minY)*nbin);
880  assert(binX>=0);
881  assert(binX<output.size());
882  assert(binY>=0);
883  assert(binY<output[binX].size());
884  if(sigma>0){
885  //create kde for Gaussian basis function
886  //todo: speed up by calculating first and last bin with non-zero contribution...
887  for(int ibinX=0;ibinX<nbin;++ibinX){
888  double centerX=minX+static_cast<double>(maxX-minX)*ibinX/nbin;
889  double pdfX=gsl_ran_gaussian_pdf(inputX[icol1]-centerX, sigma);
890  for(int ibinY=0;ibinY<nbin;++ibinY){
891  //calculate \integral_ibinX^(ibinX+1)
892  double centerY=minY+static_cast<double>(maxY-minY)*ibinY/nbin;
893  double pdfY=gsl_ran_gaussian_pdf(inputY[icol2]-centerY, sigma);
894  output[ibinX][binY]+=pdfX*pdfY;
895  nvalid+=pdfX*pdfY;
896  }
897  }
898  }
899  else{
900  ++output[binX][binY];
901  ++nvalid;
902  }
903  }
904  }
905  if(verbose_opt[0])
906  cout << "number of valid pixels: " << nvalid << endl;
907  for(int binX=0;binX<nbin;++binX){
908  cout << endl;
909  for(int binY=0;binY<nbin;++binY){
910  double binValueX=0;
911  if(nbin==maxX-minX+1)
912  binValueX=minX+binX;
913  else
914  binValueX=minX+static_cast<double>(maxX-minX)*(binX+0.5)/nbin;
915  double binValueY=0;
916  if(nbin==maxY-minY+1)
917  binValueY=minY+binY;
918  else
919  binValueY=minY+static_cast<double>(maxY-minY)*(binY+0.5)/nbin;
920  double value=static_cast<double>(output[binX][binY]);
921 
922  if(relative_opt[0]||kde_opt[0])
923  value*=100.0/nvalid;
924 
925  cout << binValueX << " " << binValueY << " " << value << std::endl;
926  // double value=static_cast<double>(output[binX][binY])/nvalid;
927  // cout << (maxX-minX)*bin/(nbin-1)+minX << " " << (maxY-minY)*bin/(nbin-1)+minY << " " << value << std::endl;
928  }
929  }
930  imgReader1.close();
931  imgReader2.close();
932  }
933 
934  if(!histogram_opt[0]||histogram2d_opt[0])
935  std::cout << std::endl;
936 }
937 
938 // int nband=(band_opt.size()) ? band_opt.size() : imgReader.nrOfBand();
939 
940 // const char* pszMessage;
941 // void* pProgressArg=NULL;
942 // GDALProgressFunc pfnProgress=GDALTermProgress;
943 // double progress=0;
944 // srand(time(NULL));
945 
946 
947 // statfactory::StatFactory stat;
948 // imgregression::ImgRegression imgreg;
949 
950 // pfnProgress(progress,pszMessage,pProgressArg);
951 // for(irow=0;irow<classReader.nrOfRow();++irow){
952 // if(irow%down_opt[0])
953 // continue;
954 // // classReader.readData(classBuffer,GDT_Int32,irow);
955 // classReader.readData(classBuffer,GDT_Float64,irow);
956 // double x,y;//geo coordinates
957 // double iimg,jimg;//image coordinates in img image
958 // for(icol=0;icol<classReader.nrOfCol();++icol){
959 // if(icol%down_opt[0])
960  // continue;
961 
962 
963  // if(rand_opt[0]>0){
964  // gsl_rng* r=stat.getRandomGenerator(time(NULL));
965  // //todo: init random number generator using time...
966  // if(verbose_opt[0])
967  // std::cout << "generating " << rand_opt[0] << " random numbers: " << std::endl;
968  // for(unsigned int i=0;i<rand_opt[0];++i)
969  // std::cout << i << " " << stat.getRandomValue(r,randdist_opt[0],randa_opt[0],randb_opt[0]) << std::endl;
970  // }
971 
972  // imgreg.setDown(down_opt[0]);
973  // imgreg.setThreshold(threshold_opt[0]);
974  // double c0=0;//offset
975  // double c1=1;//scale
976  // double err=uncertNodata_opt[0];//start with high initial value in case we do not have first ob err=imgreg.getRMSE(imgReaderModel1,imgReader,c0,c1,verbose_opt[0]);
977 
978  // int nband=band_opt.size();
979  // if(band_opt[0]<0)
980  // nband=imgReader.nrOfBand();
981  // for(int iband=0;iband<nband;++iband){
982  // unsigned short band_opt[iband]=(band_opt[0]<0)? iband : band_opt[iband];
983 
984  // if(minmax_opt[0]||min_opt[0]||max_opt[0]){
985  // assert(band_opt[iband]<imgReader.nrOfBand());
986  // if((ulx_opt.size()||uly_opt.size()||lrx_opt.size()||lry_opt.size())&&(imgReader.covers(ulx_opt[0],uly_opt[0],lrx_opt[0],lry_opt[0]))){
987  // double uli,ulj,lri,lrj;
988  // imgReader.geo2image(ulx_opt[0],uly_opt[0],uli,ulj);
989  // imgReader.geo2image(lrx_opt[0],lry_opt[0],lri,lrj);
990  // imgReader.getMinMax(static_cast<int>(uli),static_cast<int>(lri),static_cast<int>(ulj),static_cast<int>(lrj),band_opt[iband],minValue,maxValue);
991  // }
992  // else
993  // imgReader.getMinMax(minValue,maxValue,band_opt[iband],true);
994  // if(minmax_opt[0])
995  // std::cout << "-min " << minValue << " -max " << maxValue << " ";
996  // else{
997  // if(min_opt[0])
998  // std::cout << "-min " << minValue << " ";
999  // if(max_opt[0])
1000  // std::cout << "-max " << maxValue << " ";
1001  // }
1002  // }
1003  // }
1004  // if(relative_opt[0])
1005  // hist_opt[0]=true;
1006  // if(hist_opt[0]){
1007  // assert(band_opt[0]<imgReader.nrOfBand());
1008  // unsigned int nbin=(nbin_opt.size())? nbin_opt[0]:0;
1009  // std::vector<unsigned long int> output;
1010  // minValue=0;
1011  // maxValue=0;
1012  // //todo: optimize such that getMinMax is only called once...
1013  // imgReader.getMinMax(minValue,maxValue,band_opt[0]);
1014 
1015  // if(src_min_opt.size())
1016  // minValue=src_min_opt[0];
1017  // if(src_max_opt.size())
1018  // maxValue=src_max_opt[0];
1019  // unsigned long int nsample=imgReader.getHistogram(output,minValue,maxValue,nbin,band_opt[0]);
1020  // std::cout.precision(10);
1021  // for(int bin=0;bin<nbin;++bin){
1022  // double binValue=0;
1023  // if(nbin==maxValue-minValue+1)
1024  // binValue=minValue+bin;
1025  // else
1026  // binValue=minValue+static_cast<double>(maxValue-minValue)*(bin+0.5)/nbin;
1027  // std::cout << binValue << " ";
1028  // if(relative_opt[0])
1029  // std::cout << 100.0*static_cast<double>(output[bin])/static_cast<double>(nsample) << std::endl;
1030  // else
1031  // std::cout << static_cast<double>(output[bin]) << std::endl;
1032  // }
1033  // }