package org.das2.qds.util;

import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.das2.datum.Datum;
import org.das2.datum.Units;
import org.das2.datum.UnitsConverter;
import org.das2.util.LoggerManager;
import org.das2.qds.DDataSet;
import org.das2.qds.DataSetOps;
import org.das2.qds.DataSetUtil;
import org.das2.qds.IDataSet;
import org.das2.qds.JoinDataSet;
import org.das2.qds.MutablePropertyDataSet;
import org.das2.qds.QDataSet;
import org.das2.qds.SemanticOps;
import org.das2.qds.WritableDataSet;
import org.das2.qds.ops.Ops;

/**
 * Reduction is a set of static methods for reducing data, or averaging
 * data to make smaller datasets.
 * @author jbf
 */
public class Reduction {

    private static final Logger logger= LoggerManager.getLogger("qdataset.ops.reduction");

    /**
     * return a converter for differences.  If dstUnits is specified, then it is
     * explicitly the target.
     * @param src source dataset, providing the units to convert from.
     * @param dst target dataset, whose offset units are the target when dstUnits is null.
     * @param dstUnits if not null, then explicitly use these units.
     * @return a converter from the units of src to the offset units of dst (or to dstUnits).
     */
    private static UnitsConverter getDifferencesConverter( QDataSet src, QDataSet dst, Units dstUnits ) {
        Units unitsIn, unitsOut;

        unitsIn= (Units) dst.property( QDataSet.UNITS );
        if ( unitsIn==null ) unitsIn= Units.dimensionless;
        unitsOut= (Units)src.property( QDataSet.UNITS );
        if ( unitsOut==null ) unitsOut= Units.dimensionless;

        UnitsConverter xuc;
        if ( dstUnits!=null ) {
            xuc= unitsOut.getConverter( dstUnits );
        } else {
            xuc= unitsOut.getConverter( unitsIn.getOffsetUnits() );
        }
        return xuc;
    }

    /**
     * reduce a waveform dataset by averaging blocks of adjacent samples.  This
     * is basically a copy of the code in org.das2.qstream.filter.MinMaxReduceFilter.
     * @param ds a rank 1 or rank 2 waveform dataset.
     * @param xLimit the target resolution, result will be finer than this, if possible.
     * @return either the original dataset when there is no reduction to be done, or a series dataset with bins (deltas for now).
     * @see org.das2.qstream.filter.MinMaxReduceFilter
     */
    private static QDataSet reducexWaveform( QDataSet ds, QDataSet xLimit ) {

        DataSetBuilder xbuilder;
        DataSetBuilder ybuilder;
        DataSetBuilder yminbuilder;
        DataSetBuilder ymaxbuilder;

        xbuilder= new DataSetBuilder( 1, 1000 );
        ybuilder= new DataSetBuilder( 1, 1000 );
        yminbuilder= new DataSetBuilder( 1, 1000 );
        ymaxbuilder= new DataSetBuilder( 1, 1000 );
        //wbuilder= new DataSetBuilder( 2, 1000, ds.length(0) );

        Datum cadence= DataSetUtil.asDatum(xLimit);

        QDataSet _offsets= (QDataSet) ds.property(QDataSet.DEPEND_1);
        MutablePropertyDataSet offsets= DataSetOps.makePropertiesMutable(_offsets);
        //TODO: EMFISIS HFR has incorrect VALID_MAX.
        offsets.putProperty( QDataSet.VALID_MIN, null );
        offsets.putProperty( QDataSet.VALID_MAX, null );
        if ( offsets.rank()==2 ) {
            offsets= (MutablePropertyDataSet)offsets.slice(0);
            logger.fine("slice(0) on rank 2 dataset because code doesn't support time-varying DEPEND_1");
        }

        // assumption: the block size is the largest power of two of samples that
        // still spans less than the requested cadence; only the "icadence= 4"
        // seed survived, the selection loop below is reconstructed.
        int icadence= 4;
        while ( icadence<offsets.length()
                && DataSetUtil.asDatum( Ops.subtract( offsets.slice(icadence-1), offsets.slice(0) ) ).lt( cadence ) ) {
            icadence= icadence*2;
        }
        icadence= icadence/2;
        if ( icadence<4 ) {
            return ds; // no reduction to be done.
        }

        QDataSet xx= SemanticOps.xtagsDataSet(ds);
        Units xunits= SemanticOps.getUnits(xx);
        UnitsConverter uc= SemanticOps.getUnits(offsets).getConverter( xunits.getOffsetUnits() );

        // average each block of icadence samples, recording the block min and max.
        int points= 0;
        for ( int i=0; i<ds.length(); i++ ) {
            QDataSet rec= ds.slice(i);                      // assumption: rank 2 waveform records.
            QDataSet wds= SemanticOps.weightsDataSet(rec);
            for ( int j0=0; j0+icadence<=rec.length(); j0+=icadence ) {
                double s=0, n=0;
                double mn= Double.POSITIVE_INFINITY, mx= Double.NEGATIVE_INFINITY;
                for ( int j=j0; j<j0+icadence; j++ ) {
                    double w= wds.value(j);
                    if ( w>0 ) {
                        double v= rec.value(j);
                        s+= v*w;
                        n+= w;
                        mn= Math.min( mn, v );
                        mx= Math.max( mx, v );
                    }
                }
                if ( n>0 ) {
                    xbuilder.putValue( points, xx.value(i) + uc.convert( offsets.value( j0+icadence/2 ) ) );
                    ybuilder.putValue( points, s/n );
                    yminbuilder.putValue( points, mn );
                    ymaxbuilder.putValue( points, mx );
                    points++;
                }
            }
        }

        Units yunits= SemanticOps.getUnits(ds);
        MutablePropertyDataSet xds= xbuilder.getDataSet();
        xds.putProperty( QDataSet.UNITS, xunits );
        MutablePropertyDataSet yds= ybuilder.getDataSet();
        yminbuilder.putProperty( QDataSet.UNITS, yunits );
        ymaxbuilder.putProperty( QDataSet.UNITS, yunits );
        yds.putProperty( QDataSet.UNITS, yunits );
        yds.putProperty( QDataSet.DEPEND_0, xds );
        yds.putProperty( QDataSet.DELTA_MINUS, Ops.subtract( yds, yminbuilder.getDataSet() ) );
        yds.putProperty( QDataSet.DELTA_PLUS, Ops.subtract( ymaxbuilder.getDataSet(), yds ) );

        return yds;
    }
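    /*
     * Worked example of the cadence selection above (illustrative numbers,
     * assuming the reconstructed doubling loop): for 4096-sample waveform
     * records with 1.0e-4 s sample spacing and xLimit=0.1 s, icadence grows
     * 4, 8, ..., 1024; at 1024 the first 1023 offsets span 0.1023 s, which
     * exceeds the limit, so the loop stops and icadence falls back to 512.
     * Each record then reduces to 4096/512 = 8 mean/min/max points.
     */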
    /**
     * produce a simpler version of the dataset by averaging adjacent data in X.
     * Adjacent points are averaged together until a point is found that is not
     * in the bin, and then a new bin is started.  The bin's lower bounds are
     * integer multiples of xLimit.  Rank 2 waveform data is handed to the
     * offset-aware waveform reduction.
     * @param ds rank 1 or rank 2 dataset with DEPEND_0.
     * @param xLimit rank 0 dataset indicating the size of the bins.
     * @return the reduced dataset with DEPEND_0, DELTA_PLUS and DELTA_MINUS.
     * @see #reduce2D(org.das2.qds.QDataSet, org.das2.qds.QDataSet, org.das2.qds.QDataSet)
     */
    public static QDataSet reducex( QDataSet ds, QDataSet xLimit ) {
        logger.entering("Reduction", "reducex");
        long t0= System.currentTimeMillis();

        // assumption: this preamble is reconstructed; the bin bookkeeping in the
        // loop below and the result assembly are the original logic.
        if ( SemanticOps.isRank2Waveform(ds) ) {
            QDataSet result= reducexWaveform( ds, xLimit );
            logger.exiting("Reduction", "reducex");
            return result;
        }

        boolean rank2= ds.rank()==2;
        int nj= rank2 ? ds.length(0) : 1;

        DataSetBuilder xbuilder= new DataSetBuilder( 1, 1000 );
        DataSetBuilder ybuilder= new DataSetBuilder( 2, 1000, nj );
        DataSetBuilder yminbuilder= new DataSetBuilder( 2, 1000, nj );
        DataSetBuilder ymaxbuilder= new DataSetBuilder( 2, 1000, nj );
        DataSetBuilder wbuilder= new DataSetBuilder( 2, 1000, nj );

        QDataSet x= SemanticOps.xtagsDataSet(ds);
        QDataSet wds= SemanticOps.weightsDataSet(ds);

        UnitsConverter uc= getDifferencesConverter( xLimit, x, null );
        double dxLimit= uc.convert( xLimit.value() );

        double fill= Double.NaN;      // assumption: NaN marks channels with no valid points.
        int points= 0;
        int nx= 0;                    // number of points accumulated into the current bin.
        double x0= Float.MAX_VALUE;   // current bin start.
        double sx0= 0;
        double wx= 1;                 // weight of each point in the x average (sx0 is vestigial).
        double ax0;
        double[] ay0= new double[nj];
        double[] sy0= new double[nj];
        double[] nn0= new double[nj];
        double[] miny0= new double[nj];
        double[] maxy0= new double[nj];
        for ( int j=0; j<nj; j++ ) {
            miny0[j]= Double.POSITIVE_INFINITY;
            maxy0[j]= Double.NEGATIVE_INFINITY;
        }

        int i= 0;
        while ( i<ds.length() ) {
            double pxx= x.value(i);
            QDataSet yy= rank2 ? ds.slice(i) : ds.trim(i,i+1);   // length-1 view for rank 1.
            QDataSet ww= rank2 ? wds.slice(i) : wds.trim(i,i+1);
            double dx= pxx - x0;
            if ( dx >= 0 && dx < dxLimit ) { // accumulate into the current bin.
                for ( int j=0; j<nj; j++ ) {
                    double pyy = yy.value(j);
                    sy0[j] += pyy*ww.value(j);
                    nn0[j] += ww.value(j);
                    if ( ww.value(j)>0 ) {
                        miny0[j] = Math.min( miny0[j], pyy );
                        maxy0[j] = Math.max( maxy0[j], pyy );
                    }
                }
                nx++;
            }
            if ( dx<0 || dx>= dxLimit ) { // flush and clear the accumulators.
                boolean flush= nx>0;          // pre-seed state, since nx is reset below.
                double fx0= x0 + dxLimit/2;   // center of the bin being flushed.
                x0 = Math.floor(pxx/dxLimit) * dxLimit;
                for ( int j=0; j<nj; j++ ) {
                    if ( flush ) {
                        boolean nv= nn0[j]==0;
                        //ax0 = sx0 / nx;
                        ax0 = fx0;
                        ay0[j] = nv ? fill : sy0[j] / nn0[j];
                        if ( j==0 ) xbuilder.putValue( points, ax0 );
                        ybuilder.putValue( points, j, ay0[j] );
                        yminbuilder.putValue( points, j, nv ? fill : miny0[j] );
                        ymaxbuilder.putValue( points, j, nv ? fill : maxy0[j] );
                        wbuilder.putValue( points, j, nn0[j] );
                    }
                    double pyy = yy.value(j);
                    double wwj= ww.value(j);
                    if ( j==0 ) sx0 = pxx*wx;
                    sy0[j] = pyy*wwj;
                    nn0[j] = wwj;
                    nx= 1;
                    if ( wwj>0 ) {
                        miny0[j] = pyy;
                        maxy0[j] = pyy;
                    } else {
                        miny0[j] = Double.POSITIVE_INFINITY;
                        maxy0[j] = Double.NEGATIVE_INFINITY;
                    }
                }
                if ( flush ) points++;
            }
            i++;
        }

        if ( nx>0 ) { // flush the final bin.
            for ( int j=0; j<nj; j++ ) {
                boolean nv= nn0[j]==0;
                if ( j==0 ) xbuilder.putValue( points, x0 + dxLimit/2 );
                ybuilder.putValue( points, j, nv ? fill : sy0[j] / nn0[j] );
                yminbuilder.putValue( points, j, nv ? fill : miny0[j] );
                ymaxbuilder.putValue( points, j, nv ? fill : maxy0[j] );
                wbuilder.putValue( points, j, nn0[j] );
            }
            points++;
        }

        MutablePropertyDataSet xds= xbuilder.getDataSet();
        MutablePropertyDataSet yds= ybuilder.getDataSet();

        Map<String,Object> xprops= DataSetUtil.getDimensionProperties(x,null);
        if ( xprops.containsKey( QDataSet.CADENCE ) ) xprops.put( QDataSet.CADENCE, xLimit );
        if ( xprops.containsKey( QDataSet.CACHE_TAG ) ) xprops.put( QDataSet.CACHE_TAG, null );
        DataSetUtil.putProperties( xprops, xds );

        Map<String,Object> yprops= DataSetUtil.getProperties(ds);
        yprops.put( QDataSet.DEPEND_0, xds );
        for ( int j=1; j<ds.rank(); j++ ) { // assumption: carry along the higher-dimension tags.
            String dep= "DEPEND_"+j;
            if ( ds.property(dep)!=null ) yprops.put( dep, ds.property(dep) );
        }
        DataSetUtil.putProperties( yprops, yds );

        yminbuilder.putProperty( QDataSet.UNITS, SemanticOps.getUnits(ds) );
        ymaxbuilder.putProperty( QDataSet.UNITS, SemanticOps.getUnits(ds) );
        yds.putProperty( QDataSet.WEIGHTS, wbuilder.getDataSet() );
        yds.putProperty( QDataSet.DELTA_MINUS, Ops.subtract( yds, yminbuilder.getDataSet() ) );
        yds.putProperty( QDataSet.DELTA_PLUS, Ops.subtract( ymaxbuilder.getDataSet(), yds ) );

        QDataSet result= rank2 ? yds : DataSetOps.slice1( yds, 0 ); // assumption: unwrap rank 1 input.

        logger.log( Level.FINE, "time to reducex({0} records -> {1} records) (ms): {2}",
                new Object[] { ds.length(), result.length(), System.currentTimeMillis()-t0 } );
        logger.exiting("Reduction", "reducex" );
        //System.err.println( String.format( "time to reducex(%d records -> %d records) (ms): %d", ds.length(), result.length(), System.currentTimeMillis()-t0) );
        return result;
    }
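    /*
     * Usage sketch for reducex (test data and names are illustrative, not part
     * of this class): average a one-second-cadence series into one-minute bins.
     *
     *   QDataSet yy= Ops.ripples(3600);                               // test signal
     *   QDataSet tt= Ops.timegen( "2024-01-01T00:00", "1 s", 3600 );  // throws ParseException
     *   QDataSet ds= Ops.link( tt, yy );
     *   QDataSet rd= Reduction.reducex( ds, DataSetUtil.asDataSet( 60, Units.seconds ) );
     *   // rd has ~60 records; DELTA_PLUS/DELTA_MINUS carry the spread within each bin.
     */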
    /**
     * produce a simpler version of the dataset by averaging adjacent data.
     * code taken from org.das2.graph.GraphUtil.reducePath.  Adjacent points are
     * averaged together until a point is found that is not in the bin, and then
     * a new bin is started.  The bin's lower bounds are integer multiples
     * of xLimit and yLimit.
     *
     * If yLimit is null, then averaging is done for all points in the x bin,
     * regardless of how close they are in Y.  This is similarly true when
     * xLimit is null.
     *
     * xLimit and yLimit are rank 0 datasets, so that they can indicate that
     * binning should be done in log space rather than linear.  In this case,
     * the SCALE_TYPE of the limit should be "log" and its units should be
     * convertible to Units.logERatio (for example, Units.log10Ratio or
     * Units.percentIncrease).  Note when either is log, then averaging is
     * done in the log space.
     *
     * @param ds rank 1 dataset.  Must have DEPEND_0 (presently).
     * @param xLimit the size of the bins or null to indicate no limit.
     * @param yLimit the size of the bins or null to indicate no limit.
     * @return the reduced dataset, rank 1 with DEPEND_0.
     */
    public static QDataSet reduce2D( QDataSet ds, QDataSet xLimit, QDataSet yLimit ) {
        logger.entering("Reduction", "reduce2D");
        long t0= System.currentTimeMillis();

        DataSetBuilder xbuilder= new DataSetBuilder( 1, 1000 );
        DataSetBuilder ybuilder= new DataSetBuilder( 1, 1000 );
        DataSetBuilder yminbuilder= new DataSetBuilder( 1, 1000 );
        DataSetBuilder ymaxbuilder= new DataSetBuilder( 1, 1000 );
        DataSetBuilder wbuilder= new DataSetBuilder( 1, 1000 ); // weights to go here.

        QDataSet x= (QDataSet) ds.property( QDataSet.DEPEND_0 );
        if ( x==null ) {
            if ( xLimit!=null && SemanticOps.getUnits(xLimit)!=Units.dimensionless ) {
                throw new IllegalArgumentException("xLimit is not dimensionless, yet there are no timetags in the data set: "+ds );
            } else {
                x= new org.das2.qds.IndexGenDataSet(ds.length());
            }
        }
        QDataSet y= ds;

        double x0 = Float.MAX_VALUE;
        double y0 = Float.MAX_VALUE;
        double sx0 = 0;
        double sy0 = 0;
        double nn0 = 0;
        double miny0 = Double.POSITIVE_INFINITY;
        double maxy0 = Double.NEGATIVE_INFINITY;
        double ax0;
        double ay0; // last averaged location.

        boolean xlog= xLimit!=null && "log".equals( xLimit.property( QDataSet.SCALE_TYPE ) );
        boolean ylog= yLimit!=null && "log".equals( yLimit.property( QDataSet.SCALE_TYPE ) );

        UnitsConverter uc;
        double dxLimit, dyLimit;
        if ( xLimit!=null ) {
            uc= getDifferencesConverter( xLimit, x, xlog ? Units.logERatio : null );
            dxLimit = uc.convert( xLimit.value() );
        } else {
            dxLimit= Double.MAX_VALUE;
        }
        if ( yLimit!=null ) {
            uc= getDifferencesConverter( yLimit, y, ylog ? Units.logERatio : null );
            dyLimit = uc.convert( yLimit.value() );
        } else {
            dyLimit= Double.MAX_VALUE;
        }

        int points = 0;
        //int inCount = 0;

        QDataSet wds= DataSetUtil.weightsDataSet(y);

        int i=0;
        while ( i<ds.length() ) { // assumption: the loop header and in-bin test are reconstructed.
            double ww= wds.value(i);
            double yy= y.value(i);
            double pxx= xlog ? Math.log( x.value(i) ) : x.value(i);
            double pyy= ylog ? Math.log( yy ) : yy;
            double dx= pxx - x0;
            double dy= pyy - y0;
            if ( -dxLimit/2 <= dx && dx < dxLimit/2 && -dyLimit/2 <= dy && dy < dyLimit/2 ) {
                sx0 += pxx*ww;
                sy0 += pyy*ww;
                nn0 += ww;
                if ( ww>0 ) {
                    miny0 = Math.min( miny0, yy);
                    maxy0 = Math.max( maxy0, yy);
                }
                i++;
                continue;
            }
            if ( nn0>0 ) {
                ax0 = sx0 / nn0;
                ay0 = sy0 / nn0;
                xbuilder.putValue( points, xlog ? Math.exp(ax0) : ax0 );
                ybuilder.putValue( points, ylog ? Math.exp(ay0) : ay0 );
                yminbuilder.putValue( points, miny0 );
                ymaxbuilder.putValue( points, maxy0 );
                wbuilder.putValue( points, nn0 );
                points++;
            }
            i++;
            x0 = dxLimit * ( 0.5 + (int) Math.floor(pxx/dxLimit) );
            y0 = dyLimit * ( 0.5 + (int) Math.floor(pyy/dyLimit) );
            sx0 = pxx*ww;
            sy0 = pyy*ww;
            nn0 = ww;
            if ( ww>0 ) {
                miny0 = yy;
                maxy0 = yy;
            } else {
                miny0 = Double.POSITIVE_INFINITY;
                maxy0 = Double.NEGATIVE_INFINITY;
            }
        }

        if ( nn0>0 ) { // flush the final bin.
            ax0 = sx0 / nn0;
            ay0 = sy0 / nn0;
            xbuilder.putValue( points, xlog ? Math.exp(ax0) : ax0 );
            ybuilder.putValue( points, ylog ? Math.exp(ay0) : ay0 );
            yminbuilder.putValue( points, miny0 );
            ymaxbuilder.putValue( points, maxy0 );
            wbuilder.putValue( points, nn0 );
            points++;
        }

        MutablePropertyDataSet yds= ybuilder.getDataSet();
        MutablePropertyDataSet xds= xbuilder.getDataSet();

        Map<String,Object> xprops= DataSetUtil.getProperties(x);
        if ( xprops.containsKey( QDataSet.CADENCE ) ) xprops.put( QDataSet.CADENCE, xLimit );
        if ( xprops.containsKey( QDataSet.CACHE_TAG ) ) xprops.put( QDataSet.CACHE_TAG, null );
        DataSetUtil.putProperties( xprops, xds );

        Map<String,Object> yprops= DataSetUtil.getProperties(y);
        yprops.put( QDataSet.DEPEND_0, xds );
        DataSetUtil.putProperties( yprops, yds );

        yminbuilder.putProperty( QDataSet.UNITS, SemanticOps.getUnits(y) );
        ymaxbuilder.putProperty( QDataSet.UNITS, SemanticOps.getUnits(y) );

        yds.putProperty( QDataSet.DEPEND_0, xds );
        yds.putProperty( QDataSet.WEIGHTS, wbuilder.getDataSet() );

        //TODO: this should probably be BIN_PLUS, BIN_MINUS
        yds.putProperty( QDataSet.DELTA_MINUS, Ops.subtract( yds, yminbuilder.getDataSet() ) );
        yds.putProperty( QDataSet.DELTA_PLUS, Ops.subtract( ymaxbuilder.getDataSet(), yds ) );

        logger.log( Level.FINE, "time to reduce2D({0} records -> {1} records) (ms): {2}",
                new Object[] { ds.length(), yds.length(), System.currentTimeMillis()-t0 } );
        logger.exiting("Reduction", "reduce2D");
        return yds;
    }
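    /*
     * Usage sketch for reduce2D (illustrative): thin a dense rank 1 series so
     * that no output bin is finer than 1 s in X or 0.1 units in Y.  For
     * log-space Y binning, give yLimit ratio units and SCALE_TYPE "log", for
     * example a rank 0 dataset with Units.percentIncrease.
     *
     *   QDataSet yy= Ops.ripples(100000);
     *   QDataSet tt= Ops.timegen( "2024-01-01T00:00", "0.01 s", 100000 );  // throws ParseException
     *   QDataSet ds= Ops.link( tt, yy );
     *   QDataSet rd= Reduction.reduce2D( ds,
     *       DataSetUtil.asDataSet( 1, Units.seconds ),
     *       DataSetUtil.asDataSet( 0.1, Units.dimensionless ) );
     */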
    /**
     * reduce the buckshot scatter data by laying it out on a 2-D hexgrid and
     * accumulating the hits to each cell.  This has not been thoroughly verified.
     * @param ds rank 1 Y(X)
     * @param z null or data to average
     * @return rank 2 ds containing frequency of occurrence for each bin, with DEPEND_0=xxx and DEPEND_1=yyy.
     * @throws IllegalArgumentException when the units cannot be converted
     * @see org.das2.qds.ops.Ops#histogram2d(org.das2.qds.QDataSet, org.das2.qds.QDataSet, int[], org.das2.qds.QDataSet, org.das2.qds.QDataSet)
     * @see <a href="https://cran.r-project.org/web/packages/hexbin/vignettes/hexagon_binning.pdf">hexagon binning</a>
     */
    public static QDataSet hexbin( QDataSet ds, QDataSet z ) {
        logger.entering("Reduction", "hexbin");

        if ( ds.rank()!=1 && !Ops.isBundle(ds) ) {
            throw new IllegalArgumentException("ds.rank() must be 1");
        }

        QDataSet xx= SemanticOps.xtagsDataSet(ds);
        QDataSet yy= SemanticOps.ytagsDataSet(ds);

        QDataSet xr= Ops.extent(xx);
        QDataSet yr= Ops.multiply( Ops.extent(yy), (3/Math.sqrt(3)) );

        // two y grids, offset by a half cell, make the staggered hexagonal layout.
        QDataSet xxx= Ops.linspace( xr.value(0), xr.value(1), 100 );
        QDataSet yyy1= Ops.linspace( yr.value(0), yr.value(1), 100 );
        double dy= yyy1.value(1)-yyy1.value(0);
        yyy1= Ops.linspace( yr.value(0)-dy/4, yr.value(1)-dy/4, 100 );
        QDataSet yyy2= Ops.linspace( yr.value(0)+dy/4, yr.value(1)+dy/4, 100 );

        double ymin1= yyy1.value(0);
        double ymin2= yyy2.value(0);
        double xmin= xxx.value(0);
        double xspace= xxx.value(1) - xxx.value(0);
        double yspace= yyy1.value(1) - yyy1.value(0);
        int nx= xxx.length();
        int ny= yyy1.length();

        IDataSet result= IDataSet.createRank2(nx*2,ny);
        QDataSet ww= SemanticOps.weightsDataSet(yy);
        UnitsConverter ucx= SemanticOps.getUnitsConverter( xx,xxx );
        UnitsConverter ucy= SemanticOps.getUnitsConverter( yy,yyy1 );
        boolean xlog= false;
        boolean ylog= false;

        DDataSet S;
        if ( z==null ) {
            z= Ops.ones(xx.length());
            S= null;
        } else {
            S= DDataSet.createRank2(nx*2,ny);
        }

        for ( int i=0; i<xx.length(); i++ ) {
            if ( ww.value(i)>0 ) {
                double x= ucx.convert( xx.value(i) );
                double y= ucy.convert( yy.value(i) );
                int ix= (int)( xlog ? (Math.log10(x)-xmin)/xspace : (x-xmin)/xspace );
                int iy1= (int)( ylog ? (Math.log10(y)-ymin1)/yspace : (y-ymin1)/yspace );
                int iy2= (int)( ylog ? (Math.log10(y)-ymin2)/yspace : (y-ymin2)/yspace );
                if ( ix>=0 && ix<nx && iy1>=0 && iy1<ny && iy2>=0 && iy2<ny ) {
                    // assumption: the nearer of the two candidate cell centers wins;
                    // grid 1 fills the even output columns and grid 2 the odd ones.
                    double d1= Math.abs( y - ( ymin1 + ( iy1 + 0.5 ) * yspace ) );
                    double d2= Math.abs( y - ( ymin2 + ( iy2 + 0.5 ) * yspace ) );
                    if ( d1<=d2 ) {
                        result.putValue( ix*2, iy1, result.value(ix*2,iy1)+1 );
                        if ( S!=null ) S.putValue( ix*2, iy1, S.value(ix*2,iy1)+z.value(i) );
                    } else {
                        result.putValue( ix*2+1, iy2, result.value(ix*2+1,iy2)+1 );
                        if ( S!=null ) S.putValue( ix*2+1, iy2, S.value(ix*2+1,iy2)+z.value(i) );
                    }
                }
            }
        }

        // assumption for the result assembly: interleave the two column grids to
        // form the DEPEND_0 tags, and divide out the counts when averaging z.
        DataSetBuilder xtags= new DataSetBuilder( 1, nx*2 );
        for ( int i=0; i<nx; i++ ) {
            xtags.putValue( i*2, xxx.value(i) );
            xtags.putValue( i*2+1, xxx.value(i)+xspace/2 );
        }
        MutablePropertyDataSet mresult;
        if ( S!=null ) {
            for ( int i=0; i<nx*2; i++ ) {
                for ( int j=0; j<ny; j++ ) {
                    double n= result.value(i,j);
                    S.putValue( i, j, n>0 ? S.value(i,j)/n : Double.NaN );
                }
            }
            mresult= S;
        } else {
            mresult= result;
        }
        mresult.putProperty( QDataSet.DEPEND_0, xtags.getDataSet() );
        mresult.putProperty( QDataSet.DEPEND_1, yyy1 );

        logger.exiting("Reduction", "hexbin");
        return mresult;
    }

    /**
     * reduce the buckshot scatter data by laying it out on a regular 2-D grid
     * and accumulating the hits to each cell.  (The name "rectbin" and the grid
     * setup here are reconstructions mirroring hexbin with a single, unshifted
     * y grid; the binning arithmetic in the loop is original.)
     * @param ds rank 1 Y(X)
     * @param z null or data to average
     * @return rank 2 ds containing frequency of occurrence for each bin, with DEPEND_0=xxx and DEPEND_1=yyy.
     * @see #hexbin(org.das2.qds.QDataSet, org.das2.qds.QDataSet)
     */
    public static QDataSet rectbin( QDataSet ds, QDataSet z ) {
        logger.entering("Reduction", "rectbin");

        if ( ds.rank()!=1 && !Ops.isBundle(ds) ) {
            throw new IllegalArgumentException("ds.rank() must be 1");
        }

        QDataSet xx= SemanticOps.xtagsDataSet(ds);
        QDataSet yy= SemanticOps.ytagsDataSet(ds);

        QDataSet xr= Ops.extent(xx);
        QDataSet yr= Ops.extent(yy);
        QDataSet xxx= Ops.linspace( xr.value(0), xr.value(1), 100 );
        QDataSet yyy= Ops.linspace( yr.value(0), yr.value(1), 100 );

        double xmin= xxx.value(0);
        double ymin= yyy.value(0);
        double xspace= xxx.value(1) - xxx.value(0);
        double yspace= yyy.value(1) - yyy.value(0);
        int nx= xxx.length();
        int ny= yyy.length();

        IDataSet result= IDataSet.createRank2(nx,ny);
        QDataSet ww= SemanticOps.weightsDataSet(yy);
        UnitsConverter ucx= SemanticOps.getUnitsConverter( xx,xxx );
        UnitsConverter ucy= SemanticOps.getUnitsConverter( yy,yyy );
        boolean xlog= false;
        boolean ylog= false;

        DDataSet S;
        if ( z==null ) {
            z= Ops.ones(xx.length());
            S= null;
        } else {
            S= DDataSet.createRank2(nx,ny);
        }

        for ( int i=0; i<xx.length(); i++ ) {
            if ( ww.value(i)>0 ) {
                double x= ucx.convert( xx.value(i) );
                double y= ucy.convert( yy.value(i) );
                int ix= (int)( xlog ? (Math.log10(x)-xmin)/xspace : (x-xmin)/xspace );
                int iy= (int)( ylog ? (Math.log10(y)-ymin)/yspace : (y-ymin)/yspace );
                if ( ix>=0 && ix<nx && iy>=0 && iy<ny ) {
                    result.putValue( ix, iy, result.value(ix,iy)+1 );
                    if ( S!=null ) S.putValue( ix, iy, S.value(ix,iy)+z.value(i) );
                }
            }
        }

        MutablePropertyDataSet mresult;
        if ( S!=null ) { // average the accumulated z by the counts.
            for ( int i=0; i<nx; i++ ) {
                for ( int j=0; j<ny; j++ ) {
                    double n= result.value(i,j);
                    S.putValue( i, j, n>0 ? S.value(i,j)/n : Double.NaN );
                }
            }
            mresult= S;
        } else {
            mresult= result;
        }
        mresult.putProperty( QDataSet.DEPEND_0, xxx );
        mresult.putProperty( QDataSet.DEPEND_1, yyy );

        logger.exiting("Reduction", "rectbin");
        return mresult;
    }

}
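/*
 * Usage sketch for hexbin (illustrative; assumes Ops.randn test data): bin
 * 10000 scatter points into hexagonal cells, then average a quantity zz over
 * the same cells.
 *
 *   QDataSet xx= Ops.randn(10000);
 *   QDataSet yy= Ops.add( Ops.randn(10000), xx );
 *   QDataSet counts= Reduction.hexbin( Ops.link( xx, yy ), null );  // occurrence counts
 *   QDataSet zz= Ops.ripples(10000);
 *   QDataSet means= Reduction.hexbin( Ops.link( xx, yy ), zz );     // cell averages of zz
 */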