/*
* DataSetOps.java
*
* Created on January 29, 2007, 9:48 AM
*/
package org.das2.qds;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.das2.datum.Datum;
import org.das2.datum.Units;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Scanner;
import java.util.regex.Pattern;
import org.das2.qds.buffer.BufferDataSet;
import org.das2.datum.DatumRange;
import org.das2.datum.EnumerationUnits;
import org.das2.datum.UnitsConverter;
import org.das2.datum.UnitsUtil;
import org.das2.util.LoggerManager;
import org.das2.util.monitor.ProgressMonitor;
import org.das2.qds.examples.Schemes;
import org.das2.qds.ops.Ops;
import org.das2.qds.util.DataSetBuilder;
/**
* Useful operations for QDataSets, such as slice2, leafTrim.
* TODO: identify which functions appear here instead of Ops.java.
* @author jbf
*/
public class DataSetOps {
// Class-wide logger, obtained from LoggerManager under the name "qdataset.ops".
private static final Logger logger= LoggerManager.getLogger("qdataset.ops");
/**
* absolute length limit for plots. This is used to limit the elements used in autoranging, etc.
*/
public static final int DS_LENGTH_LIMIT = 10_000_000; // ten million elements
/**
* return a dataset that has mutable properties. If the dataset parameter already has mutable
* properties, then the dataset itself is returned. If the dataset is a MutablePropertyDataSet but the immutable flag is
* set, then the dataset is wrapped to make the properties mutable.
* @param dataset dataset
* @return a MutablePropertyDataSet that is either a wrapper around the dataset, or the dataset itself.
* @see DataSetWrapper
*/
public static MutablePropertyDataSet makePropertiesMutable( final QDataSet dataset ) {
    // The argument can be handed back untouched only when it is a
    // MutablePropertyDataSet whose immutable flag is not set.
    final boolean usableAsIs= ( dataset instanceof MutablePropertyDataSet )
            && !( (MutablePropertyDataSet) dataset ).isImmutable();
    if ( usableAsIs ) {
        return (MutablePropertyDataSet) dataset;
    }
    // Every other case (not a MutablePropertyDataSet, or flagged immutable) gets wrapped.
    return new DataSetWrapper(dataset);
}
/**
* return a dataset that is writable. If the dataset parameter of this idempotent
* function is already writable, then the
* dataset is returned. If the dataset is a WritableDataSet but the immutable flag is
* set, then a copy is returned.
* @param dataset
* @return a WritableDataSet that is either a copy of the read-only dataset provided, or the parameter writable dataset provided.
*/
public static WritableDataSet makeWritable(QDataSet dataset) {
    // Anything that is not a WritableDataSet is copied into an ArrayDataSet.
    if ( !( dataset instanceof WritableDataSet ) ) {
        return ArrayDataSet.copy(dataset);
    }
    final WritableDataSet writable= (WritableDataSet) dataset;
    // A WritableDataSet with the immutable flag set is copied as well.
    if ( writable.isImmutable() ) {
        return ArrayDataSet.copy(dataset);
    }
    return writable;
}
/**
* slice on the dimension. This saves from the pain of having this branch
* all over the code.
* @param ds the rank N data to slice.
* @param dimension the dimension to slice, 0 is the first.
* @param index the index to slice at.
* @return the rank N-1 result.
*/
public static MutablePropertyDataSet slice( QDataSet ds, int dimension, int index ) {
    // Dispatch to the per-dimension slice method; only dimensions 0 through 3 have one.
    switch ( dimension ) {
        case 0:
            return slice0( ds, index );
        case 1:
            return slice1( ds, index );
        case 2:
            return slice2( ds, index );
        case 3:
            return slice3( ds, index );
        default:
            // The argument is a dimension index, not a rank, and 4 is not accepted,
            // so state the real valid set and report the offending value.
            throw new IllegalArgumentException( "dimension must be 0, 1, 2, or 3, but was " + dimension );
    }
}
/**
* slice on the first dimension. Note the function ds.slice(index) was
* added later and will typically be more efficient. This will create a new
* Slice0DataSet.
*
* DO NOT try to optimize this by calling native trim, some native slice
* implementations call this.
*
* TODO: This actually needs a bit more study, because there are codes that
* talk about not using the native slice because it copies data and they just
* want metadata. This probably is because Slice0DataSet doesn't check for
* immutability, and really should be copying. This needs to be fixed,
* making sure the result of this call is immutable, and the native slice
* really should be more efficient, always.
*
* @param ds rank 1 or more dataset
* @param index the index to slice at
* @return rank 0 or more dataset.
* @see QDataSet#slice(int)
*/
public static MutablePropertyDataSet slice0(final QDataSet ds, final int index) {
    // Always builds a new Slice0DataSet; the trailing true is passed straight to its
    // constructor (its meaning is defined by Slice0DataSet, not here).
    final MutablePropertyDataSet sliced= new Slice0DataSet( ds, index, true );
    return sliced;
}
/**
* slice dataset operator assumes a qube dataset
* by picking the index-th element of dataset's second dimension, without
* regard to tags.
* @param ds rank 2 or more dataset
* @param index the index to slice at
* @return rank 1 or more dataset.
*/
public static MutablePropertyDataSet slice1(final QDataSet ds, final int index) {
    // Builds a new Slice1DataSet; the flags (true, false) are passed straight to its
    // constructor (their meaning is defined by Slice1DataSet, not here).
    final MutablePropertyDataSet sliced= new Slice1DataSet( ds, index, true, false );
    return sliced;
}
/**
* slice dataset operator assumes a qube dataset
* by picking the index-th element of dataset's third dimension, without
* regard to tags.
* @param ds rank 3 or more dataset
* @param index the index to slice at.
* @return rank 2 or more dataset.
*/
public static MutablePropertyDataSet slice2(final QDataSet ds, final int index) {
    // Builds a new Slice2DataSet; the trailing true is passed straight to its
    // constructor (its meaning is defined by Slice2DataSet, not here).
    final MutablePropertyDataSet sliced= new Slice2DataSet( ds, index, true );
    return sliced;
}
/**
* slice dataset operator assumes a qube dataset
* by picking the index-th element of dataset's fourth dimension, without
* regard to tags.
* @param ds rank 4 or more dataset.
* @param index index to slice at
* @return rank 3 or more dataset.
*/
public static MutablePropertyDataSet slice3(final QDataSet ds, final int index) {
    // Builds a new Slice3DataSet; the trailing true is passed straight to its
    // constructor (its meaning is defined by Slice3DataSet, not here).
    final MutablePropertyDataSet sliced= new Slice3DataSet( ds, index, true );
    return sliced;
}
/**
* reduce the number of elements in the dataset to the dim 0 indices specified.
* This does not change the rank of the dataset.
*
* DO NOT try to optimize this by calling native trim, some native trim
* implementations call this.
*
* @param ds the dataset
* @param offset the offset
* @param len the length, (not the stop index!)
* @return trimmed dataset
*/
public static MutablePropertyDataSet trim(final QDataSet ds, final int offset, final int len) {
    // TrimDataSet is constructed with a stop index rather than a length, so convert.
    final int stop= offset + len;
    return new TrimDataSet( ds, offset, stop );
}
/**
* reduce the number of elements in the dataset to the dim 0 indices specified.
* This does not change the rank of the dataset.
* @param dep the dataset.
* @param start first index to include
* @param stop last index, exclusive
* @param stride the step size, e.g. 2 is every other element.
* @return trimmed dataset
*/
public static MutablePropertyDataSet trim( final QDataSet dep, final int start, final int stop, final int stride ) {
    if ( dep.rank()!=1 ) {
        throw new IllegalArgumentException("only rank 1 supported");
    }

    // Source iterator walks dep's first index using the start/stop/stride factory.
    final QubeDataSetIterator source= new QubeDataSetIterator(dep);
    source.setIndexIteratorFactory( 0, new QubeDataSetIterator.StartStopStepIteratorFactory(start, stop, stride ) );

    // The source iterator creates the empty result; a second iterator walks that result.
    final DDataSet result= source.createEmptyDs();
    final QubeDataSetIterator sink= new QubeDataSetIterator(result);

    // Advance both iterators in lock step, copying one value per step.
    while ( source.hasNext() ) {
        source.next();
        sink.next();
        sink.putValue( result, source.getValue(dep) );
    }

    // Carry over each dimension property that is set on the input.
    for ( String propertyName : DataSetUtil.dimensionProperties() ) {
        final Object propertyValue= dep.property(propertyName);
        if ( propertyValue!=null ) {
            result.putProperty( propertyName, propertyValue );
        }
    }
    return result;
}
/**
* flatten a rank 2 dataset. The result is a n,3 dataset
* of [x,y,f].
* History:<ul>
* <li>modified for use in PW group.
* <li>missing DEPEND_1 resulted in NullPointerException, so just use 0,1,2,..,n instead and always have rank 2 result.
* </ul>
*
* @param ds rank 2 table dataset
* @return rank 2 dataset that is an array of (x,y,f).
*/
public static QDataSet flattenRank2( final QDataSet ds ) {
QDataSet dep0= (QDataSet) ds.property(QDataSet.DEPEND_0);
QDataSet dep1= (QDataSet) ds.property(QDataSet.DEPEND_1);
QDataSet dep0offset= (QDataSet) ds.property("OFFSETS_1"); //kludge to experiment with this.
if ( dep0==null ) dep0= Ops.findgen(ds.length());
if ( dep1==null ) dep1= IndexGenDataSet.lastindex(ds);
DataSetBuilder builder= new DataSetBuilder( 1, 100 );
DataSetBuilder xbuilder= new DataSetBuilder( 1, 100 );
DataSetBuilder ybuilder= new DataSetBuilder( 1, 100 );
if ( dep1.rank()==2 && Schemes.isRank2Bins(dep1) ) {
dep1= Ops.reduceBins( dep1 );
}
boolean dep1rank2= dep1!=null && dep1.rank()==2;
for ( int i=0; i0. ) {
indeces[i0] = i;
i0++;
}
}
final Comparator c = (Integer o1, Integer o2) -> {
int i1 = o1;
int i2 = o2;
return Double.compare(ds.value(i1), ds.value(i2));
};
Arrays.sort(indeces, 0, i0, c);
final int[] data = new int[i0];
boolean monotonic= true;
int lastData=0;
if ( i0>0 ) {
data[0] = indeces[0];
lastData= data[0];
}
for (int i = 1; i < i0; i++) {
data[i] = indeces[i];
if ( monotonic && data[i]0 ) {
return getComponentType(ds.slice(0));
} else {
return double.class;
}
}
/**
* return a fill value that is representable by the type.
* @param c the class type, including double.class, float.class, etc.
* @return a fill value that is representable by the type.
*/
public static double suggestFillForComponentType( Class c ) {
    // Integer types: use the type's own minimum, widened to double.
    if ( c==long.class ) {
        return Long.MIN_VALUE;
    } else if ( c==int.class ) {
        return Integer.MIN_VALUE;
    } else if ( c==short.class ) {
        return Short.MIN_VALUE;
    } else if ( c==byte.class ) {
        return Byte.MIN_VALUE;
    } else if ( c==float.class ) {
        // The double -1e38 is not exactly representable as a float: narrowed to float and
        // widened back it no longer equals -1e38, so an equality test against the fill
        // would fail. Return the nearest float value, widened exactly to double.
        return -1e38f;
    }
    // double.class and any type not listed above.
    return -1e38;
}
/**
* return the dataset with records rearranged according to indices.
* @param ds rank N dataset, where N>0
* @param indices rank 1 dataset, length m.
* @return length m rank N dataset.
* @see #applyIndex(org.das2.qds.QDataSet, int, org.das2.qds.QDataSet, boolean)
*/
public static QDataSet applyIndex( QDataSet ds, QDataSet indices ) {
    // Delegate to the four-argument form: index on dimension 0, with the deps flag set.
    final QDataSet reindexed= DataSetOps.applyIndex( ds, 0, indices, true );
    return reindexed;
}
/**
* Applies the sort index to the idim-th dimension of the qube dataset ds.
* TODO: consider sorting multiple dimensions at once, to reduce excessive copying.
* TODO: this should probably (and would easily) be redone by using dataset implementation that applies the sort on the ith index when read.
* See SubsetDataSet which would do this nicely.
* TODO: note the Jython stuff does this too, using a different implementation. Reconcile these...
* @param ds rank 1,2, or 3 qube dataset
* @param idim the dimension being sorted.
* @param sort rank 1 dataset of new indices, needn't be same size as index.
* @param deps do dependencies as well. Note this does not rearrange planes!
* @return new dataset that is a copy of the first, resorted.
* @see org.das2.qds.SortDataSet for similar functionality
* @see Ops#decimate(org.das2.qds.QDataSet, int, int)
*/
public static WritableDataSet applyIndex( QDataSet ds, int idim, QDataSet sort, boolean deps ) {
if (idim > 2) {
throw new IllegalArgumentException("idim must be <=2 ");
}
if ( idim==0 ) {
QDataSet ss= new SortDataSet( ds, sort );
ss.property(QDataSet.NAME,0);
return ArrayDataSet.copy( getComponentType(ds), new SortDataSet( ds, sort ) );
}
if (ds.rank() > 3) {
throw new IllegalArgumentException("rank limit");
}
int[] qube = DataSetUtil.qubeDims( ds );
if ( qube==null ) throw new IllegalArgumentException("dataset is not a qube and index is not on first dimension");
qube[idim] = sort.length();
ArrayDataSet cds= ArrayDataSet.create( getComponentType(ds), qube );
Map props= org.das2.qds.DataSetUtil.getDimensionProperties(ds,null);
props.remove( QDataSet.CADENCE );
org.das2.qds.DataSetUtil.putProperties(props, cds);
if (deps) {
String depprop = "DEPEND_" + idim;
QDataSet depds = (QDataSet) ds.property(depprop);
if (depds != null) {
depds = applyIndex(depds, 0, sort, false);
cds.putProperty(depprop, depds);
}
String bundleprop= "BUNDLE_"+idim;
QDataSet bds= (QDataSet) ds.property( bundleprop );
if ( bds!=null ) {
JoinDataSet jds= new JoinDataSet(2);
for ( int i=0; i 2) {
for (int k = 0; k < qube[2]; k++) {
double d = ds.value(i, (int) sort.value(j), k);
cds.putValue(i, j, k, d);
}
} else {
double d = ds.value(i, (int) sort.value(j));
cds.putValue(i, j, d);
}
}
}
} else if (idim == 2) {
for (int i = 0; i < qube[0]; i++) {
for (int j = 0; j < qube[1]; j++) {
for (int k = 0; k < qube[2]; k++) {
double d = ds.value(i, j, (int) sort.value(k));
cds.putValue(i, j, k, d);
}
}
}
}
return cds;
}
/**
* returns a rank 1 dataset that is a histogram of the data. Note there
* will also be in the properties:
* count, the total number of valid values.
* nonZeroMin, the smallest non-zero, positive number
* @param ds rank N dataset
* @param min the min of the first bin. If min=-1 and max=-1, then automatically set the min and max.
* @param max the max of the last bin.
* @param binsize the size of each bin.
* @return a rank 1 dataset with each bin's count. DEPEND_0 indicates the bin locations.
*/
public static QDataSet histogram(QDataSet ds, double min, double max, final double binsize) {
if ( min==-1 && max==-1 ) {
QDataSet range= Ops.extent(ds);
min= (Math.floor(range.value(0)/binsize)) * binsize;
max= (Math.ceil(range.value(1)/binsize)) * binsize;
}
int n = (int) Math.ceil((max - min) / binsize);
MutablePropertyDataSet tags = DataSetUtil.tagGenDataSet(n, min + binsize/2 , binsize, (Units)ds.property(QDataSet.UNITS) );
tags.putProperty( QDataSet.NAME, ds.property(QDataSet.NAME) );
tags.putProperty( QDataSet.LABEL, ds.property(QDataSet.LABEL) );
tags.putProperty( QDataSet.TITLE, ds.property(QDataSet.TITLE) );
tags.putProperty( QDataSet.TYPICAL_MAX, ds.property(QDataSet.TYPICAL_MAX) );
tags.putProperty( QDataSet.TYPICAL_MIN, ds.property(QDataSet.TYPICAL_MIN) );
final int[] hits = new int[n];
QubeDataSetIterator iter = new QubeDataSetIterator(ds);
QDataSet wds= DataSetUtil.weightsDataSet(ds);
double positiveMin= Double.MAX_VALUE;
int count=0;
for (; count0. ) {
int ibin = (int) Math.floor((d - min) / binsize);
if (ibin >= 0 && ibin < n) {
hits[ibin]++;
}
if ( d>0 && d 0) {
approxMean /= validCount; // approximate--suseptible to number error.
}
double mean = 0;
double stddev = 0;
if (validCount > 0) {
iter= new QubeDataSetIterator(ds);
while (iter.hasNext()) {
iter.next();
double d = iter.getValue(ds);
double w = iter.getValue(wds);
if ( w>0.0 ) {
mean += (d - approxMean);
stddev += Math.pow(d - approxMean, 2);
}
}
mean /= validCount;
mean += approxMean;
moment[0] = mean;
if (validCount > 1) {
stddev /= (validCount - 1); // this will be very close to result, even though correction should be made since approxMean != mean.
stddev = Math.sqrt(stddev);
moment[1] = stddev;
} else {
moment[1] = u.getFillDouble();
}
} else {
moment[0] = u.getFillDouble();
}
DRank0DataSet result = DataSetUtil.asDataSet(moment[0]);
result.putProperty( QDataSet.UNITS, u );
DRank0DataSet stddevds= DataSetUtil.asDataSet(moment[1]);
stddevds.putProperty( QDataSet.UNITS, u.getOffsetUnits() );
result.putProperty("stddev", stddevds );
result.putProperty("validCount", validCount);
result.putProperty("invalidCount", invalidCount);
return result;
}
/**
* transpose the rank 2 qube dataset so the rows are columns and the columns are rows.
* @param ds rank 2 Qube DataSet.
* @return rank 2 Qube DataSet
*/
public static QDataSet transpose2(QDataSet ds) {
    // Wraps the input in a TransposeRank2DataSet; nothing else is done here.
    final QDataSet transposed= new TransposeRank2DataSet(ds);
    return transposed;
}
/**
* method to help dataset implementations implement slice.
* 2010-09-23: support rank 2 DEPEND_2 and DEPEND_3
* 2010-09-23: add BINS_1 and BUNDLE_1, Slice0DataSet calls this.
* 2010-02-24: BUNDLE_0 handled.
* 2011-03-25: add WEIGHTS_PLANE
* @param index the index to slice at in the zeroth index.
* @param props the properties to slice.
* @return the properties after the slice.
*/
public static Map sliceProperties0( int index, Map props ) {
Map result= new LinkedHashMap();
QDataSet dep0= (QDataSet) props.get( QDataSet.DEPEND_0 );
QDataSet dep1= (QDataSet) props.get( QDataSet.DEPEND_1 );
QDataSet dep2= (QDataSet) props.get( QDataSet.DEPEND_2 );
QDataSet dep3= (QDataSet) props.get( QDataSet.DEPEND_3 );
String bins1= (String) props.get( QDataSet.BINS_1 );
Object sbundle= props.get( QDataSet.BUNDLE_1 );
QDataSet bundle1= ( sbundle instanceof QDataSet ) ? (QDataSet) sbundle : null; // kludge to handle where QStream reader hasn't resolved this.
sbundle= props.get( QDataSet.BUNDLE_0 );
QDataSet bundle0= ( sbundle instanceof QDataSet ) ? (QDataSet) sbundle : null;
if ( dep0!=null && dep1!=null && dep0.rank()>1 && dep1.rank()>1 ) {
throw new IllegalArgumentException("both DEPEND_0 and DEPEND_1 have rank>1");
}
for ( int i=0; i0) {
result.put(p1, d.slice(index));
}
} else {
logger.log(Level.INFO, "property is not a QDataSet: {0}", p1);
}
}
}
String[] dimprops= DataSetUtil.dimensionProperties(); // TITLE, UNITS, etc.
for (String s : dimprops ) {
Object o = props.get(s);
if (o!=null) {
result.put(s, o);
}
}
if ( props.containsKey(QDataSet.CONTEXT_0) ) {
for ( int i=0; i sse: props.entrySet() ) {
String ss= sse.getKey();
int ii= ss.indexOf("__");
if ( ii>-1 ) {
String hd= ss.substring(ii+2);
int iii=0;
while ( iii0 ) {
int islice= Integer.parseInt( hd.substring(0,iii) );
if ( islice==index ) {
String slicePropName;
if ( iii sliceProperties( Map properties, int sliceDimension ) {
Map result = new LinkedHashMap();
String[] ss= DataSetUtil.dimensionProperties();
for ( String s: ss ) {
Object val= properties.get(s);
if ( val!=null ) result.put( s, val );
}
if ( sliceDimension>=QDataSet.MAX_HIGH_RANK ) {
throw new IllegalArgumentException("sliceDimension > MAX_HIGH_RANK");
}
List