/* * DataSetOps.java * * Created on January 29, 2007, 9:48 AM */ package org.das2.qds; import java.text.ParseException; import java.util.ArrayList; import java.util.logging.Level; import java.util.logging.Logger; import org.das2.datum.Datum; import org.das2.datum.Units; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Scanner; import java.util.regex.Pattern; import org.das2.qds.buffer.BufferDataSet; import org.das2.datum.DatumRange; import org.das2.datum.EnumerationUnits; import org.das2.datum.UnitsConverter; import org.das2.datum.UnitsUtil; import org.das2.util.LoggerManager; import org.das2.util.monitor.ProgressMonitor; import org.das2.qds.examples.Schemes; import org.das2.qds.ops.Ops; import org.das2.qds.util.DataSetBuilder; /** * Useful operations for QDataSets, such as slice2, leafTrim. * TODO: identify which functions appear here instead of Ops.java. * @author jbf */ public class DataSetOps { private static final Logger logger= LoggerManager.getLogger("qdataset.ops"); /** * absolute length limit for plots. This is used to limit the elements used in autoranging, etc. */ public final static int DS_LENGTH_LIMIT= 10000000; /** * return a dataset that has mutable properties. If the dataset parameter already has, then the * dataset is returned. If the dataset is a MutablePropertyDataSet but the immutable flag is * set, then the dataset is wrapped to make the properties mutable. * @param dataset dataset * @return a MutablePropertyDataSet that is has a wrapper around the dataset, or the dataset. * @see DataSetWrapper */ public static MutablePropertyDataSet makePropertiesMutable( final QDataSet dataset ) { if ( dataset instanceof MutablePropertyDataSet ) { MutablePropertyDataSet mpds= (MutablePropertyDataSet) dataset; if ( mpds.isImmutable() ) { return new DataSetWrapper(dataset); } else { return (MutablePropertyDataSet) dataset; } } else { return new DataSetWrapper(dataset); } } /** * return a dataset that is writable. If the dataset parameter of this idempotent * function is already writable, then the * dataset is returned. If the dataset is a WritableDataSet but the immutable flag is * set, then the a copy is returned. * @param dataset * @return a WritableDataSet that is either a copy of the read-only dataset provided, or the parameter writable dataset provided. */ public static WritableDataSet makeWritable(QDataSet dataset) { if ( dataset instanceof WritableDataSet ) { WritableDataSet wds= (WritableDataSet) dataset; if ( wds.isImmutable() ) { return ArrayDataSet.copy(dataset); } else { return (WritableDataSet) dataset; } } else { return ArrayDataSet.copy(dataset); } } /** * slice on the dimension. This saves from the pain of having this branch * all over the code. * @param ds the rank N data to slice. * @param dimension the dimension to slice, 0 is the first. * @param index the index to slice at. * @return the rank N-1 result. */ public static MutablePropertyDataSet slice( QDataSet ds, int dimension, int index ) { switch (dimension ) { case 0: return slice0(ds,index); case 1: return slice1(ds,index); case 2: return slice2(ds,index); case 3: return slice3(ds,index); default: throw new IllegalArgumentException("rank error, must be 0, 1, 2, 3, or 4."); } } /** * slice on the first dimension. Note the function ds.slice(index) was * added later and will typically be more efficient. This will create a new * Slice0DataSet. 
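     * For example (an illustrative sketch; Ops.ripples is assumed available from org.das2.qds.ops.Ops):
     *<pre>
     * QDataSet ds= Ops.ripples(20,30);          // rank 2 test dataset
     * QDataSet rec= DataSetOps.slice0( ds, 0 ); // rank 1 dataset of length 30
     *</pre>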
* * DO NOT try to optimize this by calling native trim, some native slice * implementations call this. * * TODO: This actually needs a bit more study, because there are codes that * talk about not using the native slice because it copies data and they just * want metadata. This probably is because Slice0DataSet doesn't check for * immutability, and really should be copying. This needs to be fixed, * making sure the result of this call is immutable, and the native slice * really should be more efficient, always. * * @param ds rank 1 or more dataset * @param index the index to slice at * @return rank 0 or more dataset. * @see QDataSet#slice(int) */ public static MutablePropertyDataSet slice0(final QDataSet ds, final int index) { return new Slice0DataSet(ds, index,true); } /** * slice dataset operator assumes a qube dataset * by picking the index-th element of dataset's second dimension, without * regard to tags. * @param ds rank 2 or more dataset * @param index the index to slice at * @return rank 1 or more dataset. */ public static MutablePropertyDataSet slice1(final QDataSet ds, final int index) { return new Slice1DataSet(ds, index, true, false); } /** * slice dataset operator assumes a qube dataset * by picking the index-th element of dataset's second dimension, without * regard to tags. * @param ds rank 3 or more dataset * @param index the index to slice at. * @return rank 2 or more dataset. */ public static MutablePropertyDataSet slice2(final QDataSet ds, final int index) { return new Slice2DataSet(ds, index, true); } /** * slice dataset operator assumes a qube dataset * by picking the index-th element of dataset's second dimension, without * regard to tags. * @param ds rank 4 or more dataset. * @param index index to slice at * @return rank 3 or more dataset. */ public static MutablePropertyDataSet slice3(final QDataSet ds, final int index) { return new Slice3DataSet(ds, index, true ); } /** * reduce the number of elements in the dataset to the dim 0 indeces specified. * This does not change the rank of the dataset. * * DO NOT try to optimize this by calling native trim, some native trim * implementations call this. * * @param ds the dataset * @param offset the offset * @param len the length, (not the stop index!) * @return trimmed dataset */ public static MutablePropertyDataSet trim(final QDataSet ds, final int offset, final int len) { return new TrimDataSet( ds, offset, offset+len ); } /** * reduce the number of elements in the dataset to the dim 0 indeces specified. * This does not change the rank of the dataset. * @param dep the dataset. * @param start first index to include * @param stop last index, exclusive * @param stride the step size, e.g. 2 is every other element. * @return trimmed dataset */ public static MutablePropertyDataSet trim( final QDataSet dep, final int start, final int stop, final int stride ) { if ( dep.rank()!=1 ) throw new IllegalArgumentException("only rank 1 supported"); QubeDataSetIterator itIn= new QubeDataSetIterator(dep); itIn.setIndexIteratorFactory( 0, new QubeDataSetIterator.StartStopStepIteratorFactory(start, stop, stride ) ); DDataSet depSlice= itIn.createEmptyDs(); QubeDataSetIterator itOut= new QubeDataSetIterator(depSlice); while ( itIn.hasNext() ) { itIn.next(); itOut.next(); itOut.putValue( depSlice, itIn.getValue(dep) ); } String[] names = DataSetUtil.dimensionProperties(); for (String name : names) { if (dep.property(name) != null) { depSlice.putProperty(name, dep.property(name)); } } return depSlice; } /** * flatten a rank 2 dataset. 
The result is a n,3 dataset * of [x,y,f]. * History: * @param ds rank 2 table dataset * @return rank 2 dataset that is that is array of (x,y,f). */ public static QDataSet flattenRank2( final QDataSet ds ) { QDataSet dep0= (QDataSet) ds.property(QDataSet.DEPEND_0); QDataSet dep1= (QDataSet) ds.property(QDataSet.DEPEND_1); QDataSet dep0offset= (QDataSet) ds.property("OFFSETS_1"); //kludge to experiment with this. if ( dep0==null ) dep0= Ops.findgen(ds.length()); if ( dep1==null ) dep1= IndexGenDataSet.lastindex(ds); DataSetBuilder builder= new DataSetBuilder( 1, 100 ); DataSetBuilder xbuilder= new DataSetBuilder( 1, 100 ); DataSetBuilder ybuilder= new DataSetBuilder( 1, 100 ); if ( dep1.rank()==2 && Schemes.isRank2Bins(dep1) ) { dep1= Ops.reduceBins( dep1 ); } boolean dep1rank2= dep1!=null && dep1.rank()==2; for ( int i=0; i0. ) { indeces[i0] = i; i0++; } } final Comparator c = (Integer o1, Integer o2) -> { int i1 = o1; int i2 = o2; return Double.compare(ds.value(i1), ds.value(i2)); }; Arrays.sort(indeces, 0, i0, c); final int[] data = new int[i0]; boolean monotonic= true; int lastData=0; if ( i0>0 ) { data[0] = indeces[0]; lastData= data[0]; } for (int i = 1; i < i0; i++) { data[i] = indeces[i]; if ( monotonic && data[i]0 ) { return getComponentType(ds.slice(0)); } else { return double.class; } } /** * return a fill value that is representable by the type. * @param c the class type, including double.class, float.class, etc. * @return a fill value that is representable by the type. */ public static double suggestFillForComponentType( Class c ) { if ( c==double.class ) { return -1e38; } else if ( c==float.class ) { return -1e38; } else if ( c==long.class ) { return Long.MIN_VALUE; } else if ( c==int.class ) { return Integer.MIN_VALUE; } else if ( c==short.class ) { return Short.MIN_VALUE; } else if ( c==byte.class ) { return Byte.MIN_VALUE; } else { return -1e38; } } /** * return the dataset with records rearranged according to indices. * @param ds rank N dataset, where N>0 * @param indices rank 1 dataset, length m. * @return length m rank N dataset. * @see #applyIndex(org.das2.qds.QDataSet, int, org.das2.qds.QDataSet, boolean) */ public static QDataSet applyIndex( QDataSet ds, QDataSet indices ) { return DataSetOps.applyIndex( ds, 0, indices, true ); } /** * Applies the sort index to the idim-th dimension of the qube dataset ds. * TODO: consider sorting multiple dimensions at once, to reduce excessive copying. * TODO: this should probably (and would easily) be redone by using dataset implementation that applies the sort on the ith index when read. * See SubsetDataSet which would do this nicely. * TODO: note the Jython stuff does this to, using a different implementation. Reconcile these... * @param ds rank 1,2, or 3 qube dataset * @param idim the dimension being sorted. * @param sort rank 1 dataset of new indeces, needn't be same size as index. * @param deps do dependencies as well. Note this does not rearrange planes! * @return new dataset that is a copy of the first, resorted. 
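     * <p>Typical use (a sketch; Ops.sort is used elsewhere in this class and is assumed here):
     *<pre>
     * QDataSet sort= Ops.sort( ds.slice(0) );            // ordering for the columns
     * QDataSet sorted= applyIndex( ds, 1, sort, true );  // reorder dim 1 and its DEPEND_1
     *</pre>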
* @see org.das2.qds.SortDataSet for similar functionality * @see Ops#decimate(org.das2.qds.QDataSet, int, int) */ public static WritableDataSet applyIndex( QDataSet ds, int idim, QDataSet sort, boolean deps ) { if (idim > 2) { throw new IllegalArgumentException("idim must be <=2 "); } if ( idim==0 ) { QDataSet ss= new SortDataSet( ds, sort ); return ArrayDataSet.copy( getComponentType(ds), ss ); } if (ds.rank() > 3) { throw new IllegalArgumentException("rank limit"); } int[] qube = DataSetUtil.qubeDims( ds ); if ( qube==null ) throw new IllegalArgumentException("dataset is not a qube and index is not on first dimension"); qube[idim] = sort.length(); ArrayDataSet cds= ArrayDataSet.create( getComponentType(ds), qube ); Map props= org.das2.qds.DataSetUtil.getDimensionProperties(ds,null); props.remove( QDataSet.CADENCE ); org.das2.qds.DataSetUtil.putProperties(props, cds); if (deps) { String depprop = "DEPEND_" + idim; QDataSet depds = (QDataSet) ds.property(depprop); if (depds != null) { depds = applyIndex(depds, 0, sort, false); cds.putProperty(depprop, depds); } String bundleprop= "BUNDLE_"+idim; QDataSet bds= (QDataSet) ds.property( bundleprop ); if ( bds!=null ) { JoinDataSet jds= new JoinDataSet(2); for ( int i=0; i 2) { for (int k = 0; k < qube[2]; k++) { double d = ds.value(i, (int) sort.value(j), k); cds.putValue(i, j, k, d); } } else { double d = ds.value(i, (int) sort.value(j)); cds.putValue(i, j, d); } } } } else if (idim == 2) { for (int i = 0; i < qube[0]; i++) { for (int j = 0; j < qube[1]; j++) { for (int k = 0; k < qube[2]; k++) { double d = ds.value(i, j, (int) sort.value(k)); cds.putValue(i, j, k, d); } } } } return cds; } /** * returns a rank 1 dataset that is a histogram of the data. Note there * will also be in the properties: * count, the total number of valid values. * nonZeroMin, the smallest non-zero, positive number * @param ds rank N dataset * @param min the min of the first bin. If min=-1 and max=-1, then automatically set the min and max. * @param max the max of the last bin. * @param binsize the size of each bin. * @return a rank 1 dataset with each bin's count. DEPEND_0 indicates the bin locations. */ public static QDataSet histogram(QDataSet ds, double min, double max, final double binsize) { if ( min==-1 && max==-1 ) { QDataSet range= Ops.extent(ds); min= (Math.floor(range.value(0)/binsize)) * binsize; max= (Math.ceil(range.value(1)/binsize)) * binsize; } int n = (int) Math.ceil((max - min) / binsize); MutablePropertyDataSet tags = DataSetUtil.tagGenDataSet(n, min + binsize/2 , binsize, (Units)ds.property(QDataSet.UNITS) ); tags.putProperty( QDataSet.NAME, ds.property(QDataSet.NAME) ); tags.putProperty( QDataSet.LABEL, ds.property(QDataSet.LABEL) ); tags.putProperty( QDataSet.TITLE, ds.property(QDataSet.TITLE) ); tags.putProperty( QDataSet.TYPICAL_MAX, ds.property(QDataSet.TYPICAL_MAX) ); tags.putProperty( QDataSet.TYPICAL_MIN, ds.property(QDataSet.TYPICAL_MIN) ); final int[] hits = new int[n]; QubeDataSetIterator iter = new QubeDataSetIterator(ds); QDataSet wds= DataSetUtil.weightsDataSet(ds); double positiveMin= Double.MAX_VALUE; int count=0; for (; count0. ) { int ibin = (int) Math.floor((d - min) / binsize); if (ibin >= 0 && ibin < n) { hits[ibin]++; } if ( d>0 && d 0) { approxMean /= validCount; // approximate--suseptible to number error. 
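            // Descriptive note: the second pass below accumulates deviations from approxMean so
            // the running sums stay small, then adds approxMean back to get the mean; the
            // standard deviation uses the sample (n-1) normalization.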
} double mean = 0; double stddev = 0; if (validCount > 0) { iter= new QubeDataSetIterator(ds); while (iter.hasNext()) { iter.next(); double d = iter.getValue(ds); double w = iter.getValue(wds); if ( w>0.0 ) { mean += (d - approxMean); stddev += Math.pow(d - approxMean, 2); } } mean /= validCount; mean += approxMean; moment[0] = mean; if (validCount > 1) { stddev /= (validCount - 1); // this will be very close to result, even though correction should be made since approxMean != mean. stddev = Math.sqrt(stddev); moment[1] = stddev; } else { moment[1] = u.getFillDouble(); } } else { moment[0] = u.getFillDouble(); } DRank0DataSet result = DataSetUtil.asDataSet(moment[0]); result.putProperty( QDataSet.UNITS, u ); DRank0DataSet stddevds= DataSetUtil.asDataSet(moment[1]); stddevds.putProperty( QDataSet.UNITS, u.getOffsetUnits() ); result.putProperty("stddev", stddevds ); result.putProperty("validCount", validCount); result.putProperty("invalidCount", invalidCount); return result; } /** * transpose the rank 2 qube dataset so the rows are columns and the columns are rows. * @param ds rank 2 Qube DataSet. * @return rank 2 Qube DataSet */ public static QDataSet transpose2(QDataSet ds) { return new TransposeRank2DataSet(ds); } /** * method to help dataset implementations implement slice. * 2010-09-23: support rank 2 DEPEND_2 and DEPEND_3 * 2010-09-23: add BINS_1 and BUNDLE_1, Slice0DataSet calls this. * 2010-02-24: BUNDLE_0 handled. * 2011-03-25: add WEIGHTS_PLANE * @param index the index to slice at in the zeroth index. * @param props the properties to slice. * @return the properties after the slice. */ public static Map sliceProperties0( int index, Map props ) { Map result= new LinkedHashMap<>(); QDataSet dep0= (QDataSet) props.get( QDataSet.DEPEND_0 ); QDataSet dep1= (QDataSet) props.get( QDataSet.DEPEND_1 ); QDataSet dep2= (QDataSet) props.get( QDataSet.DEPEND_2 ); QDataSet dep3= (QDataSet) props.get( QDataSet.DEPEND_3 ); String bins1= (String) props.get( QDataSet.BINS_1 ); Object sbundle= props.get( QDataSet.BUNDLE_1 ); QDataSet bundle1= ( sbundle instanceof QDataSet ) ? (QDataSet) sbundle : null; // kludge to handle where QStream reader hasn't resolved this. sbundle= props.get( QDataSet.BUNDLE_0 ); QDataSet bundle0= ( sbundle instanceof QDataSet ) ? (QDataSet) sbundle : null; if ( dep0!=null && dep1!=null && dep0.rank()>1 && dep1.rank()>1 ) { throw new IllegalArgumentException("both DEPEND_0 and DEPEND_1 have rank>1"); } for ( int i=0; i0) { result.put(p1, d.slice(index)); } } else { logger.log(Level.INFO, "property is not a QDataSet: {0}", p1); } } } String[] dimprops= DataSetUtil.dimensionProperties(); // TITLE, UNITS, etc. for (String s : dimprops ) { Object o = props.get(s); if (o!=null) { result.put(s, o); } } if ( props.containsKey(QDataSet.CONTEXT_0) ) { for ( int i=0; i sse: props.entrySet() ) { String ss= sse.getKey(); int ii= ss.indexOf("__"); if ( ii>-1 ) { String hd= ss.substring(ii+2); int iii=0; while ( iii0 ) { int islice= Integer.parseInt( hd.substring(0,iii) ); if ( islice==index ) { String slicePropName; if ( iii=2 ) { result.put( QDataSet.DEPEND_1, dep2.slice( index ) ); } else { result.put( QDataSet.DEPEND_1, dep2 ); } } if ( dep3!=null ) { if ( dep3.rank()>=2 ) { result.put( QDataSet.DEPEND_2, dep3.slice( index ) ); } else { result.put( QDataSet.DEPEND_2, dep3 ); } } if ( bins1!=null ) { result.put( QDataSet.BINS_0, bins1 ); } if ( bundle1!=null ) { result.put( QDataSet.BUNDLE_0, bundle1 ); } if ( bundle0!=null ) { //TODO: what if BUNDLE_0 bundles a high rank dataset? 
This assumes they are all rank 0. QDataSet bundle0ds= bundle0.slice(index); result.putAll( DataSetUtil.getProperties(bundle0ds) ); } //TODO: verify that we needn't put null in for JOIN_0. return result; } /** * we've sliced a dataset, removing an index. move the properties. This was Ops.sliceProperties * For example, after slicing the zeroth dimension (time), what was DEPEND_1 is * becomes DEPEND_0. * * @param properties the properties to slice. * @param sliceDimension the dimension to slice at (0,1,2...QDataSet.MAX_HIGH_RANK) * @return the properties after the slice. */ public static Map sliceProperties( Map properties, int sliceDimension ) { Map result = new LinkedHashMap<>(); String[] ss= DataSetUtil.dimensionProperties(); for ( String s: ss ) { Object val= properties.get(s); if ( val!=null ) result.put( s, val ); } if ( sliceDimension>=QDataSet.MAX_HIGH_RANK ) { throw new IllegalArgumentException("sliceDimension > MAX_HIGH_RANK"); } List deps = new ArrayList<>(QDataSet.MAX_HIGH_RANK); List bund = new ArrayList<>(QDataSet.MAX_HIGH_RANK); List bins = new ArrayList<>(QDataSet.MAX_HIGH_RANK); for (int i = 0; i < QDataSet.MAX_RANK; i++) { deps.add(i, properties.get("DEPEND_" + i)); bund.add(i, properties.get("BUNDLE_" + i)); bins.add(i, properties.get("BINS_" + i)); } if ( sliceDimension2 ) { // remove the high rank data, the calling code should deal with it. odep= null; } } result.put("DEPEND_" + i, odep); } if ( bund.get(i)!=null ) result.put("BUNDLE_" + i, bund.get(i)); if ( bins.get(i)!=null ) result.put("BINS_" + i, bins.get(i)); } if ( properties.containsKey(QDataSet.CONTEXT_0) ) { for ( int i=0; i names= new ArrayList<>(); final List units= new ArrayList<>(); for ( int j=0; jnames.size() ) throw new IllegalArgumentException("index too large:"+i ); if ( name.equals( QDataSet.NAME ) ) { return names.get(i); } else if ( name.equals( QDataSet.UNITS ) ) { return units.get(i); } else { return null; } } }; return bundleDescriptor; } /** * create array of [ "ch_0", "ch_1", ... ] * @param len * @return string array containing the names that will always work. */ private static String[] backupBundleNames( int len ) { String[] result= new String[len]; for ( int i2=0; i2 result= new ArrayList<>(bundleDs.length(0)); QDataSet bundle1= (QDataSet) bundleDs.property(QDataSet.BUNDLE_1); if ( bundle1==null ) { bundle1= (QDataSet) bundleDs.property(QDataSet.DEPEND_1); //simple legacy bundle was once DEPEND_1. if ( bundle1!=null && bundle1.rank()>1 ) { if ( bundle1.rank()!=2 ) { throw new IllegalArgumentException("high rank DEPEND_1 found where rank 1 was expected"); } else { result= Arrays.asList( backupBundleNames( bundle1.length(0) ) ); } } else if ( bundle1!=null ) { Units u= SemanticOps.getUnits( bundle1 ); for ( int i2=0; i2
     *<pre>
     *bds=ripplesVectorTimeSeries(100)
     *2==indexOfBundledDataSet( bds, "Z" )
     *</pre>
* demonstrates its use. * * Last, extraneous spaces and underscores are removed to see if this will result in a match. * * @param bundleDs a bundle dataset with the property BUNDLE_1 or DEPEND_1 having EnumerationUnits, (or BUNDLE_0 for a rank 1 dataset). * @param name the named dataset. * @return the index or -1 if the name is not found. */ public static int indexOfBundledDataSet( QDataSet bundleDs, String name ) { int rank= bundleDs.rank(); QDataSet bundle1= (QDataSet) bundleDs.property( "BUNDLE_"+(rank-1) ); int ib= -1; int i= name.indexOf("["); // allow name to be "Flux[Time=1440,en=10]" if ( i>0 ) { name= name.substring(i); name= Ops.saferName(name); } else { name= Ops.saferName(name); } if ( name.matches("ch_\\d+") ) { int ich= Integer.parseInt(name.substring(3) ); return ich; } if ( bundle1==null ) { bundle1= (QDataSet) bundleDs.property( "DEPEND_"+(rank-1) ); //simple legacy bundle was once DEPEND_1. if ( bundle1!=null && bundle1.rank()>1 ) { throw new IllegalArgumentException("high rank DEPEND_1 found where rank 1 was expected"); } else if ( bundle1!=null ) { Units u= SemanticOps.getUnits( bundle1 ); for ( int i2=0; i20 || ( dims!=null && dims.length>0 ) ) { n1= (String) bundle1.property( QDataSet.ELEMENT_NAME, j ); if ( n1!=null ) n1= Ops.saferName(n1); if ( n1!=null && n1.equals(name) ) { ib= j; highRank= true; break; } } } //if ( ib==-1 ) { for ( int j=0; j0 ) { n1= (String) bundle1.property( QDataSet.ELEMENT_LABEL, j ); if ( n1!=null ) n1= Ops.saferName(n1); if ( n1!=null && n1.equals(name) ) { ib= j; highRank= true; break; } } } //} if ( ib==-1 ) { name= name.replaceAll("_| ",""); for ( int j=0; j0 ) { n1= (String) bundle1.property( QDataSet.ELEMENT_NAME, j ); if ( n1!=null ) n1= Ops.saferName(n1); if ( n1!=null && n1.equals(name) ) { ib= j; highRank= true; break; } } } } if ( highRank ) { logger.log(Level.FINER, "index of bundled dataset \"{0}\" is {1} (highrank={2})", new Object[]{name, ib, highRank}); } return ib; } /** * Extract the named bundled dataset. For example, extract B_x from bundle of components. * @param bundleDs a bundle of datasets * @param name the name of the bundled dataset, or "ch_<i>" where i is the dataset number * @see #unbundle(org.das2.qds.QDataSet, int) * @throws IllegalArgumentException if no named dataset is found. * @return the named dataset */ public static QDataSet unbundle( QDataSet bundleDs, String name ) { QDataSet bundle1= (QDataSet) bundleDs.property(QDataSet.BUNDLE_1); int ib= indexOfBundledDataSet( bundleDs, name ); boolean highRank= false; // we have to see if they referred to the high-rank dataset, or the rank 1 dataset. Chris, wouldn't it be nice if Java could return two things? 
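        // Descriptive note: a match against ELEMENT_NAME or ELEMENT_LABEL (checked below) means
        // the name refers to a multi-element sub-dataset within the bundle, so highRank is set
        // and all of its columns are extracted rather than a single rank 1 column.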
int[] dims=null; if ( ib>-1 && bundle1!=null ) dims= (int[])bundle1.property( QDataSet.ELEMENT_DIMENSIONS, ib ); if ( bundle1!=null && ( bundle1.length(ib)>0 || ( dims!=null && dims.length>0 ) ) ) { String n1= (String) bundle1.property( QDataSet.ELEMENT_NAME, ib ); if ( n1!=null ) n1= Ops.saferName(n1); if ( n1!=null && n1.equals(name) ) { highRank= true; } if ( highRank==false ) { n1= (String) bundle1.property( QDataSet.ELEMENT_LABEL, ib ); if ( n1!=null ) n1= Ops.saferName(n1); if ( n1!=null && n1.equals(name) ) { highRank= true; } } } if ( ib==-1 ) { if ( name.matches("ch_\\d+") ) { int ich= Integer.parseInt(name.substring(3) ); return DataSetOps.unbundle(bundleDs, ich, false); } else { throw new IllegalArgumentException("unable to find dataset with name \""+name+"\" in bundle "+bundleDs ); } } else { return unbundle(bundleDs,ib,highRank); } } /** * extract the dataset that is dependent on others, or the last one. * For example, the dataset ds[:,"x,y"] → y[:] * @param bundleDs a bundle of datasets * @return the default dataset * @see Schemes#bundleDataSet() */ public static QDataSet unbundleDefaultDataSet( QDataSet bundleDs ) { QDataSet bundle1= (QDataSet) bundleDs.property(QDataSet.BUNDLE_1); if ( bundle1==null ) { bundle1= (QDataSet) bundleDs.property(QDataSet.DEPEND_1); //simple legacy bundle was once DEPEND_1. if ( bundle1!=null && bundle1.rank()>1 ) { throw new IllegalArgumentException("high rank DEPEND_1 found where rank 1 was expected"); } } int ids= -1; if ( bundle1!=null ) { for ( int i=0; i=2 ) { // unbundle now allows rank >2 ... QDataSet bundle1= (QDataSet) bundleDs.property(QDataSet.BUNDLE_1); if ( bundle1==null ) { bundle1= (QDataSet) bundleDs.property(QDataSet.DEPEND_1); //simple legacy bundle was once DEPEND_1. if ( bundle1==null ) { if ( bundleDs.rank()==2 ) { return new Slice1DataSet( bundleDs, ib ); //TODO: this was throw new IllegalArgumentException( "Neither BUNDLE_1 nor DEPEND_1 found on dataset passed to unbundle command."); } else if ( bundleDs.rank()==3 ) { return new Slice2DataSet( bundleDs, ib ); //TODO: this was throw new IllegalArgumentException( "Neither BUNDLE_1 nor DEPEND_1 found on dataset passed to unbundle command."); } else { throw new IllegalArgumentException("rank must be 2 or 3"); } } if ( bundle1.rank()==2 ) { return new Slice1DataSet( bundleDs, ib ); // warning message removed, because rank 1 context is used. } else if ( bundle1.rank()>1 ) { throw new IllegalArgumentException("high rank DEPEND_1 found where rank 1 was expected"); } else { // Units u= SemanticOps.getUnits( bundle1 ); // if ( !( u instanceof EnumerationUnits ) ) { // throw new IllegalArgumentException("dataset is not a bundle, and units of DEPEND_1 are not enumeration"); // } } } bundle= bundle1; } else if ( bundleDs.rank()==1 ) { QDataSet bundle0= (QDataSet) bundleDs.property(QDataSet.BUNDLE_0); if ( bundle0==null ) { bundle0= (QDataSet) bundleDs.property(QDataSet.DEPEND_0); //simple legacy bundle was once DEPEND_1. 
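            // Descriptive note: for a rank 1 bundle the per-element labels may come from BUNDLE_0
            // or, for legacy streams, from an enumeration-valued DEPEND_0; in either case the
            // unbundled result is a rank 0 slice of the record.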
if ( bundle0==null ) { return new Slice0DataSet( bundleDs, ib ); } if ( bundle0.rank()>1 ) { throw new IllegalArgumentException("high rank DEPEND_0 found where rank 1 was expected"); } else { Units u= SemanticOps.getUnits( bundle0 ); if ( !( u instanceof EnumerationUnits ) ) { throw new IllegalArgumentException("dataset is not a bundle, and units of DEPEND_0 are not enumeration"); } } } bundle= bundle0; } else { throw new IllegalArgumentException("bundle must be rank 1 or rank 2"); } if ( ib<0 || ib>=bundle.length() ) { throw new IndexOutOfBoundsException("in "+bundleDs+" no such data set at index="+ib +" bundle.length()="+bundle.length() ); } switch (bundle.rank()) { case 1: //simple legacy bundle was once DEPEND_1. MutablePropertyDataSet result= bundleDs.rank()==2 ? DataSetOps.slice1(bundleDs,ib) : DataSetOps.slice0(bundleDs,ib); Units enumunits= (Units) bundle.property(QDataSet.UNITS); if ( enumunits==null ) enumunits= Units.dimensionless; String label= String.valueOf(enumunits.createDatum(bundle.value(ib))); result.putProperty(QDataSet.NAME, Ops.safeName(label) ); //TODO: make safe java-identifier eg: org.virbo.dsops.Ops.safeName(label) result.putProperty(QDataSet.LABEL, label ); return result; case 2: break; default: throw new IllegalArgumentException("rank limit: >2 not supported"); } int len=1; // total number of elements per record of the dataset int j=ib; // column requested int is= ib; // start index of the high-rank dataset // since 2016-09-27, the dimensions should be a property now, and the dataset should be [n,0]. int[] dimensions= (int[]) bundle.property(QDataSet.ELEMENT_DIMENSIONS,ib); if ( dimensions==null && bundle.length(j)>0 ) { dimensions= new int[bundle.length(j)]; for ( int ii=0; ii=2 ) { result= new Slice1DataSet( bundleDs, j, true ); } else { throw new IllegalArgumentException("BundleDs must be rank 1 or rank 2"); // this is handled above and findbugs doesn't see that we can't get here. } String[] names1= DataSetUtil.dimensionProperties(); for (String names11 : names1) { Object v = bundle.property(names11, j); if (v!=null) { result.putProperty(names11, v); } } String[] planeNames= new String[] { QDataSet.BIN_MAX_NAME, QDataSet.BIN_MIN_NAME, QDataSet.BIN_MINUS_NAME, QDataSet.BIN_PLUS_NAME, QDataSet.DELTA_MINUS_NAME, QDataSet.DELTA_PLUS_NAME } ; for ( String s: planeNames ) { String o; o = (String)bundle.property( s,j); if ( o!=null ) { QDataSet dss1= unbundle( bundleDs, o ); // TODO: check for infinite loop. if ( dss1==null ) { logger.log(Level.WARNING, "bundled dataset refers to {0} but this is not found in bundle", o); } else { result.putProperty( s.substring(0,s.length()-5), dss1 ); } } } // allow unindexed properties to define property for all bundled datasets, for example USER_PROPERTIES or FILL Map props3= DataSetUtil.getProperties(bundle, DataSetUtil.globalProperties(), null ); for ( Map.Entry ss1: props3.entrySet() ) { String ss= ss1.getKey(); Object vv= result.property( ss ); if ( vv==null ) { result.putProperty( ss, ss1.getValue() ); } } if ( result.property(QDataSet.DEPEND_0)==null ) { // last make the default DEPEND_0 be the first column, if it is a UT time. 
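                // Descriptive note: this lets a column unbundled from a time-series bundle
                // (for example [Epoch,Bx,By,Bz]) pick up the epoch column as its DEPEND_0.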
if ( ib>0 ) { Units u= (Units) bundle.property(QDataSet.UNITS,0); if ( u!=null && UnitsUtil.isTimeLocation(u) ) { result.putProperty( QDataSet.DEPEND_0, unbundle(bundleDs,0,false) ); } } } return result; } } else if ( dimensions.length==1 || dimensions.length==2 ) { if ( bundleDs.rank()==1 ) { return bundleDs.trim(is, is+len); } TrimStrideWrapper result= new TrimStrideWrapper(bundleDs); result.setTrim( 1, is, is+len, 1 ); Integer ifirst= (Integer) bundle.property( QDataSet.START_INDEX, j ); int first,last; if ( ifirst!=null ) { first= ifirst; last= first+len-1; } else { first= j; // I don't think this should happen, but... last= j; } Map props= DataSetUtil.getProperties( DataSetOps.slice0(bundle,first) ); Map props2= DataSetUtil.getProperties( DataSetOps.slice0(bundle,last) ); for ( Entry e: props2.entrySet() ) { // remove the properties that are not constant within the bundle by checking first against last. String ss= e.getKey(); Object vv= props.get(ss); if ( vv!=null && !vv.equals( e.getValue() ) ) { props.put(ss,null); } } if ( last!=first ) { QDataSet bundleTrim= bundle.trim(first,last+1); MutablePropertyDataSet mds; mds= DataSetOps.makePropertiesMutable( bundleTrim ); Ops.copyIndexedProperties( bundleTrim, mds ); props.put( QDataSet.BUNDLE_1, mds ); } if ( bundleDs.rank()>1 ) { if ( bundle.property(QDataSet.DEPEND_1,first)!=null && bundle.property(QDataSet.DEPEND_1,first)==bundle.property(QDataSet.DEPEND_1,last) ) { props.put( QDataSet.DEPEND_1, bundle.property(QDataSet.DEPEND_1,first) ); } if ( bundle.property(QDataSet.BINS_1,first)!=null && bundle.property(QDataSet.BINS_1,first).equals(bundle.property(QDataSet.BINS_1,last)) ) { props.put( QDataSet.BINS_1, bundle.property(QDataSet.BINS_1,first) ); props.remove( QDataSet.BUNDLE_1 ); } if ( bundle.property(QDataSet.BUNDLE_1,first)!=null && bundle.property(QDataSet.BUNDLE_1,first)==(bundle.property(QDataSet.BUNDLE_1,last) ) ) { props.put( QDataSet.BUNDLE_1, bundle.property(QDataSet.BUNDLE_1,first) ); } } // allow unindexed properties to define property for all bundled datasets, for example USER_PROPERTIES or FILL Map props3= DataSetUtil.getProperties(bundle, DataSetUtil.globalProperties(), null ); for ( Entry e: props3.entrySet() ) { String ss= e.getKey(); Object vv= props.get( ss ); if ( vv==null ) { props.put( ss, e.getValue() ); } } Object o; o= bundle.property(QDataSet.ELEMENT_NAME,j); if ( o!=null ) props.put( QDataSet.NAME, o ); o= bundle.property(QDataSet.ELEMENT_LABEL,j); if ( o!=null ) props.put( QDataSet.LABEL, o ); DataSetUtil.putProperties( props, result ); String[] testProps= DataSetUtil.correlativeProperties(); for ( int i=-1; i0 ) { Units u= (Units) bundle.property(QDataSet.UNITS,0); if ( u!=null && UnitsUtil.isTimeLocation(u) ) { result.putProperty( QDataSet.DEPEND_0, unbundle(bundleDs,0,false) ); } } } if ( dimensions.length==2 ) { int[] qube= new int[] { result.length(), dimensions[0], dimensions[1] }; return Ops.reform( result, qube ); } else { return result; } } else { throw new IllegalArgumentException("rank limit: >2 not supported"); } } /** * given the bundle descriptor bds, return the dataset to be put in the context property. * @param bundle1 rank 2 bundle descriptor of length n with indexed properties. This was introduced * when sloppy slice code was using the NAME and dropping the LABEL. * @param index 0<=index<n index of the unbundle * @return rank 0 QDataSet. 
*/ protected static QDataSet getContextForUnbundle( QDataSet bundle1, int index ) { String tname= (String) bundle1.property(QDataSet.NAME); if ( tname==null ) tname=(String) bundle1.property(QDataSet.NAME,index); String tlabel= (String) bundle1.property(QDataSet.LABEL,index); tname= String.valueOf(tname); // guard against null tlabel= String.valueOf(tlabel); MutablePropertyDataSet context= (MutablePropertyDataSet) ( Ops.labelsDataset( new String[] { tlabel } )).slice(0); if ( !Ops.safeName(tlabel).equals(tname) ) { if ( context.isImmutable() ) { logger.warning("action not taken because dataset is immutable. This needs review."); } else { context.putProperty( QDataSet.NAME, tname ); } } return context; } /** * returns the value from within a distribution that is the nth percentile division. This * returns a fill dataset (Units.dimensionless.getFillDouble()) when the data is all fill. * @param ds the dataset * @param n percent between 0 and 100. * @return */ public static QDataSet getNthPercentileSort( QDataSet ds, double n ) { if ( n<0 ) throw new IllegalArgumentException("n<0"); if ( n>100 ) throw new IllegalArgumentException("n>=100"); QDataSet sort= Ops.sort(ds); if ( sort.length()==0 ) { return DataSetUtil.asDataSet( Units.dimensionless.getFillDatum() ); } int idx; if ( n==100 ) { idx= (int) sort.value( sort.length()-1 ); } else { idx= (int) sort.value( (int)( sort.length() * n / 100 ) ); } return ds.slice(idx); } /** * Get the background level by sorting the data. The result is rank one less than the input rank. * @param ds rank 1, 2, or rank 3 join. * @param level the level between 0 and 100. * @return */ public static QDataSet getBackgroundLevel( QDataSet ds, double level ) { if ( ds.rank()==1 ) { return getNthPercentileSort( ds, level ); } else if ( ds.rank()==2 ) { DDataSet result= DDataSet.createRank1( ds.length(0) ); result.putProperty( QDataSet.DEPEND_0, ds.property(QDataSet.DEPEND_1) ); for ( int jj=0; jj2 ) { JoinDataSet result= new JoinDataSet(ds.rank()-1); for ( int i=0; i *
     *  • rank 1: each element
     *  • rank 2: each row of the dataset
     *  • rank 3: each row of each rank 2 dataset slice.
     *
     * If the data is already in dB, then the result is a difference.
     * This is assuming the units are similar to voltage, not a power, we think,
     * containing code like 20 * Math.log10( ds / background ).
     * @param ds the dataset
     * @param level the percentile level, e.g. 10= 10%
     * @return the result dataset, in dB above background.
     */
    public static QDataSet dbAboveBackgroundDim1( QDataSet ds, double level ) {
        return dbAboveBackgroundDim1( ds, level, false );
    }

    /**
     * normalize the nth-level percentile from:
     *
     *  • rank 1: each element
     *  • rank 2: each row of the dataset
     *  • rank 3: each row of each rank 2 dataset slice.
    * If the data is already in dB, then the result is a difference. * This is assuming the units are similar to voltage, not a power, * containing code like 20 * Math.log10( ds / background ). * @param ds * @param level the percentile level, e.g. 10= 10% * @param power if true, return 10*Math.log10(ds / background ). * @return the result dataset, in dB above background. */ public static QDataSet dbAboveBackgroundDim1( QDataSet ds, double level, boolean power ) { MutablePropertyDataSet result; double fill= -1e31; boolean hasFill= false; final double mult= power ? 10.0 : 20.0; switch (ds.rank()) { case 1: { QDataSet back= getBackgroundLevel( ds, level ); result= Ops.copy(ds); boolean db= ds.property(QDataSet.UNITS)==Units.dB; WritableDataSet wds= (WritableDataSet)result; QDataSet validDs= Ops.valid(back); QDataSet vds2= DataSetUtil.weightsDataSet(ds); if ( validDs.value()>0 ) { for ( int ii=0; ii0 ) { double v= db ? ds.value(ii) - back.value() : mult * Math.log10( ds.value(ii) / back.value() ); wds.putValue( ii,Math.max( 0,v ) ); } else { wds.putValue( ii, fill ); } } } else { for ( int ii=0; ii0 && vds2.value(ii,jj)>0 ) { double v= db ? ds.value(ii,jj) - back.value(jj) : mult * Math.log10( ds.value(ii,jj) / back.value(jj) ); wds.putValue( ii,jj, Math.max( 0,v ) ); } else { wds.putValue( ii,jj, fill ); hasFill= true; } } } result.putProperty( QDataSet.USER_PROPERTIES,Collections.singletonMap("background", back) ); break; } default: JoinDataSet result1= new JoinDataSet(ds.rank()); for ( int i=0; i0 ) { for ( int ii=0; ii0 ) { double v= db ? wds.value(ii) - back.value() : 20 * Math.log10( wds.value(ii) / back.value() ); wds.putValue( ii,Math.max( 0,v ) ); } else { wds.putValue( ii, fill ); } } } else { for ( int ii=0; ii0 ) { for ( int jj=0; jj0 ) { double v= db ? wds.value(jj) - back.value() : 20 * Math.log10( wds.value(jj) / back.value() ); wds.putValue( jj, Math.max( 0,v ) ); } else { wds.putValue( jj, fill ); } } } else { for ( int jj=0; jj0 && !c.startsWith("|") ) { // grab the component, then apply processes after the pipe. if (!c.equals("") && fillDs.length() > 0 && fillDs.rank() == 2) { String[] labels = SemanticOps.getComponentNames(fillDs); String comp= c; int ip= comp.indexOf("|"); if ( ip!=-1 ) { comp= comp.substring(0,ip); } comp= Ops.saferName(comp); if ( fillDs.property(QDataSet.BUNDLE_1)!=null ) { fillDs= DataSetOps.unbundle( fillDs,comp ); //TODO: illegal argument exception } else { boolean found= false; for (int i = 0; i < labels.length; i++) { if ( Ops.saferName(labels[i]).equals(comp)) { fillDs = DataSetOps.slice1(fillDs, i); found= true; break; } } if ( !found ) { throw new IllegalArgumentException("component not found: "+comp ); } } } int idx= c.indexOf("|"); if ( idx==-1 ) { c=""; } else { c= c.substring(idx); } } if (c.length() > 5 && c.startsWith("|")) { fillDs = DataSetOps.sprocess(c, fillDs, mon ); } return fillDs; } /** * sprocess implements the poorly-named filters string / process string of Autoplot, allowing * clients to "pipe" data through a chain of operations. For example, the filters string * "|slice0(9)|histogram()" will slice on the ninth index and then take a histogram of that * result. See http://www.papco.org/wiki/index.php/DataReductionSpecs (TODO: wiki page was lost, * which could probably be recovered.) There's a big problem here: * if the command is not recognized, then it is ignored. We should probably change this, * but the change should be at a major version change in case it breaks things. 
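     * <p>For example (a sketch; NullProgressMonitor from org.das2.util.monitor is assumed to be
     * available as the no-op monitor):
     *<pre>
     * QDataSet hist= DataSetOps.sprocess( "|slice0(9)|histogram()", ds, new NullProgressMonitor() );
     *</pre>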
* @param c process string like "slice0(9)|histogram()" * @param fillDs The dataset loaded from the data source controller, with initial filters (like fill) applied. * @param mon monitor for the processing. * @throws ParseException when the string cannot be parsed * @throws Exception when a function cannot be processed (e.g. index out of bounds) * @return the dataset after the process string is applied. * @see http://autoplot.org/developer.dataset.filters * @see http://autoplot.org/developer.panel_rank_reduction */ public static QDataSet sprocess( String c, QDataSet fillDs, ProgressMonitor mon ) throws Exception { return OperationsProcessor.sprocess( c, fillDs, mon ); } /** * indicate if this one operator changes the dimensions. For example, * |smooth doesn't change the dimensions, but fftPower and slice do. * @param p the filter, e.g. "|smooth" * @return true if the dimensions change. */ public static boolean changesDimensions( String p ) { int j= p.indexOf('('); if ( j>-1 ) { p= p.substring(0,j); } switch (p) { case "|smooth": case "|nop": case "|trim": case "|magnitude": case "|abs": case "|hanning": case "|butterworth": case "|detrend": case "|medianFilter": case "|copy": case "|setDepend0Cadence": case "|expandToFillGaps": case "|expandWaveform": case "|cleanData": return false; default: return true; } } /** * return the next command that changes dimensions. * @param s0 scanner * @return the command, e.g. "|slice0" */ private static String nextDimensionChangingCommand( Scanner s0 ) { while ( s0.hasNext() ) { String cmd= s0.next(); if ( cmd.startsWith("|") ) { if ( changesDimensions(cmd) ) { return cmd; } } } return null; } /** * indicate if the operators change dimensions of the dataset. Often * this will result in true when the dimensions do not change, this is the better way to err. * @param c0 old value for the process string, e.g. "slice0(0)" * @param c1 new value for the process string, e.g. "slice0(0)|slice1(0)" * @return true if the dimensions would be different. */ public static boolean changesDimensions( String c0, String c1 ) { //if ( c.length()==0 && !c2.startsWith("|") ) return false; //TODO: kludge to avoid true when adding component child. if ( c0==null || c1==null ) return true; Scanner s0= new Scanner( c0 ); s0.useDelimiter("[\\(\\),]"); Scanner s1= new Scanner( c1 ); s1.useDelimiter("[\\(\\),]"); boolean slicesChangesDim= false; String cmd0= nextDimensionChangingCommand( s0 ); String cmd1= nextDimensionChangingCommand( s1 ); while ( cmd0!=null && cmd1!=null ) { if ( !cmd1.equals(cmd0) ) { return true; } if ( cmd0.startsWith("|slices") && cmd0.length()==7 ) { // multi dimensional slice Pattern skipPattern= Pattern.compile("\\'\\:?\\'"); while ( s0.hasNextInt() || s0.hasNext( skipPattern ) ) { if ( s0.hasNextInt() && s1.hasNextInt() ) { s0.nextInt(); s1.nextInt(); } else if ( s0.hasNext( skipPattern ) && s1.hasNext( skipPattern ) ) { s0.next(); s1.next(); } else { slicesChangesDim= true; s0.next(); s1.next(); } } } cmd0= nextDimensionChangingCommand( s0 ); cmd1= nextDimensionChangingCommand( s1 ); } boolean res= slicesChangesDim || cmd0!=null || cmd1!=null; logger.log(Level.FINE, " changesDimensions {0} , {1} ->{2}", new Object[]{c0, c1, res}); return res; } /** * return a bounding qube of the independent dimensions containing * the dataset. If r is the result of the function, then for
     *
     *  • rank 1: r.slice(0) x bounds, r.slice(1) y bounds
     *  • rank 2 waveform: r.slice(0) x bounds, r.slice(1) y bounds
     *  • rank 2 table: r.slice(0) x bounds, r.slice(1) DEPEND_0 bounds.
     *  • rank 3 table: r.slice(0) x bounds, r.slice(1) DEPEND_0 bounds.
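     *
     * <p>Sketch of typical use:
     *<pre>
     * QDataSet r= DataSetOps.dependBoundsSimple( ds );  // r.slice(0): x extent, r.slice(1): y extent
     *</pre>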
    * This does not take DELTA_PLUS and DELTA_MINUS into account. * When all the data is fill, ds[0,0] will be positive infinity. * @param ds a rank 1,2, or 3 dataset. * @return a bounding qube of the independent dimensions */ public static QDataSet dependBoundsSimple( QDataSet ds ) { logger.entering( "org.das2.qds.DataSetOps", "dependBoundsSimple" ); QDataSet xrange; QDataSet yrange; if ( ds.rank()==1 ) { xrange= Ops.extentSimple( SemanticOps.xtagsDataSet(ds), null ); yrange= Ops.extentSimple( ds, null ); } else if( ds.rank() == 2 ) { if ( SemanticOps.isRank2Waveform(ds) ) { xrange= Ops.extentSimple( SemanticOps.xtagsDataSet(ds), null ); yrange= Ops.extentSimple( ds, null ); //} else if ( SemanticOps.isBundle(ds) ) { //bug: spectrogram rend of rbspb_pre_ect-mageisM75-sp-L1_20120908_v1.0.0.cdf?Count_Rate_SpinSetAvg // xrange= Ops.extent( SemanticOps.xtagsDataSet(ds) ); // yrange= null; // for ( int i=0; i *
     *  • rank 1: r.slice(0) x bounds, r.slice(1) y bounds
     *  • rank 2 waveform: r.slice(0) x bounds, r.slice(1) y bounds
     *  • rank 2 table: r.slice(0) x bounds, r.slice(1) DEPEND_0 bounds.
  • rank 3 table:r.slice(0) x bounds r.slice(1) DEPEND_0 bounds. * * @param ds rank 1,2, or 3 dataset. * @return a bounding qube of the independent dimensions */ public static QDataSet dependBounds( QDataSet ds ) { logger.entering( "org.das2.qds.DataSetOps", "dependBounds" ); QDataSet xrange; QDataSet yrange; if ( ds.rank()==1 ) { xrange= Ops.extent( SemanticOps.xtagsDataSet(ds) ); yrange= Ops.extent( ds ); } else if( ds.rank() == 2 ) { if ( SemanticOps.isRank2Waveform(ds) ) { xrange= Ops.extent( SemanticOps.xtagsDataSet(ds) ); yrange= Ops.extent( ds ); //} else if ( SemanticOps.isBundle(ds) ) { //bug: spectrogram rend of rbspb_pre_ect-mageisM75-sp-L1_20120908_v1.0.0.cdf?Count_Rate_SpinSetAvg // xrange= Ops.extent( SemanticOps.xtagsDataSet(ds) ); // yrange= null; // for ( int i=0; i