/* * DataSetUtil.java * * Created on April 1, 2007, 4:28 PM * * To change this template, choose Tools | Template Manager * and open the template in the editor. */ package org.das2.qds; import java.lang.reflect.Array; import java.text.DecimalFormat; import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; import java.util.Comparator; import java.util.HashMap; import java.util.IllegalFormatConversionException; import java.util.logging.Level; import org.das2.datum.Units; import java.util.LinkedHashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Random; import java.util.TreeSet; import java.util.logging.Logger; import org.das2.datum.CacheTag; import org.das2.datum.Datum; import org.das2.datum.DatumRange; import org.das2.datum.DatumRangeUtil; import org.das2.datum.DatumUtil; import org.das2.datum.DatumVector; import org.das2.datum.EnumerationUnits; import org.das2.datum.LocationUnits; import org.das2.datum.TimeLocationUnits; import org.das2.datum.UnitsConverter; import org.das2.datum.UnitsUtil; import org.das2.datum.format.DatumFormatter; import org.das2.datum.format.DefaultDatumFormatter; import org.das2.datum.format.EnumerationDatumFormatterFactory; import org.das2.datum.format.FormatStringFormatter; import org.das2.datum.format.TimeDatumFormatter; import org.das2.util.LoggerManager; import org.das2.qds.examples.Schemes; import org.das2.qds.ops.Ops; import org.das2.qds.util.AutoHistogram; import org.das2.qds.util.DataSetBuilder; import org.das2.qds.util.LinFit; /** * Utilities for QDataSet, such as conversions from various forms * to QDataSet, and doing a units conversion. * * TODO: DataSetUtil, DataSetOps, and org.virbo.dsops.Ops have become blurred * over the years. These should either be combined or new mission statements * need to be created. 
 *
 * @author jbf
 */
public class DataSetUtil {

    // shared logger for this class, obtained under the "qdataset.ops" logger name.
    private static final Logger logger= LoggerManager.getLogger("qdataset.ops");

    // fully-qualified class name, passed to Logger.entering/exiting calls.
    private static final String CLASSNAME= "org.das2.qds.DataSetUtil";

    /**
     * creates a dataset of integers 0,1,2,...,n-1.
     * @param n the bound
     * @return the dataset
     */
    public static MutablePropertyDataSet indexGenDataSet(int n) {
        return new IndexGenDataSet(n);
    }

    /**
     * creates a dataset with the given cadence, start and length.
     * This is dangerous code, because the CADENCE must be reset if the UNITS are reset.
     * Use tagGenDataSet( int n, final double start, final double cadence, Units units ) if
     * units are going to be specified.
     * @param n the number of elements
     * @param start the value for the first element.
     * @param cadence the step size between elements
     * @return the dataset
     */
    public static MutablePropertyDataSet tagGenDataSet(int n, final double start, final double cadence) {
        // note TagGenDataSet takes the cadence before the start, the reverse of this method's parameter order.
        return new TagGenDataSet( n, cadence, start );
    }

    /**
     * creates a dataset with the given cadence, start and length.  QDataSet.CADENCE
     * will be set based on the units specified.  Do not change the units of the
     * result without updating cadence as well.
     * @param n the number of elements
     * @param start the value for the first element.
     * @param cadence the step size between elements
     * @param units the units of the dataset, for example Units.cm
     * @return the dataset
     */
    public static MutablePropertyDataSet tagGenDataSet(int n, final double start, final double cadence, Units units ) {
        // note TagGenDataSet takes the cadence before the start, the reverse of this method's parameter order.
        return new TagGenDataSet( n, cadence, start, units );
    }

    /**
     * creates a dataset of n elements, each having the same value.  This is
     * implemented as a dimensionless TagGenDataSet starting at value with a
     * cadence of zero.
     * @param n the number of elements
     * @param value the value for each element
     * @return the dataset
     */
    public static MutablePropertyDataSet replicateDataSet(int n, final double value) {
        return new TagGenDataSet( n, 0., value, Units.dimensionless );
    }

    /**
     * returns true if the dataset is monotonically increasing.
* If the dataset has the MONOTONIC property set to Boolean.TRUE, believe it. * The data can contain repeated values. * An empty dataset is not monotonic. * We now use a weights dataset to more thoroughly check for fill. * The dataset may contain fill data, only the non-fill portions are considered. * @param ds the rank 1 dataset with physical units. * @return true when the dataset is monotonically increasing. * @see org.das2.qds.QDataSet#MONOTONIC * @see org.das2.qds.ArrayDataSet#monotonicSubset(org.das2.qds.ArrayDataSet) * @see #isMonotonicAndIncreasing(org.das2.qds.QDataSet) * @see Ops#ensureMonotonic */ public static boolean isMonotonic(QDataSet ds) { if (ds.rank() != 1) { // TODO: support bins dataset rank 2 with BINS_1="min,max" return false; } if (ds.length() == 0) { return false; } if (Boolean.TRUE.equals(ds.property(QDataSet.MONOTONIC))) { return true; } QDataSet wds= DataSetUtil.weightsDataSet(ds); int i; for ( i=0; i> 1; double midVal = ds.value(mid); int cmp; if (midVal < key) { cmp = -1; // Neither val is NaN, thisVal is smaller } else if (midVal > key) { cmp = 1; // Neither val is NaN, thisVal is larger } else { long midBits = Double.doubleToLongBits(midVal); long keyBits = Double.doubleToLongBits(key); cmp = (midBits == keyBits ? 0 : // Values are equal (midBits < keyBits ? -1 : // (-0.0, 0.0) or (!NaN, NaN) 1)); // (0.0, -0.0) or (NaN, !NaN) } if (cmp < 0) { low = mid + 1; } else if (cmp > 0) { high = mid - 1; } else { return mid; } // key found } return -(low + 1); // key not found. } /** * return the index of the closest value in ds to d, using guess as a starting point. This * implementation ignores guess, but wise clients will supply it as a parameter. * @param ds a rank 1, monotonic dataset. * @param d the value to find. * @param guess a guess at a close index, or -1 if no guess should be made. In this case, a binary search is performed. * @return index of the closest. 
*/ public static int closest(QDataSet ds, double d, int guess) { int result = binarySearch(ds, d, 0, ds.length() - 1); if (result == -1) { result = 0; //insertion point is 0 } else if (result < 0) { result = ~result; // usually this is the case if (result >= ds.length() - 1) { result = ds.length() - 1; } else { double x = d; double x0 = ds.value(result - 1); double x1 = ds.value(result); result = ((x - x0) / (x1 - x0) < 0.5 ? result - 1 : result); } } return result; } /** * return the "User" property, which allow for extensions of the data model that * aren't used. This returns the property "name" under the name USER_PROPERTIES, * which must either be null or a Map<String,Object>. * @param ds The dataset containing the property. * @param name The name of the user property. * @return */ public static Object getUserProperty( QDataSet ds, String name ) { Map userProps= (Map) ds.property(QDataSet.USER_PROPERTIES); if ( userProps==null ) return null; return userProps.get(name); } /** * Return the names of non-structural properties of the dataset, like the UNITS and CADENCE. * These are the dimensionProperties, plus others specific to the dataset, such as CADENCE and * DELTA_PLUS. * @return the names of non-structural properties of the dataset, like the UNITS and CADENCE. 
*/ public static String[] propertyNames() { return new String[]{ QDataSet.UNITS, QDataSet.VALID_MIN, QDataSet.VALID_MAX, QDataSet.FILL_VALUE, QDataSet.FORMAT, QDataSet.CADENCE, QDataSet.MONOTONIC, QDataSet.SCALE_TYPE, QDataSet.TYPICAL_MIN, QDataSet.TYPICAL_MAX, QDataSet.RENDER_TYPE, QDataSet.QUBE, QDataSet.NAME, QDataSet.LABEL, QDataSet.TITLE, QDataSet.CACHE_TAG, QDataSet.COORDINATE_FRAME, QDataSet.DELTA_MINUS, QDataSet.DELTA_PLUS, QDataSet.BIN_MINUS, QDataSet.BIN_PLUS, QDataSet.BIN_MIN, QDataSet.BIN_MAX, QDataSet.WEIGHTS, QDataSet.USER_PROPERTIES, QDataSet.NOTES, QDataSet.METADATA, QDataSet.METADATA_MODEL, }; } /** * Copy over all the dimension properties, including: * UNITS, FORMAT, SCALE_TYPE, * TYPICAL_MIN, TYPICAL_MAX, * VALID_MIN, VALID_MAX, FILL_VALUE, * NAME, LABEL, TITLE, * USER_PROPERTIES * These are dimension properties, as opposed to structural * see dimensionProperties() for a list of dimension properties. * TODO: This DOES NOT support join datasets yet. * @param source * @param dest */ public static void copyDimensionProperties( QDataSet source, MutablePropertyDataSet dest ) { String[] names= DIMENSION_PROPERTIES; for ( String n: names ) { Object p= source.property(n); if ( p!=null ) dest.putProperty( n, p ); } } /** * copy over the render type, if it is still appropriate. This nasty bit of code was introduced * to support LANL data, where high-rank data is preferably plotted as a spectrogram, but can be * plotted as a stack of lineplots. 
* @param source * @param dest */ public static void maybeCopyRenderType( QDataSet source, MutablePropertyDataSet dest ) { String rt= (String) source.property(QDataSet.RENDER_TYPE); if ( rt==null ) return; if ( rt.equals("spectrogram") || rt.equals("nnSpectrogram") ) { if ( dest.rank()>1 ) dest.putProperty( QDataSet.RENDER_TYPE, rt ); } } private static final String[] DIMENSION_PROPERTIES = new String[] { QDataSet.UNITS, QDataSet.FORMAT, QDataSet.SCALE_TYPE, QDataSet.TYPICAL_MIN, QDataSet.TYPICAL_MAX, QDataSet.VALID_MIN, QDataSet.VALID_MAX, QDataSet.FILL_VALUE, QDataSet.NAME, QDataSet.LABEL, QDataSet.TITLE, QDataSet.USER_PROPERTIES, QDataSet.NOTES, }; /** * return the list of properties that pertain to the dimension that dataset * values exist. These are the properties that survive through most operations. * For example, if you flattened the dataset, what properties * would still exist? If you shuffled the data? These are not structural * properties like DEPEND_0, BUNDLE_1, etc. * Note that BUNDLE_1 will carry dimension properties as well. * @return */ public static String[] dimensionProperties() { return Arrays.copyOf( DIMENSION_PROPERTIES,DIMENSION_PROPERTIES.length ); } public static final String PROPERTY_TYPE_STRING="String"; public static final String PROPERTY_TYPE_NUMBER="Number"; public static final String PROPERTY_TYPE_BOOLEAN="Boolean"; public static final String PROPERTY_TYPE_MAP="Map"; public static final String PROPERTY_TYPE_QDATASET="QDataSet"; public static final String PROPERTY_TYPE_CACHETAG="CacheTag"; public static final String PROPERTY_TYPE_UNITS="Units"; /** * return the class for the property, to support Jython. * @param name the property name, e.g. 
QDataSet.TITLE * @return String.class * @see #getPropertyType(java.lang.String) */ public static Class getPropertyClass( String name ) { if ( name.equals(QDataSet.TITLE) || name.equals(QDataSet.LABEL) ) { return String.class; } else if ( name.equals(QDataSet.UNITS) ) { return Units.class; } else if ( name.equals(QDataSet.NAME) || name.equals(QDataSet.FORMAT) || name.equals(QDataSet.RENDER_TYPE) || name.equals(QDataSet.SCALE_TYPE) ) { return String.class; } else if ( name.equals(QDataSet.TYPICAL_MIN) || name.equals(QDataSet.TYPICAL_MAX) || name.startsWith(QDataSet.VALID_MIN) || name.startsWith(QDataSet.VALID_MAX) || name.equals(QDataSet.FILL_VALUE) ) { return Number.class; } else if ( name.equals(QDataSet.MONOTONIC) || name.equals(QDataSet.QUBE) ) { return Boolean.class; } else if ( name.equals(QDataSet.CACHE_TAG) ) { return CacheTag.class; } else if ( name.equals(QDataSet.USER_PROPERTIES) || name.equals(QDataSet.METADATA) ) { return Map.class; } else if ( name.startsWith("JOIN_") || name.startsWith("BINS_") ) { return String.class; } else if ( name.startsWith(QDataSet.SOURCE) || name.startsWith(QDataSet.VERSION) || name.equals(QDataSet.METADATA_MODEL) ) { return String.class; } else if ( name.equals(QDataSet.CADENCE) ) { return QDataSet.class; } else if ( name.startsWith("DEPEND_") || name.startsWith("BUNDLE_") || name.startsWith("DELTA_") || name.startsWith("BIN_") || name.startsWith("CONTEXT_") || name.startsWith("PLANE_" ) ) { return QDataSet.class; } else if ( name.equals(QDataSet.START_INDEX) ) { return Integer.class; } else { return null; } } /** * return the type of the property, as a string to support use in Jython: * String,Number,Boolean,Map,QDataSet,CacheTag,Units * @param name the property name * @return the property type or null if the name is not recognized * @see #getPropertyClass(java.lang.String) * @see org.das2.qds.QDataSet */ public static String getPropertyType( String name ) { switch (name) { case QDataSet.LABEL: case QDataSet.TITLE: case 
QDataSet.DESCRIPTION: return PROPERTY_TYPE_STRING; case QDataSet.UNITS: return PROPERTY_TYPE_UNITS; case QDataSet.NAME: case QDataSet.FORMAT: case QDataSet.RENDER_TYPE: case QDataSet.SCALE_TYPE: return PROPERTY_TYPE_STRING; case QDataSet.TYPICAL_MIN: case QDataSet.TYPICAL_MAX: case QDataSet.VALID_MIN: case QDataSet.VALID_MAX: case QDataSet.FILL_VALUE: return PROPERTY_TYPE_NUMBER; case QDataSet.MONOTONIC: case QDataSet.QUBE: return PROPERTY_TYPE_BOOLEAN; case QDataSet.CACHE_TAG: return PROPERTY_TYPE_CACHETAG; case QDataSet.USER_PROPERTIES: case QDataSet.METADATA: return PROPERTY_TYPE_MAP; case QDataSet.CADENCE: case QDataSet.WEIGHTS: return PROPERTY_TYPE_QDATASET; case QDataSet.SOURCE: case QDataSet.VERSION: case QDataSet.METADATA_MODEL: case QDataSet.COORDINATE_FRAME: return PROPERTY_TYPE_STRING; default: break; } if ( name.startsWith("JOIN_") || name.startsWith("BINS_") ) { return PROPERTY_TYPE_STRING; } else if ( name.startsWith("DEPEND_") || name.startsWith("BUNDLE_") || name.startsWith("DELTA_") || name.startsWith("BIN_") || name.startsWith("CONTEXT_") || name.startsWith("PLANE_") ) { return PROPERTY_TYPE_QDATASET; } else { return null; } } /** * return true if the property name is a valid dimension property * like "UNITS" or "FORMAT". See dimensionProperties(). * @param name property name to test * @return true if the property is a dimension property. */ public static boolean isDimensionProperty( String name ) { for ( String n: DIMENSION_PROPERTIES ) { if ( n.equals(name) ) return true; } return false; } private static final String[] GLOBAL_PROPERTIES= new String[] { QDataSet.USER_PROPERTIES, QDataSet.NOTES, QDataSet.VERSION, QDataSet.METADATA, QDataSet.METADATA_MODEL, QDataSet.SOURCE, }; /** * properties that describe the dataset itself, rather than those of a dimension * or structural properties. 
* @return the properties that describe the dataset itself */ public static String[] globalProperties() { return GLOBAL_PROPERTIES; } private static final String[] CORRELATIVE_PROPERTIES= new String[] { QDataSet.DELTA_MINUS, QDataSet.DELTA_PLUS, QDataSet.BIN_MINUS, QDataSet.BIN_PLUS, QDataSet.WEIGHTS, }; /** * properties that go along with the zeroth index. These are all QDataSets with dimensions compatible with the datasets. * If you trim the dataset, then these must be trimmed as well. * @return the properties that go along with the zeroth index */ public static String[] correlativeProperties() { return CORRELATIVE_PROPERTIES; } /** * true if the property is one that is global and is relevant throughout the * dataset, such as a title or the units. * property( "TITLE",0,0 ) often returns property("TITLE"), but * property( "DEPEND_0",0,0 ) should never return property("DEPEND_0"). * This is false, for example, for DEPEND_1. * @param prop the property name. * @return true if the property is inherited */ public static boolean isInheritedProperty( String prop ) { // QDataSet.MAX_RANK is equal to 4. switch (prop) { case QDataSet.DEPEND_0: case QDataSet.DEPEND_1: case QDataSet.DEPEND_2: case QDataSet.DEPEND_3: return false; case QDataSet.BUNDLE_0: case QDataSet.BUNDLE_1: case QDataSet.BUNDLE_2: case QDataSet.BUNDLE_3: return false; case QDataSet.BINS_0: case QDataSet.BINS_1: return false; case QDataSet.JOIN_0: case "JOIN_1": return false; case QDataSet.START_INDEX: case QDataSet.RENDER_TYPE: return false; default: break; } if ( Arrays.asList(CORRELATIVE_PROPERTIES).contains(prop) ) { return false; } boolean indexProp= prop.startsWith("PLANE_"); // note CONTEXT* is inherited. //TODO: shouldn't DELTA_PLUS and DELTA_MINUS be on this list? return !indexProp; } /** * return properties attached to the slice at index. Note the slice * implementations use this, and this only returns properties from * dimensionProperties(). 
* * http://autoplot.org//QDataSet#20150514 * * Note this does not look at BUNDLE_1 properties. TODO: consider this. * * @param ds the dataset to slice. * @param index index to slice at. * @param result a map to insert the new properties, or null if a new one should be created. * @return a map of properties attached to the slice at index */ public static Map sliceProperties( QDataSet ds, int index, Map result ) { if ( result==null ) result= new LinkedHashMap(); if ( ds.property(QDataSet.BUNDLE_0 )!=null ) { logger.fine("sliceProperties is not allowed when BUNDLE_0 is set"); return result; } String[] names = DIMENSION_PROPERTIES; // no need to copy when we call dimensionProperties() for (String name : names) { Object val = ds.property(name, index); if (val != null) { result.put(name, val); } } return result; } /** * help out implementations of the QDataSet.trim() command. This does the dimension properties * and geometric properties like DEPEND_0 and DELTA_PLUS. This also * checks for indexed properties, which are NAME__i. * @param ds the dataset with properties to trim. * @param start start index of trim operation * @param stop exclusive stop index of the trim operation. * @return the properties of ds, trimmed to the indices. 
*/ public static Map trimProperties( QDataSet ds, int start, int stop ) { Map result= new LinkedHashMap(); result= getDimensionProperties(ds,result); QDataSet dep0= (QDataSet) ds.property(QDataSet.DEPEND_0); if ( dep0!=null ) result.put( QDataSet.DEPEND_0, dep0.trim(start,stop) ); for ( int i=1; i<=QDataSet.MAX_RANK; i++ ) { String prop= "DEPEND_"+i; QDataSet dep= (QDataSet) ds.property(prop); if ( dep!=null ) { if ( dep.rank()>1 && !Schemes.isRank2Bins(dep) ) { dep= dep.trim(start,stop); } result.put( prop, dep ); } } QDataSet dsp; String [] props= DataSetUtil.correlativeProperties(); for ( String s: props ) { dsp= (QDataSet) ds.property( s ); if ( dsp!=null ) { if ( dsp.rank()>0 ) { result.put( s, dsp.trim(start,stop) ); } else { result.put( s, dsp ); } } } for ( int i=0; i0 ) result.put( "PLANE_"+i, p.trim(start,stop) ); else result.put("PLANE_"+i, p ); // note planes must be at least rank 1 right now. } else { break; } } if ( ds.length() getDimensionProperties( QDataSet ds, Map def ) { return getProperties( ds, DIMENSION_PROPERTIES, def ); } /** * return the properties listed, using the defaults if provided. * See dimensionProperties(), globalProperties(). * @param ds dataset source of the properties. * @param names array of names * @param def defaults, or null if no defaults are to be used. * @return map of the properties. */ public static Map getProperties( QDataSet ds, String[] names, Map def ) { if ( def==null ) { def= new LinkedHashMap(); } else { def= new LinkedHashMap( def ); } for (String name : names) { Object val = ds.property(name); if (val != null) { def.put(name, val); } } return def; } /** * gets all the properties of the dataset. This is a shallow * copy of properties. * @param ds the dataset * @param def an empty map. * @return the properties. 
*/ public static Map getProperties(QDataSet ds, Map def) { Map result = def; for (int i = 0; i <= ds.rank(); i++) { Object dep = ds.property("DEPEND_" + i); if (dep != null) { result.put("DEPEND_" + i, dep); } } for (int i = 0; i <= ds.rank(); i++) { Object dep = ds.property("BUNDLE_" + i); if (dep != null) { result.put("BUNDLE_" + i, dep); } } for (int i = 0; i <= ds.rank(); i++) { Object dep = ds.property("BINS_" + i); if (dep != null) { result.put("BINS_" + i, dep); } } for (int i = 0; i <= ds.rank(); i++) { Object dep = ds.property("JOIN_" + i); if (dep != null) { result.put("JOIN_" + i, dep); } } for (int i = 0; i < QDataSet.MAX_PLANE_COUNT; i++) { Object plane = ds.property("PLANE_" + i); if (plane != null) { result.put("PLANE_" + i, plane); } else { break; } } for (int i = 0; i < QDataSet.MAX_PLANE_COUNT; i++) { Object cds = ds.property("CONTEXT_" + i); if (cds != null) { result.put("CONTEXT_" + i, cds); } else { break; } } String[] names = propertyNames(); for (String name : names) { if (ds.property(name) != null) { result.put(name, ds.property(name)); } } return result; } /** * gets all the properties of the dataset. This is a shallow * copy of properties. * @param ds the dataset * @return the properties */ public static Map getProperties(QDataSet ds) { return getProperties(ds, new LinkedHashMap()); } /** * copy all properties into the dataset by iterating through the map. Properties * that are equal to null are not copied, since null is equivalent to the * property not found. * @param properties the properties * @param ds the mutable property dataset, which is still mutable. 
*/ public static void putProperties(Map properties, MutablePropertyDataSet ds) { if ( ds.isImmutable() ) { logger.warning( "ds is immutable, an exception will be thrown."); } for ( Map.Entry e : properties.entrySet() ) { if ( e.getKey().startsWith("DEPEND_") && e.getValue() instanceof Map ) { QDataSet dep= (QDataSet) ds.property(e.getKey()); if ( dep instanceof MutablePropertyDataSet ) { MutablePropertyDataSet mdep= (MutablePropertyDataSet)dep; putProperties( (Map)e.getValue(), mdep ); } } else if ( e.getKey().startsWith("PLANE_") && e.getValue() instanceof Map ) { QDataSet dep= (QDataSet) ds.property(e.getKey()); if ( dep instanceof MutablePropertyDataSet ) { MutablePropertyDataSet mdep= (MutablePropertyDataSet)dep; putProperties( (Map)e.getValue(), mdep ); } } else if ( e.getKey().startsWith("BUNDLE_") && e.getValue() instanceof Map ) { QDataSet dep= (QDataSet) ds.property(e.getKey()); if ( dep instanceof MutablePropertyDataSet ) { MutablePropertyDataSet mdep= (MutablePropertyDataSet)dep; putProperties( (Map)e.getValue(), mdep ); } } else if ( e.getKey().startsWith("CONTEXT_") && e.getValue() instanceof Map ) { QDataSet dep= (QDataSet) ds.property(e.getKey()); if ( dep instanceof MutablePropertyDataSet ) { MutablePropertyDataSet mdep= (MutablePropertyDataSet)dep; putProperties( (Map)e.getValue(), mdep ); } } else { if ( e.getValue()!=null ) ds.putProperty((String) e.getKey(), e.getValue()); } } } /** * cleans up code by doing the cast, and handles default value. The * result of this is for human-consumption! * */ //public static getProperty( QDataSet ds, String propertyName, Class clazz, Object defaultValue ) { //T p = ds.property( propertyName ); //if ( p==null ) p= defaultValue; //return p; //ArrayList o; //}*/ /** * provide a string representation of the dataset. This is intended for * human consumption, but does follow rules outlined in * http://autoplot.org//developer.datasetToString * * @param ds any dataset. 
* @return a short, human-readable representation of the dataset. * @see #format(org.das2.qds.QDataSet, boolean) */ public static String toString(QDataSet ds) { if ( ds==null ) { throw new IllegalArgumentException( "null dataset" ); } Units u= (Units)ds.property(QDataSet.UNITS); if ( u==null ) { if ( ds.property(QDataSet.JOIN_0)!=null && ds.length()>0 ) { u= (Units)ds.property(QDataSet.UNITS,0); } if ( u==null) u= Units.dimensionless; } String name = (String) ds.property(QDataSet.NAME); if (name == null) { name = "dataset"; } if ( ds.rank()==0 ) { try { if ( name.equals("dataset") ) { Datum d= DataSetUtil.asDatum(ds); return String.valueOf( d ); } else { return name + "=" + DataSetUtil.asDatum(ds) ; } } catch ( IllegalArgumentException ex ) { return "Error: "+ex; } } if ( ds.rank()==1 && QDataSet.VALUE_BINS_MIN_MAX.equals(ds.property(QDataSet.BINS_0)) ) { if ( ds.value(0) <= ds.value(1) ) { if ( u!=Units.dimensionless ) { DatumRange dr= new DatumRange( ds.value(0), ds.value(1), u ); return dr.toString(); } else { DatumRange dr= new DatumRange( Ops.datum(ds.slice(0)), Ops.datum(ds.slice(1)) ); return dr.toString(); } } else { return String.format( "%s %s (invalid because BINS_0=min,max)", ds.slice(0), ds.slice(1) ); } } if ( ds.rank()==1 && "min,maxInclusive".equals(ds.property(QDataSet.BINS_0)) ) { if ( ds.value(0) <= ds.value(1) ) { DatumRange dr= new DatumRange( ds.value(0), ds.value(1), u ); return dr.toString() + " (inclusive)"; } else { return String.format( "%s %s (invalid because BINS_0=min,maxInclusive)", ds.slice(0), ds.slice(1) ); } } if ( ds.rank()==1 && Schemes.isComplexNumbers(ds) ) { DecimalFormat df= new DecimalFormat("0.000E0"); String rs= String.valueOf(ds.value(0)); String is= String.valueOf(ds.value(1)); if ( rs.length()>7 ) rs= df.format(ds.value(0)); if ( is.length()>7 ) is= df.format(ds.value(1)); return "(" + rs + "+" + is+"j)"; // Use "j" instead of "i" because Python does this. 
} if ( ds.rank()==1 && Ops.isLegacyBundle(ds) && ds.length()<8 ) { // introduced to support where or rank 2 dataset. QDataSet dep0= (QDataSet) ds.property(QDataSet.DEPEND_0); StringBuilder str = new StringBuilder(""); try { str.append( dep0.slice(0) ).append("=").append( ds.slice(0) ); } catch ( RuntimeException ex ) { logger.log(Level.SEVERE, ex.getMessage(), ex); str.append("Exception"); } for ( int i=1; i 6) { dname = dname.substring(0, 6) + "..."; } depNames[i] = dname + "="; } else { depNames[i] = "DEPEND_"+i+"="; } } } if ( ds.property(QDataSet.BINS_0)!=null ) { depNames[0]= ((String)ds.property(QDataSet.BINS_0)).replaceAll(","," "); } if ( ds.property(QDataSet.BINS_1)!=null ) { depNames[1]= ((String)ds.property(QDataSet.BINS_1)).replaceAll(","," "); } if ( ds.property(QDataSet.JOIN_0)!=null ) { //don't add anything to this. ds[8,time=50*,freq=20*] } if ( ds.property(QDataSet.BUNDLE_0)!=null && depNames[0].length()==0 ) { depNames[0]= "BUNDLE_0="; } if ( ds.property(QDataSet.BUNDLE_1)!=null && depNames[0].length()==0 ) { depNames[1]= "BUNDLE_1="; // TODO: consider ds[time=1440,density,b_gsm=5] vs ds[time=1440,BUNDLE_1=5] } int[] qubeDims; if ( DataSetUtil.isQube(ds) ) { try { qubeDims= DataSetUtil.qubeDims(ds); } catch ( RuntimeException ex ) { logger.log( Level.SEVERE, null, ex ); qubeDims= new int[] { 0,0,0,0,0,0,0,0,0 }; } } else { qubeDims= new int[ ds.rank() ]; qubeDims[0]= ds.length(); if ( ds.rank() > 1) qubeDims[1]= ds.length(0); if ( ds.rank() > 2) qubeDims[2]= ds.length(0,0); if ( ds.rank() > 3) qubeDims[3]= ds.length(0,0,0); } StringBuilder dimStr = new StringBuilder("" + depNames[0] + ds.length()); for ( int i=1; i0 ) { offset= iter.getValue(ds); break; } } if ( offset==u.getFillDouble() ) { return null; } else { return DataSetUtil.asDataSet(offset, u); } } /** * return just the valid points of the dataset. * @param ds a dataset rank > 0. * @return the valid points of the dataset in a rank 1 dataset. 
 */
    public static QDataSet validPoints( QDataSet ds ) {
        // allocate room for every element, since any of them might be valid.
        int lenmax= DataSetUtil.totalLength(ds);
        DDataSet result= DDataSet.createRank1(lenmax);
        int i=0; // the number of valid points copied so far.
        QDataSet wds= DataSetUtil.weightsDataSet(ds);
        DataSetIterator iter= new QubeDataSetIterator(ds);
        while( iter.hasNext() ) {
            iter.next();
            double w= iter.getValue(wds);
            if ( w>0 ) { // a positive weight marks the point as valid.
                result.putValue( i, iter.getValue(ds) );
                i=i+1;
            }
        }
        // NOTE(review): the result was created with lenmax elements and is not visibly shortened
        // to the i valid points here -- confirm whether the result should be trimmed.
        // NOTE(review): propertyNames() includes properties such as WEIGHTS and DELTA_PLUS, which
        // are copied without being reduced to the valid points -- confirm this is intended.
        for ( String s: propertyNames() ) {
            result.putProperty( s, ds.property(s) );
        }
        return result;
    }

    /**
     * return the greatest common divisor, which is the unit for which
     * all elements in the dataset are integer multiples of the result.
     * This works on continuous data, however, so limit is used to determine
     * the fuzz allowed.  TODO: this needs review and is not for production use.
     *
     * Each pass takes the remainders of ds modulo d, histograms them, and looks
     * for peaks.  When the only peak is at zero, d is returned.  Otherwise
     * d is replaced with a non-zero peak of the remainders and the process is repeated,
     * until no peak remains between the tolerance and d less the tolerance.
     *
     * @param ds any dataset
     * @param d rank 0 dataset, first factor for the dataset, error is used to detect non-zero significance.
     *    The DELTA_MINUS property of d, when present, contributes to the tolerance.
     * @param limit rank 0 dataset, the resolution for which data is considered equal, and this
     * limit should be greater than numerical precision.
     * @throws IllegalArgumentException if there is no valid data.  (NOTE(review): nothing in this
     *    method throws this explicitly; presumably it comes from the operations called -- confirm.)
     * @return the greatest common divisor.
     *
     */
    public static QDataSet gcd( QDataSet ds, QDataSet d, QDataSet limit ) {
        QDataSet r, hist, peaks;
        do {
            r= Ops.mod( ds, d );
            hist= Ops.autoHistogram(r);
            // debugging code, which writes the histogram to a file:
//            try {
//                new AsciiFormatter().formatToFile( "/tmp/hist.dat", (QDataSet)hist.property( QDataSet.DEPEND_0 ), hist );
//            } catch (IOException ex) {
//                Logger.getLogger(DataSetUtil.class.getName()).log(Level.SEVERE, null, ex);
//            }
            peaks= AutoHistogram.peaks(hist);
            if ( peaks.length()==1 && peaks.slice(0).value()==0. ) { // clearly since we divide everything exactly, this is the GCD.
                return d;
            }
            // stop is stopping condition tolerance.  It is the greater of d's DELTA_MINUS (when set)
            // and limit, where limit is expressed in the units of the peaks.
            double stop= ( d.property(QDataSet.DELTA_MINUS)!=null ) ? ((QDataSet)d.property(QDataSet.DELTA_MINUS)).value() : 0.0;
            stop= Math.max( stop, DataSetUtil.asDatum(limit).doubleValue( SemanticOps.getUnits( peaks ) ) );
            // remainders above top are within the tolerance of d itself.
            double top= d.value() - stop;
            // skip the first peak when it is within the tolerance of zero.
            // NOTE(review): when peaks has just one element and it is within the tolerance of zero,
            // this index is 1, and the break below is what keeps it from being used.
            int nonZeroPeakIndex= ( peaks.value(0) - stop < 0.0 ) ? 1 : 0;
            // step back over the peaks which are within the tolerance of d.
            int lastNonZeroPeakIndex= peaks.length()-1;
            while ( lastNonZeroPeakIndex>=0 && ( peaks.value(lastNonZeroPeakIndex) > top ) ) {
                lastNonZeroPeakIndex--;
            }
            if ( lastNonZeroPeakIndex < nonZeroPeakIndex ) {
                // no peaks remain between the tolerance and top, so d is accepted.
                break;
            } else {
                // the first significant remainder becomes the next candidate divisor.
                d= peaks.slice( nonZeroPeakIndex );
            }
            if ( d.value()==0.0 ) {
                // NOTE(review): this condition is only logged and the loop continues with d equal to zero.
                //throw new IllegalArgumentException("things have gone wrong again, where d becomes zero");
                logger.fine("things have gone wrong again, where d becomes zero");
            }
        } while ( true );
        return d;
    }

    /**
     * return the greatest common divisor, which is the unit for which
     * all elements in the dataset are integer multiples of the result.
     * This works on continuous data, however, so limit is used to determine
     * the fuzz allowed.  TODO: this needs review and is not for production use.
     * @param ds any dataset
     * @param limit the resolution for which data is considered equal.  The result
     * will be an integer multiple of this.
     * @throws IllegalArgumentException if there is no valid data.
     * @return the greatest common divisor.
*/ public static QDataSet gcd( QDataSet ds, QDataSet limit ) { if ( ds.rank()!=1 ) { throw new IllegalArgumentException("dataset must be rank 1"); } if ( limit.rank()!=0 || limit.value()<=0 ) { throw new IllegalArgumentException("limit must be rank 0 and positive"); } if ( !SemanticOps.getUnits(ds).isConvertibleTo(SemanticOps.getUnits(limit) ) ) { throw new IllegalArgumentException("limit must be in the same units as ds"); } QDataSet ds1= validPoints(ds); if ( ds1.length()==0 ) throw new IllegalArgumentException("no valid points"); //if ( ds1.length()==1 ) return DataSetOps.slice0( ds, 0 ); if ( ds1.length()==1 ) return ds.slice( 0 ); //QDataSet guess= DataSetOps.slice0( ds, 1 ); int i0= 1; QDataSet guess= ds.slice( i0 ); while ( Ops.lt(guess,limit).value()>0 && i0<(ds.length()-1) ) { i0++; guess= ds.slice( i0 ); } //try { return gcd( ds, guess, limit ); // } catch ( IndexOutOfBoundsException ex ) { // System.err.println("# demo bug in gcd"); // System.err.println("limit="+limit); // System.err.println("ds=["+ds.value(0)+","); // for ( int i=0; i1000 ) { throw new IllegalArgumentException("dataset is too large (ds.length()>1000)"); } if ( extent==null ) extent= Ops.extent(ds); //String charsScatter= "\u2840\u2804\u2802\u2801"; //\u2800 is blank String charsScatter= "\u28C0\u2824\u2812\u2809"; //\u2800 is blank String charsBar= "\u2581\u2582\u2583\u2584\u2585\u2586\u2587\u2588"; String bb= bar ? 
charsBar : charsScatter; int maxn= bb.length(); StringBuilder build= new StringBuilder(DataSetUtil.totalLength(ds)); QubeDataSetIterator it= new QubeDataSetIterator(ds); QDataSet wds= DataSetUtil.weightsDataSet(ds); double min= extent.value(0); double range= extent.value(1)-min; while ( it.hasNext() ) { it.next(); if ( it.getValue(wds)>0 ) { int n= (int)( maxn * ( it.getValue(ds) - min ) / range ); if ( bar ) n= Math.max( 0, Math.min( n, (maxn-1) ) ); if ( n>=0 && nyds.length()/2 ) { dy= null; } } if ( dy==null ) { if ( isLogSpacing( yds ) ) { QDataSet diff1= yds.trim(0,yds.length()-1); QDataSet diff2= yds.trim(1,yds.length()); delta= Ops.log( Ops.divide(diff2,diff1) ); delta= Ops.putProperty( delta, QDataSet.UNITS, Units.logERatio ); delta= Ops.convertUnitsTo( delta, Units.percentIncrease ); delta= Ops.interpolate( delta, Ops.linspace( -0.5,diff1.length()-0.5, diff1.length()+1 ) ); QDataSet v= Ops.divide( delta,100. ); v= Ops.putProperty( v, QDataSet.UNITS, null ); QDataSet ddy= Ops.sqrt( Ops.add( 1., v ) ); yds0= Ops.divide( yds, ddy ); yds1= Ops.multiply( yds, ddy ); } else { QDataSet diff1= yds.trim(0,yds.length()-1); QDataSet diff2= yds.trim(1,yds.length()); delta= Ops.interpolate( Ops.subtract(diff2,diff1), Ops.linspace( -0.5,diff1.length()-0.5, diff1.length()+1 ) ); delta= Ops.divide( delta, DataSetUtil.asDataSet(2) ); yds0= Ops.subtract( yds, delta ); yds1= Ops.add( yds, delta ); } } else { if ( UnitsUtil.isRatiometric( SemanticOps.getUnits(dy) ) ) { dy= Ops.convertUnitsTo(dy, Units.percentIncrease ); double ddy= Math.sqrt( 1. + dy.value()/100. 
); yds0= Ops.divide( yds, DataSetUtil.asDataSet(ddy) ); yds1= Ops.multiply( yds, DataSetUtil.asDataSet(ddy) ); } else { dy= Ops.divide( dy, DataSetUtil.asDataSet(2) ); yds0= Ops.subtract( yds, dy ); yds1= Ops.add( yds, dy ); } } MutablePropertyDataSet mpds= (MutablePropertyDataSet)Ops.bundle( yds0, yds1 ); mpds.putProperty( QDataSet.BINS_1, "min,max" ); return mpds; } /** * return the cadence between measurements of a waveform dataset. This is * different than the cadence typically quoted, which is the cadence between * waveform records. * @param ds * @return the cadence */ public static RankZeroDataSet getCadenceWaveform( QDataSet ds ) { RankZeroDataSet xlimit; if ( Schemes.isRank2Waveform(ds) ) { QDataSet offsets= (QDataSet)ds.property(QDataSet.DEPEND_1); if ( offsets.rank()==1 ) { xlimit= DataSetUtil.guessCadenceNew( offsets, null ); } else { xlimit= DataSetUtil.guessCadenceNew( offsets.slice(0), null ); } } else if ( Schemes.isRank3Waveform(ds) ) { xlimit= getCadenceWaveform(ds.slice(0)); for ( int i=1; i *
  • 2011-02-21: keep track of repeat values, allowing zero to be considered either mono increasing or mono decreasing *
  • 2011-02-21: deal with interleaved fill values, keeping track of last valid value. * * @param xds the x tags, which may not contain fill values for non-null result. * @param yds the y values, which if non-null is only used for fill values. This * is only used if it is rank 1. * @return null or the cadence in a rank 0 dataset. The following may be * properties of the result:
     * <ul>
     * <li>SCALE_TYPE may be "log"</li>
     * <li>UNITS will be a ratiometric unit when the SCALE_TYPE is log, and
     *     will be the offset unit for interval units like Units.t2000.</li>
     * </ul>
    */ public static RankZeroDataSet guessCadenceNew( QDataSet xds, QDataSet yds) { Logger logger= LoggerManager.getLogger("qdataset.ops.guesscadence"); logger.entering(CLASSNAME,"guessCadenceNew"); Object o= xds.property( QDataSet.CADENCE ); // // if ( o==null ) { // o= DataSetAnnotations.getInstance().getAnnotation( xds, DataSetAnnotations.ANNOTATION_CADENCE ); // } // if ( yds!=null && yds.rank()>1 ) { if ( Schemes.isRank2Waveform(yds)) {// leverage that we have the timetag offsets, and we can look at the first waveform to guess the cadence. RankZeroDataSet r1= guessCadenceNew(xds,null); QDataSet dd= (QDataSet)yds.property(QDataSet.DEPEND_1); Datum rw= null; if ( dd.rank()==1 ) { QDataSet ee= Ops.extent(dd); rw= DataSetUtil.asDatum( Ops.subtract( ee.slice(1), ee.slice(0) ) ); } else { for ( int i=0; i0 ) { logger.log( Level.SEVERE, "averaging CADENCE rank 0: {0}", q); q= Ops.reduceMax( q, 0 ); } logger.exiting(CLASSNAME,"guessCadenceNew"); return DRank0DataSet.create( DataSetUtil.asDatum(q) ); } } else { logger.log(Level.INFO, "CADENCE units ({0}) are inconvertible to {1}", new Object[]{qu, u.getOffsetUnits() }); // bugfix: offset units should be reported. } } if (yds == null) { yds = DataSetUtil.replicateDataSet(xds.length(), 1.0); } else { if ( xds.length()!=yds.length() ) { throw new IllegalArgumentException("xds.length()!=yds.length()"); } } if ( yds.rank()>1 ) { //TODO: check for fill columns. Note the fill check was to support a flakey dataset. yds = DataSetUtil.replicateDataSet(xds.length(), 1.0); } if ( xds.length()<2 ) { logger.exiting(CLASSNAME,"guessCadenceNew"); return null; } if ( xds.rank()==2 && xds.property(QDataSet.BINS_1)!=null ) { xds= DataSetOps.slice1( xds, 0 ); } // Do initial scans of the data to check for monotonic decreasing and "ever increasing" spacing. double sp; // spacing between two measurements. double monoMag; // -1 if mono decreasing, 0 if not monotonic, 1 if mono increasing. 
QDataSet wds= DataSetUtil.weightsDataSet(xds); // check to see if spacing is monotonically increasing or decreasing, and has repeats. int monoDecreasing= 0; int monoIncreasing= 0; int count= 0; boolean xHasFill= false; int repeatValues= 0; double last= Double.NaN; for ( int i=0; i0. ) { monoIncreasing++; } else { repeatValues++; } last= xds.value(i); } if ( ( repeatValues + monoIncreasing ) >(90*count/100) ) { // 90% increasing monoMag= 1; } else if ( ( repeatValues + monoDecreasing ) >(90*count/100) ) { // 90% decreasing monoMag= -1; } else { monoMag= 0; } // don't allow datasets with fill in x to be considered. if ( xHasFill && monoMag==0 ) { logger.exiting(CLASSNAME,"guessCadenceNew"); return null; } if ( monoMag==0 ) { logger.exiting(CLASSNAME,"guessCadenceNew"); return null; } double everIncreasing= 0.; if ( xds.length()<100 && xds.rank()==1 ) { LinFit f; f= new LinFit( Ops.findgen(xds.length()), xds ); double chilin= f.getChi2(); QDataSet r= Ops.where( Ops.gt( Ops.abs(xds), DataSetUtil.asDataSet(0) ) ); if ( r.length()<2 ) { everIncreasing= 0; } else { QDataSet xdsr= DataSetOps.applyIndex( xds, 0, r, false ); // xdsr= xds[r] f= new LinFit( Ops.findgen(xdsr.length()), Ops.log(xdsr) ); double chilog= f.getChi2(); if ( chilog < ( chilin/2 ) ) { QDataSet ext= Ops.extent(xdsr); everIncreasing= ext.value(1)/ext.value(0); } } } else { if ( xds.length()>2 ) { // check to see if spacing is ever-increasing, which is a strong hint that this is log spacing. // everIncreasing is a measure of this. When it is >0, it is the ratio of the last to the first // number in a ever increasing sequence. 
Allow for one repeated length (POLAR/Hydra Energies) sp= monoMag * ( xds.value(2) - xds.value(0) ); everIncreasing= xds.value(2) / xds.value(0); double sp0= sp; if ( xds.value(2)<=0 || xds.value(0)<=0 || xds.value(1)>(xds.value(0)+xds.value(2)) ) { everIncreasing= 0; } for ( int i=3; everIncreasing>0 && i sp0*1.00001 ) { everIncreasing= xds.value(i)/xds.value(0); sp0= sp1; } else { everIncreasing= 0; } } } if ( everIncreasing>0 && monoMag==-1 ) everIncreasing= 1/everIncreasing; } boolean logScaleType = "log".equals( xds.property(QDataSet.SCALE_TYPE) ); QDataSet extent= Ops.extent(xds); AutoHistogram ah= new AutoHistogram(); QDataSet diffs; if ( yds.rank()==1 && xds.rank()==1 ) { // ftp://virbo.org/tmp/poes_n17_20041228.cdf?P1_90[0:300] has every other value=fill. QDataSet r= Ops.where( Ops.valid(yds) ); if ( r.length()<2 ) { diffs= Ops.diff( xds ); } else if ( r.length()==yds.length() ) { diffs= Ops.diff( xds ); } else { diffs= Ops.diff( DataSetOps.applyIndex( xds, 0, r, false ) ); } } else { diffs= Ops.diff( xds ); } if ( monoDecreasing>(9*count/10) ) { diffs= Ops.multiply( diffs, asDataSet(-1) ); } if ( repeatValues>0 ) { QDataSet r= Ops.where( Ops.ne( diffs,DataSetUtil.asDataSet(0) ) ); diffs= DataSetOps.applyIndex( diffs, 0, r, false ); } QDataSet hist= ah.doit( diffs ); long total= (Long)( ((Map)hist.property( QDataSet.USER_PROPERTIES )).get(AutoHistogram.USER_PROP_TOTAL) ); if ( total==0 ) { logger.exiting(CLASSNAME,"guessCadenceNew"); return null; } // if the ratio of successive numbers is always increasing this is a strong // hint that ratiometric spacing is more appropriate. If non-zero, then // this is the ratio of the first to the last number. final int everIncreasingLimit = total < 10 ? 25 : 100; int ipeak=0; int peakv=(int) hist.value(0); int linHighestPeak=0; // highest observed non-trivial peak int linMedian=-1; int t=0; // TODO: do this some time. A contour plot only has connections in one direction. 
// // look for negative cadence peak as well as positive // QDataSet dep0= (QDataSet) hist.property(QDataSet.DEPEND_0 ); // double binWidth1= ((Number)((Map) hist.property(QDataSet.USER_PROPERTIES)).get(AutoHistogram.USER_PROP_BIN_WIDTH)).doubleValue(); // // int imin= -1; // for ( int i=0; i0 ) { // imin= i; // break; // } // } // if ( dep0.value(imin) < binWidth1 ) { // return null; // } double mean= AutoHistogram.mean( hist ).value(); int firstPositiveBin= Integer.MAX_VALUE; QDataSet dep0= (QDataSet) hist.property(QDataSet.DEPEND_0 ); for ( int i=0; ipeakv ) { ipeak= i; peakv= (int) hist.value(i); } if ( hist.value(i)>peakv/10. ) { linHighestPeak= i; } if ( linMedian==-1 && t>total/2 ) { linMedian= i; } if ( dep0.value(i)>0 && firstPositiveBin==Integer.MAX_VALUE ) { firstPositiveBin= i; } } int linLowestPeak=0; for ( int i=0; ipeakv/10. ) { linLowestPeak=i; break; } } Units xunits= (Units) xds.property( QDataSet.UNITS ); if ( xunits==null ) xunits= Units.dimensionless; // we use the range of the bins to exclude log option, such as 800000-800010. boolean log= false; double firstBin= ((Number)((Map) hist.property(QDataSet.USER_PROPERTIES)).get(AutoHistogram.USER_PROP_BIN_START)).doubleValue(); double binWidth= ((Number)((Map) hist.property(QDataSet.USER_PROPERTIES)).get(AutoHistogram.USER_PROP_BIN_WIDTH)).doubleValue(); firstBin= firstBin - binWidth; // kludge, since the firstBin left side is based on the first point. boolean bunch0= firstPositiveBin0, bunch0 }); if ( isratiomeas && extent.value(0)>0 && ( logScaleType || everIncreasing>everIncreasingLimit || bunch0 ) ) { ah= new AutoHistogram(); QDataSet diffs2= Ops.diff(Ops.log(xds)); logDiff= diffs2; QDataSet yy= DataSetUtil.weightsDataSet(yds); if ( repeatValues>0 ) { QDataSet r= Ops.where( Ops.ne( diffs2,DataSetUtil.asDataSet(0) ) ); diffs2= DataSetOps.applyIndex( diffs2, 0, r, false ); yy= DataSetOps.applyIndex( yy, 0, r, false ); } QDataSet loghist= ah.doit( diffs2,yy ); //TODO: sloppy! 
// ltotal can be different than total. TODO: WHY? maybe because of outliers? long ltotal= (Long)( ((Map)loghist.property( QDataSet.USER_PROPERTIES )).get(AutoHistogram.USER_PROP_TOTAL) ); int logPeak=0; int logPeakv=(int) loghist.value(0); int logMedian=-1; int logHighestPeak=0; t=0; //mean= AutoHistogram.mean(loghist).value(); for ( int i=0; ilogPeakv ) { logPeak=i; logPeakv= (int) loghist.value(i); } if ( loghist.value(i)>logPeakv/100. ) { // be loosy-goosey with log. logHighestPeak= i; } if ( logMedian==-1 && t>ltotal/2 ) { logMedian= i; } } //int logLowestPeak=0; // see commented code below //for ( int i=0; ilogPeakv/10. ) { // logLowestPeak=i; // break; // } //} int highestPeak= linHighestPeak; if ( everIncreasing>everIncreasingLimit || ( logPeak>1 && (1.*logMedian/loghist.length() > 1.*linMedian/hist.length() ) ) ) { logger.finer( String.format( "switch to log everIncreasing=%s logPeak=%s logMedianPerc=%5.1f linMedianPerc=%5.1f", everIncreasing, logPeak, 1.*logMedian/loghist.length(), 1.*linMedian/hist.length() ) ); hist= loghist; ipeak= logPeak; peakv= logPeakv; highestPeak= logHighestPeak; log= true; } else { logger.finer( String.format( "stay linear everIncreasing=%s logPeak=%s logMedianPerc=%5.1f linMedianPerc=%5.1f", everIncreasing, logPeak, 1.*logMedian/loghist.length(), 1.*linMedian/hist.length() ) ); } if ( peakv<20 ) { ipeak= highestPeak; peakv= (int) hist.value(ipeak); } else if ( ipeak Math.max( Math.ceil( hist.value(logLowestPeak) / 10 ), 1 ) ) { // ipeak= logHighestPeak; // peakv= (int)hist.value(ipeak); // } } } else if ( peakv<20 ) { // loosen things up when there isn't much data. ipeak= linHighestPeak; peakv= (int) hist.value(ipeak); } else if ( ipeak Math.max( Math.ceil( hist.value(linLowestPeak) / 10. 
), 1 ) ) { ipeak= linHighestPeak; peakv= (int)hist.value(ipeak); } } double ss=0; double nn=0; RankZeroDataSet theResult=null; boolean haveResult= false; QDataSet sss= (QDataSet) hist.property( QDataSet.PLANE_0 ); // DANGER--don't change PLANE_0! for ( int i=ipeak; i>=0; i-- ) { if ( hist.value(i)>(peakv/4) ) { ss+= sss.value(i) * hist.value(i); nn+= hist.value(i); } else { break; } } for ( int i=ipeak+1; i(peakv/4) ) { ss+= sss.value(i) * hist.value(i); nn+= hist.value(i); } else { break; } } // one last sanity check, for the PlasmaWaveGroup file:///home/jbf/project/autoplot/data/qds/gapBug/gapBug.qds?Frequency if ( t<65 && log ) { double s= Math.abs( ss/nn ); int skip=0; int bigSkip=0; for ( int i=0; i s*1.5 ) { skip++; if ( d > s*7 ) { bigSkip++; } } } logger.log(Level.FINE, "guessCadence({0})->null because of log,skip,not bigSkip", new Object[]{xds}); if ( bigSkip==0 && skip>0 ) { logger.exiting(CLASSNAME,"guessCadenceNew"); theResult= null; haveResult= true; } } if ( !haveResult ) { MutablePropertyDataSet result= DRank0DataSet.create(ss/nn); // 1582: one last check, because the gaps in the spectrogram come up way too often! if ( t<65 ) { QDataSet r; QDataSet tresult= Ops.multiply(result,1.10); if ( log && logDiff!=null ) { r= Ops.where( Ops.gt( logDiff,tresult ) ); } else { r= Ops.where( Ops.gt( diffs,tresult ) ); } if ( r.length()>t/4 ) { theResult= null; haveResult= true; } try { if ( !log ) { Units du= SemanticOps.getUnits(diffs); QDataSet dd= DataSetUtil.gcd( diffs, DataSetUtil.asDataSet(SemanticOps.getUnits(diffs).createDatum(0.1)) ); double ddd= dd.value(); boolean isMultiples= true; for ( int i=0; i0 ) { isMultiples= false; } if ( ( diffs.value(i) / ddd ) > 100 ) { // test014_004--log spacing. 
isMultiples= false; } } if ( isMultiples ) { result= DRank0DataSet.create(dd.value()); } } } catch ( Exception e ) { logger.fine("unable to perform gcd test"); } } if ( !haveResult ) { if ( log ) { result.putProperty( QDataSet.UNITS, Units.logERatio ); result.putProperty( QDataSet.SCALE_TYPE, "log" ); logger.log(Level.FINE, "guessCadence({0})->{1} (log)", new Object[]{xds, result}); logger.exiting(CLASSNAME,"guessCadenceNew"); theResult= (RankZeroDataSet)result; } else { result.putProperty( QDataSet.UNITS, xunits.getOffsetUnits() ); logger.log(Level.FINE, "guessCadence({0})->{1} (linear)", new Object[]{xds, result}); logger.exiting(CLASSNAME,"guessCadenceNew"); theResult= (RankZeroDataSet)result; } } } //DataSetAnnotations.getInstance().putAnnotation( xds, DataSetAnnotations.ANNOTATION_CADENCE, theResult ); return theResult; } /** * return the value of the nth biggest item. This keeps n values in memory. * @param set rank 1 dataset containing comparable data. * @param n the rank. * @return the value at this rank. */ public static Datum nthLargest( final QDataSet set, int n ) { TreeSet ts= new TreeSet<>(); int len= set.length(); QDataSet wds= weightsDataSet(set); int i=0; while ( ts.size()0 ) { ts.add( set.value(i) ); } i=i+1; } if ( i==set.length() && ts.size()0 ) { Double dsiv= set.value(i); if ( dsiv>sm ) { ts.remove(sm); ts.add(dsiv); sm= ts.first(); } } } return Datum.create( sm, SemanticOps.getUnits(set) ); } /** * use K-means algorithm to find N means in a rank 1 dataset. * @param xds dataset containing data from N normal distributions * @param n number of divisions. * @return dataset containing the index for each point. 
*/ public static QDataSet kmeansCadence( QDataSet xds, int n ) { boolean done= false; double[] boundaries; QDataSet ext= Ops.extent(xds); double min= ext.value(0); double max= ext.value(1); boundaries= new double[n-1]; for ( int i=0; i= boundaries[b]) b++; while ( b > 0 && d < boundaries[b-1] ) b--; means[b] += d; ww[b] += 1; bs[i]= b; } done= true; for ( int j=0; jmaxhh) { maxhh= (int)hh.value(i); imaxhh= i; } } QDataSet dephh= (QDataSet)hh.property(QDataSet.DEPEND_0); return dephh.slice(imaxhh); } /** * return the cadence in X for the given X tags and Y tags. The Y tags can repeat, and when this is the * case, the X cadence will be span between successive X tags with the same Y tag. * The goal will be a rank 0 * dataset that describes the intervals, but this will also return a rank 1 * dataset when multiple cadences are found. * @param xds the x tags, which may not contain fill values for non-null result. * @param yds the y values, which may not contain fill values for non-null result. * @param zds the z values * @return rank 0 or rank 1 dataset. */ public static QDataSet guessCadence( QDataSet xds, QDataSet yds, QDataSet zds ) { QDataSet dxds= Ops.diff( xds ); if ( yds.rank()==1 && xds.rank()==1 && yds.length()==xds.length() ) { DataSetBuilder dsb= new DataSetBuilder(1,100); } dxds= Ops.divide( Ops.add( Ops.append( dxds.slice(0), dxds ), Ops.append( dxds, dxds.slice(dxds.length()-1) ) ), Ops.dataset(2) ); QDataSet hh= Ops.autoHistogram( dxds ); int maxhh= -1; int imaxhh= -1; for ( int i=0; imaxhh) { maxhh= (int)hh.value(i); imaxhh= i; } } QDataSet dephh= (QDataSet)hh.property(QDataSet.DEPEND_0); return dephh.slice(imaxhh); } /** * return true if each record of DEPEND_0 is the same. Rank 0 datasets * are trivially constant. * TODO: ds.slice(i) can be slow because equivalent does so much with the metadata. * @param ds any dataset * @return true if the dataset doesn't change with DEPEND_0 or is rank 0. 
*/ public static boolean isConstant( QDataSet ds ) { if ( ds.rank()==0 || ds.length()==0 ) { return true; } else { QDataSet s1= ds.slice(0); for ( int i=1; i p0= DataSetUtil.getProperties(d0); int n=p0.size(); for ( int i=1; i p1= DataSetUtil.getProperties(d1); Map eqp= Ops.equalProperties( p0, p1 ); if ( eqp.size()!=n ) { return false; } } if ( ds instanceof MutablePropertyDataSet && !((MutablePropertyDataSet)ds).isImmutable() ) { logger.fine("putProperty(QDataSet.QUBE,Boolean.TRUE)"); ((MutablePropertyDataSet)ds).putProperty( QDataSet.QUBE, Boolean.TRUE ); } return true; } /** * check to see if a dataset really is a qube, even if there is a * rank 2 dep1. Note this ignores BUNDLE_1 property if there is a DEPEND_1. * This was motivated by the fftPower routine, which returned a rank 2 DEPEND_1, * but is typically constant, and RBSP/ECT datasets that often have rank 2 * DEPEND_1s that are constant. This * will putProperty(QDataSet.QUBE,Boolean.TRUE) when the dataset really is * a qube. * @param ds any dataset * @return true if the dataset really is a qube. */ public static boolean checkQube( QDataSet ds ) { if ( ds.rank()<2 ) { return true; } else { Boolean q = (Boolean) ds.property(QDataSet.QUBE); if ( q == null || q.equals(Boolean.FALSE)) { if ( SemanticOps.isJoin(ds) ) { return checkQubeJoin(ds); } for ( int i=1; i1 ) { if ( !isConstant(dep) ) { return false; } } } if ( ds instanceof MutablePropertyDataSet && !((MutablePropertyDataSet)ds).isImmutable() ) { logger.fine("putProperty(QDataSet.QUBE,Boolean.TRUE)"); ((MutablePropertyDataSet)ds).putProperty( QDataSet.QUBE, Boolean.TRUE ); } return true; } else { return true; } } } /** * test to see that the dataset is a qube. * @param ds QDataSet of any rank. * @return true if the dataset is a qube. 
*/ public static boolean isQube(QDataSet ds) { if (ds.rank() <= 1) return true; Boolean q = (Boolean) ds.property(QDataSet.QUBE); if (q == null || q.equals(Boolean.FALSE)) { QDataSet dep1= (QDataSet) ds.property(QDataSet.DEPEND_1); return ds.rank()==2 && dep1!=null && dep1.rank()==1; } return true; } /** * provides a convenient way of indexing qubes, returning an int[] of * length ds.rank() containing each dimension's length, * or null if the dataset is not a qube. * @param ds * @return int[] of length ds.rank() containing each dimension's length, or null if the dataset is not a qube. */ public static int[] qubeDims(QDataSet ds) { if (ds.rank() > 4) { int [] qube= new int[ds.rank()]; int rank=ds.rank(); for ( int i=0; i 1) { qube[1] = ds.length(0); if (ds.rank() > 2) { qube[2] = ds.length(0, 0); if (ds.rank() > 3) { qube[3] = ds.length(0, 0, 0); if (ds.rank() > 4) { // TODO: generalize to rank N throw new IllegalArgumentException("rank limit"); } } } } return qube; } /** * returns 1 for zero-length qube, the product otherwise. 
* @param qube int array * @return the product of the elements of the array */ public static int product( int[] qube ) { switch ( qube.length ) { case 0: return 1; case 1: return qube[0]; case 2: return qube[0]*qube[1]; case 3: return qube[0]*qube[1]*qube[2]; case 4: return qube[0]*qube[1]*qube[2]*qube[3]; default: { int result= qube[0]; for ( int i=1; i 0) { for (int i = 1; i < ds.length(); i++) { if (ds.length(i) != ds.length(0)) { throw new IllegalArgumentException("dataset is not a qube"); } } } break; case 3: qube = new int[]{ds.length(), ds.length(0), ds.length(0, 0)}; if (ds.length() > 0 && ds.length(0) > 0) { for (int i = 1; i < ds.length(); i++) { if (ds.length(i) != ds.length(0)) { throw new IllegalArgumentException("dataset is not a qube"); } for (int j = 1; j < ds.length(0); j++) { if (ds.length(i, j) != ds.length(0, 0)) { throw new IllegalArgumentException("dataset is not a qube"); } } } } break; case 4: qube = new int[]{ds.length(), ds.length(0), ds.length(0, 0), ds.length(0,0,0) }; if (ds.length() > 0 && ds.length(0) > 0 && ds.length(0,0)>0 ) { for (int i = 1; i < ds.length(); i++) { if (ds.length(i) != ds.length(0)) { throw new IllegalArgumentException("dataset is not a qube"); } for (int j = 1; j < ds.length(0); j++) { if (ds.length(i, j) != ds.length(0, 0)) { throw new IllegalArgumentException("dataset is not a qube"); } for (int k = 1; k < ds.length(0,0); k++) { if (ds.length(i, j, k) != ds.length(0, 0, 0)) { throw new IllegalArgumentException("dataset is not a qube"); } } } } } break; default: throw new IllegalArgumentException("rank not supported"); } if (qube != null) { ds.putProperty(QDataSet.QUBE, Boolean.TRUE); } } /** * return a human-readable string representing the dataset * @param ds the dataset to represent * @return a human-readable string */ public static String format(QDataSet ds) { return format( ds, true ); } /** * return a human-readable string representing the dataset * @param ds the dataset to represent * @param showContext show 
the context property (@slice2=1) if present and ds is rank0. * @return a human-readable string * @see #toString(org.das2.qds.QDataSet) */ public static String format(QDataSet ds,boolean showContext) { if ( ds.property(QDataSet.BUNDLE_0)!=null ) { StringBuilder result= new StringBuilder(); // for documenting context. for ( int i=0; i7 ) rs= df.format(ds.value(0)); if ( is.length()>7 ) is= df.format(ds.value(1)); return "(" + rs + "+" + is+"j)"; // Use "j" instead of "i" because Python does this. } if ( ds.rank()==0 ) { String name= (String) ds.property(QDataSet.NAME); Units u= (Units) ds.property(QDataSet.UNITS); String format= (String) ds.property( QDataSet.FORMAT ); StringBuilder result= new StringBuilder(); if ( name!=null ) { result.append(name).append("="); } if ( format!=null && format.trim().length()>0 ) { FormatStringFormatter fsf= new FormatStringFormatter( format, true ); if ( u!=null ) { if ( UnitsUtil.isTimeLocation(u) ) { double millis= u.convertDoubleTo(Units.t1970, ds.value() ); Calendar cal= Calendar.getInstance(); cal.setTimeInMillis( (long)millis ); // TODO: check how to specify to nanos. 
result.append( String.format(Locale.US,format,cal) ); } else { result.append( fsf.format( DataSetUtil.asDatum(ds) ) ); } } else { result.append( fsf.format( DataSetUtil.asDatum(ds) ) ); } } else { if ( u!=null ) { result.append( u.createDatum(ds.value()).toString() ); } else { result.append( ds.value() ); } } if ( showContext ) { QDataSet context0= (QDataSet) ds.property("CONTEXT_0"); if ( context0!=null ) { result.append(" @ ").append(format(context0)); } } return result.toString(); } StringBuilder buf = new StringBuilder(ds.toString() + ":\n"); if (ds.rank() == 1) { for (int i = 0; i < Math.min(40, ds.length()); i++) { buf.append(" ").append(ds.value(i)); } if (ds.length() >= 40) { buf.append(" ..."); } } if (ds.rank() == 2) { for (int i = 0; i < Math.min(10, ds.length()); i++) { for (int j = 0; j < Math.min(20, ds.length(i)); j++) { buf.append(" ").append(ds.value(i, j)); } if (ds.length() >= 40) { buf.append(" ..."); } buf.append("\n"); } if (ds.length() >= 10) { buf.append(" ... ... ... \n"); } } return buf.toString(); } /** * return a human readable statistical representation of the dataset. Currently * this is the mean, stddev ad number of points. * @param ds the data * @return return a human readable statistical representation */ public static String statsString(QDataSet ds) { RankZeroDataSet stats = DataSetOps.moment(ds); return "" + stats.value() + "+/-" + stats.property("stddev") + " N=" + stats.property("validCount"); } /** * returns true if the dataset is valid, false otherwise. If problems is * non-null, then problems will be indicated here. * @param ds rank N dataset. * @param problems insert problem descriptions here, if null then ignore * @return true if the dataset is valid, false otherwise */ public static boolean validate(QDataSet ds, List problems) { if (problems == null) problems = new ArrayList<>(); return validate(ds, problems, 0); } /** * add method for validating before link is called. 
* @param xds * @param yds * @param problems insert problem descriptions here, if null then ignore * @return true if the datasets can be linked into a valid dataset, false otherwise */ public static boolean validate(QDataSet xds, QDataSet yds, List problems ) { if ( xds.length()!=yds.length() ) { if (problems != null) problems.add(String.format("DEPEND_%d length is %d, should be %d.", 0, xds.length(), yds.length())); return false; } else { return validate( Ops.link(xds, yds), problems, 0 ); } } /** * add method for validating before link is called. * @param xds rank 1 tags * @param yds rank 1 or rank 2 tags * @param zds the dependent data. * @param problems insert problem descriptions here, if null then ignore * @return true if the datasets can be linked into a valid dataset, false otherwise */ public static boolean validate(QDataSet xds, QDataSet yds, QDataSet zds, List problems ) { if ( xds.length()!=zds.length() ) { if (problems != null) problems.add(String.format("DEPEND_%d length is %d, should be %d.", 0, xds.length(), zds.length())); return false; } else { return validate( Ops.link(xds, yds, zds ), problems, 0 ); } } /** * return the total number of values in the dataset. For qubes this is the product * of the dimension lengths, for other datasets we create a dataset of lengths * and total all the elements. * @param ds * @return the number of values in the dataset. 
*/ public static int totalLength(QDataSet ds) { if ( ds.rank()==0 ) return 1; int[] qube= DataSetUtil.qubeDims(ds); if ( qube==null ) { LengthsDataSet lds= new LengthsDataSet(ds); QubeDataSetIterator it= new QubeDataSetIterator(lds); int total= 0; while ( it.hasNext() ) { it.next(); total+= it.getValue(lds); } return total; } else { int total= qube[0]; for ( int i=1; i problems, int dimOffset) { if (problems == null) problems = new ArrayList<>(); QDataSet dep = (QDataSet) ds.property(QDataSet.DEPEND_0); if (dep != null) { if ( dep.rank()==0 ) { problems.add("DEPEND_0 is rank 0, where it must be rank 1"); return false; } if (dep.length() != ds.length()) { problems.add(String.format("DEPEND_%d length is %d while data length is %d.", dimOffset, dep.length(), ds.length())); } //if ( dep.rank() > ds.rank() ) { // This happens when we have BINS_1 for DEPEND_0. // problems.add(String.format("DEPEND_%d rank is %d but ds.rank() is less (%d)", dimOffset, dep.rank(), ds.rank()) ); //} else { // if ( dep.rank()==2 && dep.length(0)!=ds.length(0) ) problems.add(String.format("DEPEND_%d length(0) is %d while data.length(0) is %d.", dimOffset, dep.length(0), ds.length(0)) ); //} if ( dep.rank()>1 ) { if ( Schemes.isRank2Bins(dep) ) { // okay } else if ( Schemes.isXYScatter(dep) ) { // okay } else { problems.add( "DEPEND_0 should have only one index or must be a bins ([n,2]) dataset."); } } if (ds.rank() > 1 && ds.length() > 0) { QDataSet dep1= (QDataSet)ds.property(QDataSet.DEPEND_1); if ( dep1!=null && dep1.rank()>1 ) { if ( dep1.length()!=ds.length() && !SemanticOps.isBins(dep1) ) { problems.add(String.format("rank 2 DEPEND_1 length is %d while data length is %d.", dep1.length(), ds.length())); } } if ( ds.rank()>QDataSet.MAX_RANK ) { validate( ds.slice(0), problems, dimOffset + 1); // we must use native. } else { validate(DataSetOps.slice0(ds, 0), problems, dimOffset + 1); // don't use native, because it may copy. Note we only check the first assuming QUBE. 
} } } if ( ds.property(QDataSet.JOIN_0)!=null ) { if ( dimOffset>0 ) { problems.add( "JOIN_0 must only be on zeroth dimension: "+dimOffset ); } else { Units u= null; boolean onceNotify= false; for ( int i=0; i1 Object o= bds1.property(QDataSet.DEPEND_1); if ( o!=null && !(o instanceof QDataSet) ) { validate( bds1,problems,1) ; } } } } } } Object obds= ds.property(QDataSet.BUNDLE_1); if ( obds!=null && !(obds instanceof QDataSet) ) { throw new IllegalArgumentException("BUNDLE_1 property is not a QDataSet"); } else { if ( obds!=null ) { QDataSet bds= (QDataSet)obds; if ( ds.rank()<2 ) { // this happens with CDF slice1, when we don't completely implement slice1. problems.add( "BUNDLE_1 found but dataset is only rank 1"); } else { for ( int i=0; i< Math.min(1,bds.length()); i++ ) { QDataSet bds1= DataSetOps.unbundle(ds,i,true); // assumes rank1, so we have excessive work for rank>1 Object o= bds1.property(QDataSet.DEPEND_1); if ( o!=null && !(o instanceof QDataSet) ) { validate( bds1,problems,1) ; } } } } } Object obins= ds.property( QDataSet.BINS_1 ); if ( obins!=null && !( obins instanceof String ) ) { throw new IllegalArgumentException("BINS_1 property is not a String"); } else { if ( obins!=null ) { if ( obins.equals( QDataSet.VALUE_BINS_MIN_MAX ) ) { if ( ds.length(0)!=2 ) { problems.add( "BINS_1 is 'min,max' but length is not 2." ); } boolean outOfOrder= false; for ( int i=0; ids.value(i,1) ) { outOfOrder= true; } } if ( outOfOrder ) { problems.add( "BINS_1 is min,max min is greater than max" ); } } if ( obds!=null ) { // 2060: check for constant units in BUNDLE_1 Units cu= (Units)ds.property(QDataSet.UNITS); if ( cu==null ) cu= Units.dimensionless; QDataSet bds= (QDataSet)obds; Units inconsistentUnit= null; for ( int j=0; j0 && plane0.length()!=ds.length() ) { problems.add( String.format( "PLANE_0 length is %d, should be %d", plane0.length(), ds.length() ) ); } } } return problems.isEmpty(); } /** * throw out DEPEND and PLANE to make dataset valid. 
* @param ds the dataset to repair in place.  DEPEND_i properties whose length disagrees with
     *   the data are set to null; this code does not examine PLANE_i properties.
     */
    public static void makeValid(MutablePropertyDataSet ds) {
        int[] qubeDims = null;
        if (DataSetUtil.isQube(ds)) {
            qubeDims = DataSetUtil.qubeDims(ds);
        }
        int i = 0;
        QDataSet dep = (QDataSet) ds.property("DEPEND_" + i);
        if (dep != null) {
            if (dep.length() != ds.length()) {
                ds.putProperty("DEPEND_" + i, null);
            }
        }
        if (qubeDims != null) {
            // the higher dimensions can only be checked when the qube dimensions are known.
            for (i = 1; i < qubeDims.length; i++) {
                dep = (QDataSet) ds.property("DEPEND_" + i);
                if (dep != null) {
                    if ( dep.length() != qubeDims[i] ) {
                        ds.putProperty("DEPEND_" + i, null);
                    }
                }
            }
        }
    }

    /**
     * special weightsDataSet for when there is a bundle, and each
     * component could have its own FILL_VALUE and VALID_MAX.  Each component
     * gets its own weights dataset in a JoinDataSet.
     * @param ds rank 2 bundle dataset
     * @return dataset with the same geometry but a weightsDataSet of each bundled dataset.
     */
    public static QDataSet bundleWeightsDataSet( final QDataSet ds ) {
        QDataSet bds= (QDataSet)ds.property(QDataSet.BUNDLE_1);
        if ( bds==null ) {
            throw new IllegalArgumentException("dataset must be bundle");
        }
        QDataSet result=null;
        int nb= ds.length(0);
        if ( nb==0 ) {
            throw new IllegalArgumentException("bundle is empty");
        }
        // NOTE(review): the text of this file is truncated inside the loop header below; the
        // remainder of this method and the opening of the following method (which selects a
        // WeightsDataSet implementation from VALID_MIN, VALID_MAX, UNITS and FILL_VALUE) are
        // missing from this copy.  The surviving tokens are left exactly as found.
        for ( int i=0; i0 && ds.length(0)>0 ) {
                return bundleWeightsDataSet(ds);
            } else {
                validMin = (Number) ds.property(QDataSet.VALID_MIN);
                validMax = (Number) ds.property(QDataSet.VALID_MAX);
                u = (Units) ds.property(QDataSet.UNITS);
                ofill = (Number) ds.property(QDataSet.FILL_VALUE);
            }
            if (validMin == null) validMin = Double.NEGATIVE_INFINITY;
            if (validMax == null) validMax = Double.POSITIVE_INFINITY;
            double fill = (ofill == null ? Double.NaN : ofill.doubleValue());
            boolean check = (validMin.doubleValue() > -1 * Double.MAX_VALUE || validMax.doubleValue() < Double.MAX_VALUE || !(Double.isNaN(fill)));
            if (check) {
                if ( validMin.doubleValue() > -1 * Double.MAX_VALUE || validMax.doubleValue() < Double.MAX_VALUE ) {
                    result= new WeightsDataSet.ValidRangeFillFinite(ds);
                } else {
                    result = new WeightsDataSet.ValidRangeFillFinite(ds);
                }
            } else {
                if (u != null) {
                    result = new WeightsDataSet.FillFinite(ds); // support legacy Units to specify fill value
                } else {
                    result = new WeightsDataSet.Finite(ds);
                }
            }
        }
        return result;
    }

    /**
     * Iterate through the dataset, replacing every point that has zero weight
     * (as reported by weightsDataSet) with fill=-1e31, and set FILL_VALUE to -1e31.
     * If the dataset is writable, then the dataset is modified; otherwise a copy
     * is made and the copy is modified and returned.
     *
     * NOTE(review): earlier documentation stated that VALID_MIN and VALID_MAX are
     * cleared; this code leaves those properties unchanged.
     *
     * @param ds rank N QUBE dataset.
     * @return the writable dataset, which is ds itself when ds is already writable.
     */
    public static WritableDataSet canonizeFill(QDataSet ds) {
        if (!(ds instanceof WritableDataSet)) {
            ds = DDataSet.copy(ds); // assumes ds is QUBE right now...
        }
        WritableDataSet wrds = (WritableDataSet) ds;
        QubeDataSetIterator it = new QubeDataSetIterator(ds);
        QDataSet wds = weightsDataSet(ds);
        double fill = -1e31;
        while (it.hasNext()) {
            it.next();
            if (it.getValue(wds) == 0) {
                it.putValue(wrds, fill);
            }
        }
        wrds.putProperty(QDataSet.FILL_VALUE, fill);
        return wrds;
    }

    /**
     * convert the dataset to the given units.  VALID_MIN, VALID_MAX and FILL_VALUE
     * are converted along with the data when they are present.  A dataset with no
     * UNITS property is treated as dimensionless.
     * @param ds the dataset
     * @param u new Units
     * @return equivalent dataset with the new units.
     */
    public static QDataSet convertTo( QDataSet ds, Units u ) {
        Units su= (Units) ds.property(QDataSet.UNITS);
        if ( su==null ) su= Units.dimensionless;
        UnitsConverter uc= su.getConverter(u);
        // NOTE(review): the cast assumes ArrayDataSet.copy yields a DDataSet for this input -- confirm.
        DDataSet result = (DDataSet) ArrayDataSet.copy(ds); // assumes ds is QUBE right now...
        QubeDataSetIterator it= new QubeDataSetIterator(ds);
        while ( it.hasNext() ) {
            it.next();
            it.putValue( result, uc.convert( it.getValue(ds)) );
        }
        Number vmin= (Number) ds.property(QDataSet.VALID_MIN);
        if ( vmin!=null ) result.putProperty( QDataSet.VALID_MIN, uc.convert(vmin));
        Number vmax= (Number) ds.property(QDataSet.VALID_MAX);
        if ( vmax!=null ) result.putProperty( QDataSet.VALID_MAX, uc.convert(vmax));
        Number fill= (Number) ds.property(QDataSet.FILL_VALUE);
        if ( fill!=null ) result.putProperty( QDataSet.FILL_VALUE, uc.convert(fill));
        result.putProperty( QDataSet.UNITS, u );
        return result;
    }

    /**
     * get the value of the rank 0 dataset in the specified units.
     * For example, value( ds, Units.km ).  A missing UNITS property on the dataset,
     * or a null target, is treated as Units.dimensionless, consistent with convertTo.
     * @param ds the rank 0 dataset
     * @param tu target units, or null for dimensionless
     * @return the double in target units.
     */
    public static double value( RankZeroDataSet ds, Units tu ) {
        Units u= (Units) ds.property(QDataSet.UNITS);
        if ( tu==null && u==null ) {
            return ds.value();
        }
        // previously a null on only one side was dereferenced or passed straight to the converter.
        if ( u==null ) u= Units.dimensionless;
        if ( tu==null ) tu= Units.dimensionless;
        return u.convertDoubleTo(tu, ds.value() );
    }

    /**
     * convert the rank 0 dataset to a Datum, delegating to asDatum(QDataSet).
     * @param ds rank 0 dataset.
     * @return Datum
     */
    public static Datum asDatum( RankZeroDataSet ds ) {
        return asDatum((QDataSet)ds);
    }

    /**
     * convert the rank 0 QDataSet to a Datum.  A dataset whose weight is zero
     * is returned as the fill Datum of its units, and a non-blank FORMAT
     * property is carried over as the Datum's formatter.
     * @param ds rank 0 dataset.
     * @return Datum
     * @throws IllegalArgumentException if the dataset is not rank 0.
     */
    public static Datum asDatum( QDataSet ds ) {
        if ( ds.rank()>0 ) {
            throw new IllegalArgumentException("dataset is not rank 0");
        } else {
            Units u= SemanticOps.getUnits(ds);
            String format= (String) ds.property(QDataSet.FORMAT);
            QDataSet wds= weightsDataSet(ds);
            if ( wds.value()==0 ) {
                return u.getFillDatum();
            } else {
                if ( format==null || format.trim().length()==0 ) {
                    return Datum.create( ds.value(), u );
                } else {
                    return Datum.create( ds.value(), u, new FormatStringFormatter(format, true) );
                }
            }
        }
    }

    /**
     * return the DatumRange equivalent of this 2-element, rank 1 bins dataset.
     *
     * @param ds a rank 1, 2-element bins dataset.
     * @param sloppy true indicates we don't check BINS_0 property.
* @return an equivalent DatumRange */ public static DatumRange asDatumRange( QDataSet ds, boolean sloppy ) { Units u= SemanticOps.getUnits(ds); double dmin= ds.value(0); double dmax= ds.value(1); QDataSet bds= (QDataSet) ds.property( QDataSet.BUNDLE_0 ); if ( bds!=null ) { Units u0= (Units) bds.property(QDataSet.UNITS,0); Units u1= (Units) bds.property(QDataSet.UNITS,1); if ( u0!=null && u1!=null ) { if ( u0==u1 ) { u= u0; } else { logger.finest("accommodating bundle of min,delta."); u= u0; dmax= u1.convertDoubleTo( u0.getOffsetUnits(), dmax ) + dmin; } } } if ( sloppy==false ) { if ( !ds.property( QDataSet.BINS_0 ).equals(QDataSet.VALUE_BINS_MIN_MAX) ) { throw new IllegalArgumentException("expected min,max for BINS_0 because we are not allowing sloppy."); } } return new DatumRange( dmin, dmax, u ); } /** * return the DatumRange equivalent of this 2-element, rank 1 bins dataset. This uses the * sloppy mode, which does not check the BINS_0 property. * * @param ds a two-element dataset * @return a DatumRange. */ public static DatumRange asDatumRange( QDataSet ds ) { return asDatumRange( ds, true ); } /** * return DatumVector, which is a 1-d array of Datums. * @param ds a rank 1 QDataSet * @return a DatumVector */ public static DatumVector asDatumVector( QDataSet ds ) { if ( ds.rank()!=1 ) throw new IllegalArgumentException("Rank must be 1"); double[] dd= new double[ds.length()]; for ( int i=0; i props, QDataSet cds ) { int idx=0; while ( props.get("CONTEXT_"+idx)!=null ) idx++; props.put( "CONTEXT_"+idx, cds ); } /** * provide the context as a string, for example to label a plot. The dataset CONTEXT_i properties are inspected, * each of which must be one of:
     * <ul>
     * <li>rank 0 dataset</li>
     * <li>rank 1 bins dataset</li>
     * <li>rank 1 bundle</li>
     * </ul>
    * Here a comma followed by a space is used as the delimiter.
    *
    * @param ds the dataset containing context properties which are rank 0 datums or rank 1 datum ranges.
    * @return a string describing the context.
    */
    public static String contextAsString( QDataSet ds ) {
        // delegate to the general form, using ", " between the context elements.
        final String commaDelimiter = ", ";
        return contextAsString( ds, commaDelimiter );
    }

    /**
     * provide the context as a string, for example to label a plot.  The dataset CONTEXT_i properties are inspected,
     * each of which must be one of:
     * <ul>
     * <li>rank 0 dataset</li>
     * <li>rank 1 bins dataset</li>
     * <li>rank 1 bundle</li>
     * </ul>
    * @param ds the dataset containing context properties which are rank 0 datums or rank 1 datum ranges. * @param delim the delimiter between context elements, such as "," or "!c" * @return a string describing the context. */ public static String contextAsString( QDataSet ds, String delim ) { StringBuilder result= new StringBuilder(); QDataSet cds= (QDataSet) ds.property( QDataSet.CONTEXT_0 ); logger.log(Level.FINE, "contextAsString {0} CONTEXT_0={1}", new Object[]{ds, cds}); int idx=0; while ( cds!=null ) { if ( cds.rank()>0 ) { if ( cds.rank()==1 && cds.property(QDataSet.BINS_0)!=null ) { if ( cds.value(1)-cds.value(0) > 0 ) { QDataSet fcds= DataSetUtil.asDataSet( DatumRangeUtil.roundSections( DataSetUtil.asDatumRange( cds, true ), 1000 ) ); result.append( DataSetUtil.format(fcds,false) ); } else { result.append( DataSetUtil.format(cds,false) ); } } else { QDataSet extent= Ops.extentSimple(cds,null,null); if ( extent.value(1)==extent.value(0) ) { result.append( DataSetUtil.format(cds.slice(0),false) ); // for CLUSTER/PEACE this happens where rank 1 context is all the same value } else { String name= (String) cds.property(QDataSet.NAME); if ( name==null ) name="data"; String label= name + " varies from " + extent.slice(0) + " to "+ extent.slice(1); result.append(label); //result.append(DataSetUtil.format(extent, false)).append(" ").append( cds.length() ).append( " different values"); // slice was probably done when we should't have. } } } else { result.append( DataSetUtil.format(cds,false) ); } idx++; cds= (QDataSet) ds.property( "CONTEXT_"+idx ); if ( cds!=null ) result.append(delim); } return result.toString(); } /** * returns the indeces of the min and max elements of the monotonic dataset. * This uses DataSetUtil.isMonotonic() which would be slow if MONOTONIC is * not set. * @param ds monotonic, rank 1 dataset. * @return the indeces [min,max] note max is inclusive. * @see org.das2.qds.ops.Ops#extent which returns the range containing any data. 
* @see #isMonotonic(org.das2.qds.QDataSet) which must be true * @throws IllegalArgumentException when isMonotonic(ds) is false. */ public static int[] rangeOfMonotonic( QDataSet ds ) { if ( ds.rank()!=1 ) throw new IllegalArgumentException("must be rank 1"); if ( DataSetUtil.isMonotonic(ds) ) { QDataSet wds= DataSetUtil.weightsDataSet(ds); int firstValid= 0; while ( firstValid=0 && wds.value(lastValid)==0 ) lastValid--; if ( ( lastValid-firstValid+1 ) == 0 ) { throw new IllegalArgumentException("special case where monotonic dataset contains no valid data"); } return new int[] { firstValid, lastValid }; } else { throw new IllegalArgumentException("expected monotonic dataset"); } } /** * returns the index of a tag, or the (-(insertion point) - 1). (See Arrays.binarySearch) * @param ds monotonically increasing data. * @param datum value we are looking for * @param low inclusive lower bound of the search * @param high inclusive upper bound of the search * @return the index of a tag, or the (-(insertion point) - 1) */ public static int xTagBinarySearch( QDataSet ds, Datum datum, int low, int high ) { Units toUnits= SemanticOps.getUnits( ds ); double key= datum.doubleValue(toUnits); if ( ds.rank()!=1 ) throw new IllegalArgumentException("data must be rank 1"); if ( high>=ds.length() ) throw new IndexOutOfBoundsException("high index must be within the data"); while (low <= high) { int mid = (low + high) >> 1; double midVal = ds.value(mid); int cmp; if (midVal < key) { cmp = -1; // Neither val is NaN, thisVal is smaller } else if (midVal > key) { cmp = 1; // Neither val is NaN, thisVal is larger } else { long midBits = Double.doubleToLongBits(midVal); long keyBits = Double.doubleToLongBits(key); cmp = (midBits == keyBits ? 0 : // Values are equal (midBits < keyBits ? -1 : // (-0.0, 0.0) or (!NaN, NaN) 1)); // (0.0, -0.0) or (NaN, !NaN) } if (cmp < 0) low = mid + 1; else if (cmp > 0) high = mid - 1; else return mid; // key found } return -(low + 1); // key not found. 
} /** * returns the index of the closest index in the data. * This supports rank 1 datasets, and rank 2 bins datasets where the bin is min,max. * * @param ds tags dataset * @param datum the location to find * @return the index of the closest point. * @throws IllegalArgumentException if the dataset is not rank 1. * @throws IllegalArgumentException if the dataset is length 0. * @throws IllegalArgumentException if the dataset is all fill. */ public static int closestIndex( QDataSet ds, Datum datum ) { logger.entering( CLASSNAME, "closestIndex" ); if ( ds.rank()!=1 ) { if ( ds.rank()==2 && SemanticOps.isBins(ds) ) { ds= Ops.reduceMean(ds,1); } else { throw new IllegalArgumentException("ds rank should be 1"); } } if ( ds.length()==0 ) { throw new IllegalArgumentException("ds length is zero"); } boolean handleFill= false; QDataSet wds= Ops.valid(ds); QDataSet r; if ( UnitsUtil.isNominalMeasurement( datum.getUnits() ) ) { throw new IllegalArgumentException("datum cannot have ordinal units: "+datum ); } if ( UnitsUtil.isNominalMeasurement( SemanticOps.getUnits(ds) ) ) { throw new IllegalArgumentException("ds cannot have ordinal units: "+ds ); } boolean mono= isMonotonic(ds); if ( mono ) { // take a millisecond to check for this oft-occurring case. if ( wds.value(0)>0. && datum.le( asDatum(ds.slice(0)) ) ) { logger.exiting( CLASSNAME, "closestIndex" ); return 0; } int n= ds.length()-1; if ( wds.value(n)>0. && datum.ge( asDatum(ds.slice(n)) ) ) { logger.exiting( CLASSNAME, "closestIndex" ); return n; } } if ( wds instanceof ConstantDataSet && wds.value(0)==1 ) { // optimize r= null; } else { if ( DataSetAnnotations.VALUE_0.equals(DataSetAnnotations.getInstance().getAnnotation(ds,DataSetAnnotations.ANNOTATION_INVALID_COUNT)) ) { r= null; } else { if ( ds instanceof IndexGenDataSet && wds instanceof org.das2.qds.WeightsDataSet.Finite ) { // this happens a lot. 
DataSetAnnotations.getInstance().putAnnotation(ds,DataSetAnnotations.ANNOTATION_INVALID_COUNT, DataSetAnnotations.VALUE_0 ); r= null; } else { r= Ops.where( wds ); if ( r.length() ds.length()-1 ) { result= ds.length()-1; } else { double x= ddatum; double x0= ds.value(result-1 ); double x1= ds.value(result ); result= ( ( x-x0 ) / ( x1 - x0 ) < 0.5 ? result-1 : result ); } } if ( handleFill ) { assert r!=null; result= (int)r.value(result); } logger.exiting( CLASSNAME, "closestIndex" ); return result; } } public static int closestIndex( QDataSet table, double x, Units units ) { return closestIndex( table, units.createDatum(x) ); } /** * Returns the index of the value which is less than the * value less of the datum. Note for rank 2 bins, the first bin which * has an end less than the datum. * @param ds rank 1 monotonic tags, or rank 2 bins. * @param datum a datum of the same or convertible units. * @return the index, or null (None). */ public static Integer getPreviousIndexStrict( QDataSet ds, Datum datum ) { if ( ds.length()==0 ) return null; if ( SemanticOps.isBins(ds) && ds.rank()==2 ) { // BINS SCHEME ds= Ops.slice1( ds, 1 ); } int i= getPreviousIndex( ds, datum ); if ( Ops.gt( ds.slice(i), datum ).value()>0. ) { return null; } else { return i; } } /** * Returns the index of the value which is greater than the * value less of the datum. Note for rank 2 bins, the first bin which * has an beginning less than the datum. * @param ds rank 1 monotonic tags, or rank 2 bins. * @param datum a datum of the same or convertible units. * @return the index, or null (None). */ public static Integer getNextIndexStrict( QDataSet ds, Datum datum ) { if ( ds.length()==0 ) return null; if ( SemanticOps.isBins(ds) && ds.rank()==2 ) { // BINS SCHEME ds= Ops.slice1( ds, 0 ); } int i= getNextIndex( ds, datum ); if ( Ops.le( ds.slice(i), datum ).value()>0. 
) { return null; } else { return i; } } /** * returns the first index that is before the given datum, or zero * if no data is found before the datum. * PARV! * if the datum identifies (==) an xtag, then the previous column is * returned. * @param ds the dataset * @param datum a datum in the same units of the dataset. * @return the index */ public static int getPreviousIndex( QDataSet ds, Datum datum ) { int i= closestIndex( ds, datum ); Units dsUnits= SemanticOps.getUnits(ds); // TODO: consider the virtue of ge if ( i>0 && ds.value(i)>=(datum.doubleValue(dsUnits)) ) { return i-1; } else { return i; } } /** * returns the first column that is after the given datum. Note the * if the datum identifies (==) an xtag, then the previous column is * returned. * @param ds the dataset * @param datum a datum in the same units of the dataset. * @return */ public static int getNextIndex( QDataSet ds, Datum datum ) { int i= closestIndex( ds, datum ); Units dsUnits= SemanticOps.getUnits(ds); // TODO: consider the virtue of le if ( i *
  • QStream--where ASCII mode needs efficient representation * * TODO: make one code for this. * TODO: there also needs to be an optional external context ('2017-03-15') so that 'HH:mm' is a valid response. * See sftp://jbf@jfaden.net/home/jbf/ct/autoplot/script/development/bestDataSetFormatter.jy * @param datums a rank 1 dataset, or if rank>1, then return the formatter for a slice. * @return DatumFormatter for the dataset. */ public static DatumFormatter bestFormatter( QDataSet datums ) { if ( datums.rank()==0 ) { return bestFormatter( Ops.join(null,datums) ); } else if ( datums.rank()>1 ) { if ( datums.rank()==2 && datums.property(QDataSet.BINS_1)!=null ) { } else { //TODO: find formatter for each, and then reconcile. return bestFormatter( datums.slice(0) ); } } Units units= SemanticOps.getUnits(datums); if ( Schemes.isBundleDataSet(datums) && datums.length()>0 ) { throw new IllegalArgumentException("dataset is a bundle"); } if ( units instanceof EnumerationUnits ) { return EnumerationDatumFormatterFactory.getInstance().defaultFormatter(); } else if ( units instanceof TimeLocationUnits ) { Datum gcd= asDatum( gcd( Ops.subtract( datums, datums.slice(0) ), DataSetUtil.asDataSet( Units.microseconds.createDatum(1) ) ) ); try { if ( gcd.lt( Units.nanoseconds.createDatum(1) ) ) { return new TimeDatumFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSSSSS)"); } else if ( gcd.lt( Units.nanoseconds.createDatum(1000) ) ) { return new TimeDatumFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSSS)"); } else if ( gcd.lt( Units.microseconds.createDatum(1000) ) ) { return new TimeDatumFormatter("yyyy-MM-dd'T'HH:mm:ss.SSSSSS)"); } else if ( gcd.lt( Units.milliseconds.createDatum(1000) ) ) { return new TimeDatumFormatter("yyyy-MM-dd'T'HH:mm:ss.SSS)"); } else if ( gcd.lt( Units.seconds.createDatum(60) ) ) { return new TimeDatumFormatter("yyyy-MM-dd'T'HH:mm:ss.SSS)"); } else if ( gcd.lt( Units.seconds.createDatum(600) ) ) { return new TimeDatumFormatter("yyyy-MM-dd'T'HH:mm:ss)"); } else { return new 
TimeDatumFormatter("yyyy-MM-dd'T'HH:mm"); } } catch ( ParseException ex ) { throw new RuntimeException(ex); } } else if ( units instanceof LocationUnits ) { units= units.getOffsetUnits(); datums= Ops.subtract( datums, datums.slice(0) ); } QDataSet limit= Ops.dataset( Math.pow( 10, (int)Math.log10( Ops.reduceMax( datums, 0 ).value() ) - 7 ), units ); datums= Ops.round( Ops.divide( datums, limit ) ); QDataSet gcd; try { gcd= gcd( datums, asDataSet(1.0) ); datums= Ops.multiply( datums, limit ); gcd= Ops.multiply( gcd, limit ); } catch ( IllegalArgumentException ex ) { // java.lang.IllegalArgumentException: histogram has too few bins gcd= limit; datums= Ops.multiply( datums, limit ); } int smallestExp=99; int ismallestExp=-1; for ( int j=0; j(gcd.value()*0.1) ) { // don't look at fuzzy zero int ee= (int)Math.floor(0.05+Math.log10(Math.abs(d))); if ( ee=vmin && value<=vmax && value!=fill ) { d= u.createDatum( value ); } else { //TODO: consider using format length and "****" to return value. return "****"; } DatumFormatter df= d.getFormatter(); String s; if ( df instanceof DefaultDatumFormatter ) { if ( form==null || form.trim().length()==0 ) { if ( "log".equals( yds.property(QDataSet.SCALE_TYPE) ) ) { s = String.format( Locale.US, "%9.3e", value ).trim(); } else { QDataSet bounds=null; if ( yds.rank()>0 ) bounds= SemanticOps.bounds(yds); if ( bounds!=null && bounds.rank()==2 ) { if ( Math.abs(bounds.value(1,0))<0.01 || Math.abs(bounds.value(1,1))<0.01 ) { s = String.format( Locale.US, "%9.3e", value ).trim(); } else { s = String.format( Locale.US, "%9.3f", value ).trim(); } } else { s = String.format( Locale.US, "%9.3f", value ).trim(); } } } else { try { s = String.format( Locale.US, form, value ); } catch ( IllegalFormatConversionException ex ) { // '%2X' char c= ex.getConversion(); if ( c=='X' || c=='x' || c=='d' || c=='o' || c=='c' || c=='C' ) { s = String.format( Locale.US, form, (long)value ); } else { //warning bad format string s= df.format(d); } } } } else { s 
= df.format(d,u); } return s; } /** * Return just the value encoded as richly as possible, for human consumption. * Example results of this include "9.5 km" "Chicago" or "fill" * see also format(ds), toString(ds), getStringValue(ds,d) * @param ds * @return just the value, without labels. */ public static String getStringValue( QDataSet ds ) { if ( ds.rank()==0 ) { return getStringValue( ds, ds.value() ); } else { return format(ds); } } /** * make a proper bundle ds from a simple bundle containing ordinal units * This assumes that labels is a unique set of labels. * See http://autoplot.org/QDataSet#DataSet_Properties under BUNDLE_1. * See DataSetOps.unbundle * @param labels * @throws IllegalArgumentException if the input is not rank 1. * @return a BundleDescriptor to be set as BUNDLE_i. See BundleDataSet */ public static MutablePropertyDataSet toBundleDs( QDataSet labels ) { if ( labels.rank()!=1 ) throw new IllegalArgumentException("labels must be rank 1"); IDataSet result= IDataSet.createRank2( labels.length(), 0 ); Units u= SemanticOps.getUnits(labels); for ( int i=0; i