/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package org.das2.qds.util; import java.io.BufferedReader; import java.io.IOException; import java.io.StringReader; import java.lang.reflect.Array; import java.text.ParseException; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; import org.das2.datum.DatumUtil; import org.das2.datum.EnumerationUnits; import org.das2.datum.Units; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; import org.das2.qds.AbstractDataSet; import org.das2.qds.DDataSet; import org.das2.qds.DataSetOps; import org.das2.qds.DataSetUtil; import org.das2.qds.MutablePropertyDataSet; import org.das2.qds.QDataSet; import org.das2.qds.ops.Ops; /** * This additional support for parsing ascii data looks at the comment block in * an ASCII file for a structured set of Dataset tags further describing the * data. This is loosely formatted JSON that describes each column of the * data file more abstractly than the AsciiParser. * * This is based on QDataSet metadata tags, as much as possible. * * @author jbf */ public class AsciiHeadersParser { /** * property for dimension of the data defining rank and qube dims. For example, * "[]" (default) * "[1]" scalar--alternate form, and [] is preferred. * "[3]" (three element vector) * "[20,30]" (qube of 600 elements). */ public static final String PROP_DIMENSION = "DIMENSION"; /** * NAME identifier to assign to each column of the parameter. These should follow QDataSet NAME rules. * This is always a 1-D array. */ public static final String PROP_ELEMENT_NAMES = "ELEMENT_NAMES"; /** * Human-readable label for each column of the parameter. 
*/ public static final String PROP_ELEMENT_LABELS = "ELEMENT_LABELS"; private static final Logger logger= Logger.getLogger("qdataset.ascii"); char commented= '?'; // tri-state: '?' (not yet determined), 'Y' or 'N' /** * return the next comment line with content, dropping empty lines, or null * when there are no more lines. The comment line is returned without the * leading comment character (#). The first non-empty line read decides whether * lines are expected to be commented ('Y') or not ('N'), and once lines are * expected to be commented, a line which does not start with # results in null. * @param reader the reader supplying the lines. * @return the next line with any leading # removed, or null. */ private String readNextLine( BufferedReader reader ) throws IOException { String line = reader.readLine(); if ( line==null ) return null; if ( commented=='?' && line.length()>0 ) { commented= line.charAt(0)=='#' ? 'Y' : 'N'; } if ( line.startsWith("#") ) { line = line.substring(1); } else { if ( commented=='Y' ) return null; } while ( line!=null && line.trim().length()==0 ) { line = reader.readLine(); if ( line != null && line.startsWith("#") ) { line = line.substring(1); } else { if ( commented=='Y' ) { return null; } else { return line; } } } return line; } /** * Preprocess the string to make more valid JSON. * 1. pop off comment character (#) from line. * 2. add leading and trailing braces ({ and }) if the first char is not an opening brace. * 3. add implicit comma at line breaks, when the line ends in a quote, digit, ] or }, unless the next line starts with comma or closing bracket (]). * 4. closing brace closes JSON. * Note the Java JSON parser used is already pretty loose: * 1. strings needn't be quoted if they don't contain whitespace. * a. I like this for structure tag names "UNITS:" * b. I'd discourage this for string values, because they might be interpreted as numbers. UNITS:"1e2 nT" * 2. equals (and other characters) can be used instead of COLONs. UNITS="1e2 nT" * * TODO: We see that it's misguided to have all this preprocessing done, since it limits * who can process these headers. Many if not all of the features here are going to be removed. 
* * @param s * @return */ protected String prep(String s) { boolean dontHaveOpeningBrace = true; boolean addClosingBrace = false; boolean expectClosingBrace= false; int braceLevel= 0; try { StringBuilder sb = new StringBuilder(); try (BufferedReader reader = new BufferedReader(new StringReader(s))) { String line = readNextLine( reader ); //int iline = 1; while (line != null) { String trimLine = line.trim(); if (dontHaveOpeningBrace) { if (!trimLine.startsWith("{")) { line = "{" + line; addClosingBrace = true; } else { expectClosingBrace= true; } dontHaveOpeningBrace = false; } // read ahead to get the next line containing text, so we can avoid adding comma to dangling text. See test3_1. String nextLine = readNextLine( reader ); // we can add a comma at the end of a line to make it valid. char lastChar; if ( trimLine.length()==0 ) { lastChar= ' '; } else { lastChar= trimLine.charAt(trimLine.length() - 1); } if (lastChar == '"' || Character.isDigit(lastChar) || lastChar == ']' || lastChar == '}') { char nextChar; if (nextLine != null && nextLine.trim().length() > 0) { nextChar = nextLine.trim().charAt(0); if (nextChar != ',' && nextChar != ']') { line = line + ","; } } } // update the brace level boolean inQuote= false; boolean backSlash= false; for ( int i=0; i result ) throws JSONException { String[] names= JSONObject.getNames(jo); for (String name : names) { Object val = jo.get(name); if (val instanceof JSONObject) { /* NOTE(review): the recursive call below is passed jo, not val, and child is never added to result -- confirm this is intended. */ Map child= new HashMap(); calcUserProperties( (JSONObject)jo, child ); } else if (val instanceof JSONArray) { result.put(name, (JSONArray)val); //TODO: convert this } else { result.put(name, val); } } } /** * calculate the bundle descriptor, possibly folding together columns to create * high rank datasets. * @param jo the JSON object whose named entries are examined to build the descriptor. * @param columns column names. 
* @param columnLabels human-consumable labels for each column */ private static BundleDescriptor calcBundleDescriptor( JSONObject jo, String[] columns, String[] columnLabels ) { String[] snames= new String[ columns.length ]; BundleDescriptor bd= new BundleDescriptor(); //Map dsNames= new LinkedHashMap(); // enumeration of all the names that are not in line. Map dsToPosition= new LinkedHashMap(); // name to the index of first column int ids= 0; // index of the dataset in the bundleDescriptor. Map messages= new LinkedHashMap(); String[] names= JSONObject.getNames(jo); for ( int ivar=0; ivar val= (Map)bd.property(QDataSet.USER_PROPERTIES); if ( val==null ) { val= new HashMap(); bd.putProperty(QDataSet.USER_PROPERTIES, val); } val.put( jsonName, o ); continue; } if ( jsonName.equals( QDataSet.USER_PROPERTIES ) ) { Map val= new HashMap(); calcUserProperties( jo1,val ); bd.putProperty( jsonName,val ); continue; } int[] idims; if ( !jo1.has(PROP_DIMENSION) ) { idims= new int[0]; } else { Object dims= jo1.get(PROP_DIMENSION); if ( dims instanceof JSONArray ) { idims= new int[ ((JSONArray)dims).length() ]; for ( int j=0;j1 ) { // throw new IllegalArgumentException("only rank 2 datasets supported, DIMENSION len="+ idims.length ); //} int total= idims.length==0 ? 1 : idims[0]; for ( int j=1;j icols= new ArrayList(); if ( !jo1.has("VALUES") ) { //early version of JSONHeadedASCII (rich ascii) allowed lookups. 
for ( int j=0; j1 ) { logger.log(Level.WARNING, "Multiple columns have label \"{0}\": {1}", new Object[] { lookFor, icols } ); if ( jo1.has("START_COLUMN") ) { icol= jo1.getInt("START_COLUMN"); logger.log( Level.FINE, "using START_COLUMN={1} property for {0}", new Object[]{lookFor, icol } ); } else { logger.log( Level.FINE, "using first column ({1}) for {0}", new Object[]{lookFor, icol } ); } if ( labels==null ) { labels= new String[elementNames.length]; for ( int i=0; i-1 ) { //dsNames.put( ids, name ); dsToPosition.put( name, icol ); ids+= DataSetUtil.product(idims); } if ( icol>-1 ) { for ( int j=0; j1 ) { elementNames= new String[total]; for ( int i=0; i-1 ) { //dsNames.put( ids, name ); dsToPosition.put( name, icol ); ids+= DataSetUtil.product(idims); } if ( icol>-1 ) { for ( int j=0; j0 ) { for ( Entry jos1: messages.entrySet() ) { logger.log( Level.INFO, "{0}", jos1.getValue() ); } } Map props= DataSetUtil.getProperties( bd, DataSetUtil.globalProperties(), null ); bd= bd.resortDataSets( dsToPosition ); DataSetUtil.putProperties( props, bd ); for ( Entry ee: dsToPosition.entrySet() ) { int i= ee.getValue(); if ( snames[i]==null ) { bd.addDataSet( columns[i], ids, new int[0] ); } ids++; } return bd; } /** * return the QDataSet for the values. * @param jo the JSON object accompanying the values (NOTE(review): how it is used is not visible here -- confirm). * @param values the JSON array of values; working arrays of the same length are allocated. * @param dims the dimensions for the result (presumably the qube dims -- confirm). * @return the dataset, as a DDataSet. */ private static DDataSet getDataSet( JSONObject jo, JSONArray values, int[] dims ) throws JSONException { double[] dd= new double[ values.length() ]; Object[] oo= new Object[ values.length() ]; Units u= Units.dimensionless; for ( int i=0; i datasets; Map datasets2; Map inlineDataSets; // in-line datasets, like DEPEND_1. 
Map> props; Map qubes; BundleDescriptor( ) { properties= new LinkedHashMap(); datasets= new LinkedHashMap(); datasets2= new LinkedHashMap(); inlineDataSets= new LinkedHashMap(); props= new LinkedHashMap(); qubes= new LinkedHashMap(); } public int indexOf( String name ) { Integer i= datasets.get(name); if ( i==null ) { return -1; } else { return i; } } /** * add the named dataset with the dimensions. Note qube * doesn't include the first dimension, and this may be null for * rank 1 datasets. * * @param name name of the dataset * @param i index of the dataset. These must be contiguous. * @param qube the dimensions or null for rank 1 data, e.g. vector= [3] */ protected void addDataSet( String name, int i, int[] qube ) { addDataSet( name, i, qube, null, null ); } /** * add the named dataset with the dimensions. Note qube * doesn't include the first dimension, and this may be null for * rank 1 datasets. * * @param name name of the dataset * @param i index of the dataset. These must be contiguous. * @param qube the dimensions or null for rank 1 data, e.g. vector= [3] * @param names the names for each column. See QDataSet NAME property. This implies Vector. * @param labels the labels for each column. See QDataSet LABEL property. */ protected void addDataSet( String name, int i, int[] qube, String[] names, String[] labels ) { int len= DataSetUtil.product(qube); name= Ops.safeName(name); datasets.put( name, i ); for ( int j=0; j0 ) { putProperty( QDataSet.QUBE, i, Boolean.TRUE ); putProperty( QDataSet.ELEMENT_NAME, i, name ); putProperty( QDataSet.ELEMENT_LABEL, i, name ); putProperty( QDataSet.START_INDEX, i, i ); // datasets2 does the mapping. 
} if ( qube.length>0 && names!=null ) { for ( int k=0; k props1= props.get(ids); if ( props1!=null ) { String[] names= (String[]) props1.get( PROP_ELEMENT_NAMES ); if ( names!=null ) { return names[ic-ids]; } } } if ( name.equals( QDataSet.LABEL ) ) { Map props1= props.get(ids); if ( props1!=null ) { String[] labels= (String[]) props1.get( PROP_ELEMENT_LABELS ); if ( labels==null ) { labels= (String[]) props1.get( PROP_ELEMENT_NAMES ); } if ( labels!=null ) { return labels[ic-ids]; } } } int i= datasets.get(dsname); Map props1= props.get(i); if ( props1==null ) { return null; } else { return props1.get(name); } } } @Override public synchronized void putProperty( String name, int ic, Object v ) { String dsname= datasets2.get(ic); int i= datasets.get(dsname); Map props1= props.get( i ); if ( props1==null ) { props1= new LinkedHashMap<>(); props.put( i, props1 ); } if ( name.startsWith( "DEPEND_" ) && !(name.equals("DEPEND_0") ) && v instanceof String ) { if ( inlineDataSets.containsKey((String)v) ) { props1.put( name, inlineDataSets.get((String)v) ); } else { logger.log(Level.WARNING, "unable to resolve property {0}={1} of {2}. No such dataset found.", new Object[]{name, v, datasets2.get(i)}); throw new IllegalArgumentException("unable to resolve property "+name+"="+v+" of "+datasets2.get(i)+". No such dataset found." ); //props1.put( name, v ); } } else { props1.put( name, v ); } } @Override public double value(int i0, int i1) { String name= datasets2.get(i0); int[] qube= qubes.get(name); if ( qube==null ) { throw new IndexOutOfBoundsException("length=0"); } if ( i1>=qube.length ) { throw new ArrayIndexOutOfBoundsException("qube is "+qube.length+"."); } return qube[i1]; } /** * special code because of START_INDEX property. Must trim at DataSet boundaries. 
* @param start * @param end * @return */ @Override public QDataSet trim(int start, int end) { return DataSetOps.trim( this, start, end-start ); //throw new IllegalArgumentException("Not supported"); } /** * the parsed JSON comes in any old order, so we need to resort our data. * @param dsToPosition map from dataset name to its column position. An IllegalArgumentException is thrown when two names share a position. * @return the resorted BundleDescriptor. */ BundleDescriptor resortDataSets( Map dsToPosition ) { Map positionToDs= new LinkedHashMap(); int maxColumn=-1; for ( Entry entry: dsToPosition.entrySet() ) { if ( positionToDs.get(entry.getValue())!=null ) { throw new IllegalArgumentException("two datasets occupy the same position: "+entry.getKey()+","+positionToDs.get(entry.getValue()) ); } positionToDs.put( entry.getValue(), entry.getKey() ); if ( maxColumn pp= props.get( oldIndex ); pp.put( QDataSet.START_INDEX, i ); newb.props.put( i, pp ); // danger: shallow copy i+= len; } column++; } for ( Entry e: inlineDataSets.entrySet() ) { newb.addDataSet( e.getKey(), e.getValue() ); } return newb; } } /** * provide mapping from JSON object type to QDataSet property type. 
* @param propName * @param propValue * @return */ private static Object coerceToType( String propName, Object propValue ) { try { switch (propName) { case QDataSet.UNITS: return Units.lookupUnits( String.valueOf(propValue) ); case QDataSet.FILL_VALUE: return Double.parseDouble(String.valueOf(propValue) ); case QDataSet.VALID_MIN: return Double.parseDouble(String.valueOf(propValue) ); case QDataSet.VALID_MAX: return Double.parseDouble(String.valueOf(propValue) ); case QDataSet.TYPICAL_MIN: return Double.parseDouble(String.valueOf(propValue) ); case QDataSet.TYPICAL_MAX: return Double.parseDouble(String.valueOf(propValue) ); case QDataSet.SCALE_TYPE: return String.valueOf( propValue ); case QDataSet.MONOTONIC: return Boolean.valueOf(String.valueOf(propValue) ); case QDataSet.CADENCE: return DataSetUtil.asDataSet( DatumUtil.parse( String.valueOf( propValue) ) ); case QDataSet.FORMAT: return String.valueOf( propValue ); default: return String.valueOf( propValue ); } } catch ( ParseException | NumberFormatException ex ) { logger.log(Level.WARNING, "unable to parse value for {0}: {1}", new Object[]{propName, propValue}); return null; } } /** * copy the metadata found in the JSON object into the properties of the * bundle descriptor, for each column or bundled dataset. Nothing is returned. * Entries which are not JSON objects are logged and skipped, as are entries * whose name is not found in the bundle descriptor. * @param bd the bundle descriptor receiving the properties. * @param jo the JSON object containing a JSON object of properties for each name. */ private static void fillMetadata( BundleDescriptor bd, JSONObject jo ) throws JSONException { Iterator it= jo.keys(); for ( ; it.hasNext(); ) { String key= (String) it.next(); Object o= jo.get(key); if ( !( o instanceof JSONObject ) ) { logger.log(Level.WARNING, "expected JSONObject for value: {0}", key); } else { String name= Ops.safeName(key); int ids= bd.indexOf( name ); if ( ids==-1 ) { JSONObject inlineObject= (JSONObject)o; if ( !inlineObject.has("VALUES") ) { // this is when ancillary metadata is in header, and is fine to ignore. logger.log(Level.FINE, "metadata found for key {0}, but values are not found in the ascii file columns", key); continue; } else { // inline dataset already has metadata. 
continue; } } JSONObject propsj= ((JSONObject)o); bd.putProperty( QDataSet.NAME, ids, name ); Iterator props= propsj.keys(); for ( ; props.hasNext(); ) { String prop= (String) props.next(); Object sv= propsj.get(prop); if ( prop.equals( PROP_DIMENSION ) || prop.equals( "START_COLUMN") || prop.equals("ELEMENT_NAMES") || prop.equals("ELEMENT_LABELS") ) { if ( prop.equals("ELEMENT_NAMES") && sv instanceof JSONArray ) { // String[] ss= toStringArray( (JSONArray)sv ); // String[] labels= toStringArray( (JSONArray)sv ); // for ( int i=0; i0 ) { bd.putProperty( QDataSet.ELEMENT_LABEL, ids, sv ); } else { bd.putProperty( QDataSet.LABEL, ids, sv ); } } else { if ( sv instanceof JSONArray ) { JSONArray asv= (JSONArray)sv; Object item= asv.get(0); boolean homogenious= true; for ( int i=1; i inlineDataSets= ((BundleDescriptor)bds).inlineDataSets; QDataSet result= inlineDataSets.get(name); return result; } else { throw new IllegalArgumentException("bds is not a BundleDescriptor created by this class"); } } /** * return the list of inline dataset names. This was probably used during * development. * @param bds bundle dataset descriptor, though only BundleDescriptor is supported. * @return the inline dataset names. */ public static String[] getInlineDataSetNames( QDataSet bds ) { if ( bds instanceof BundleDescriptor ) { Map inlineDataSets= ((BundleDescriptor)bds).inlineDataSets; return inlineDataSets.keySet().toArray( new String[inlineDataSets.size()] ); } else { throw new IllegalArgumentException("bds is not a BundleDescriptor created by this class"); } } }