/* * AsciiTableDataSource.java * * Created on March 31, 2007, 8:22 AM * */ package org.autoplot.ascii; import org.das2.datum.TimeUtil; import org.das2.datum.Units; import org.das2.util.monitor.ProgressMonitor; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URI; import java.nio.ByteBuffer; import java.nio.channels.FileChannel.MapMode; import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; import org.das2.qds.DDataSet; import org.das2.qds.QDataSet; import org.das2.qds.DataSetOps; import org.autoplot.datasource.AbstractDataSource; import org.das2.qds.util.AsciiParser; import org.das2.datum.TimeParser; import java.text.ParseException; import java.util.ArrayList; import java.util.List; import java.util.Map.Entry; import java.util.StringTokenizer; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import org.das2.CancelledOperationException; import org.das2.dataset.NoDataInIntervalException; import org.das2.datum.EnumerationUnits; import org.das2.datum.UnitsConverter; import org.das2.datum.UnitsUtil; import org.das2.util.ByteBufferInputStream; import org.das2.util.LoggerManager; import org.das2.qds.ArrayDataSet; import org.das2.qds.DataSetUtil; import org.das2.qds.MutablePropertyDataSet; import org.das2.qds.SemanticOps; import org.autoplot.datasource.DataSourceUtil; import org.das2.qds.ops.Ops; import org.das2.qds.util.AsciiHeadersParser; import org.das2.qds.util.AsciiParser.FieldParser; /** * DataSource for reading data in ASCII files, where each record is * one line of the file, and each record has the same number of fields. * This reads in each record and splits on the delimiter, which is typically * guessed by examining the first 5 viable records. This also supports * combining times that are in several fields. 
* * This also handles true CSV files, where a quoted field may contain a * newline character. * * Last, a three or four column ASCII file containing two ISO8601 strings * for the first two columns is automatically treated as an "events list", * a list of named intervals. * * @author jbf */ public class AsciiTableDataSource extends AbstractDataSource { AsciiParser parser; File file; String column = null; String depend0 = null; private final static Logger logger= LoggerManager.getLogger("apdss.ascii"); public final static String PARAM_INTERVAL_TAG="intervalTag"; /** * if non-null, then this is used to parse the times. For a fixed-column parser, a field * handler is added to the parser. */ TimeParser timeParser; /** * time format of each digit */ String[] timeFormats; /** * the column containing times, or -1. */ int timeColumn = -1; DDataSet ds = null; /** * non-null indicates the columns should be interpreted as rank2. rank2[0] is first column, rank2[1] is last column exclusive. */ int[] rank2 = null; /** * like rank2, but interpret columns as bundle rather than rank 2 dataset. */ int[] bundle= null; /** * non-null indicates the first record will provide the labels for the rows of the rank 2 dataset. */ int[] depend1Labels = null; /** * non-null indicates these will contain the values for the labels. */ String[] depend1Label= null; /** * non-null indicates the first record will provide the values for the rows of the rank 2 dataset. */ int[] depend1Values = null; private double validMin = Double.NEGATIVE_INFINITY; private double validMax = Double.POSITIVE_INFINITY; /** * name of the event list column. */ String eventListColumn= null; /** * column for colors, if available. */ int eventListColorColumn= -1; /** * Creates a new instance of AsciiTableDataSource * @param uri the URI to read. 
* @throws java.io.FileNotFoundException */ public AsciiTableDataSource(URI uri) throws FileNotFoundException, IOException { super(uri); } @Override public QDataSet getDataSet(ProgressMonitor mon) throws IOException, CancelledOperationException, NoDataInIntervalException { // boolean useReferenceCache= "true".equals( System.getProperty( ReferenceCache.PROP_ENABLE_REFERENCE_CACHE, "false" ) ); // // ReferenceCache.ReferenceCacheEntry rcent=null; // if ( useReferenceCache ) { // rcent= ReferenceCache.getInstance().getDataSetOrLock( getURI(), mon); // if ( !rcent.shouldILoad( Thread.currentThread() ) ) { // try { // logger.log(Level.FINE, "wait for other thread {0}", uri); // QDataSet result= rcent.park( mon ); // if ( result==null ) { // logger.fine("result after parking is null"); // Thread.sleep(100); // experiment with this condition. // result= rcent.park( mon ); // this is still null // return getDataSet(mon); // this is successful. // } else { // return result; // } // } catch ( Exception ex ) { // throw new RuntimeException(ex); // } // } else { // logger.log(Level.FINE, "reference cache in use, {0} is loading {1}", new Object[] { Thread.currentThread().toString(), resourceURI } ); // } // } // // try { logger.fine("read file"); ds = doReadFile(mon); logger.fine("done read file"); if ( mon.isCancelled() ) { throw new CancelledOperationException("cancelled data read"); } /* String o= params.get("tail"); if ( o!=null ) { int itail= Integer.parseInt(o); int nrec= ds.length(); if ( nrec>itail ) { ds= DDataSet.copy( DataSetOps.trim( ds, nrec-itail, itail ) ); } }*/ // old code that handled timeFormats removed. It was no longer in use. 
MutablePropertyDataSet vds = null; ArrayDataSet dep0 = null; if ((column == null) && (timeColumn != -1) ) { column = parser.getFieldNames()[timeColumn]; } QDataSet bundleDescriptor= (QDataSet) ds.property(QDataSet.BUNDLE_1); //auto-detect event lists if ( eventListColumn==null ) { if ( ds.length(0)>2 && ds.length(0)<5 ) { Units u0= parser.getUnits(0); Units u1= parser.getUnits(1); if ( UnitsUtil.isTimeLocation(u0) && ( u1==u0 ) ) { eventListColumn= "field"+(ds.length(0)-1); } } } if ( eventListColumn!=null ) { dep0= ArrayDataSet.maybeCopy( DataSetOps.leafTrim( ds, 0, 2) ); Units u0= parser.getUnits(0); Units u1= parser.getUnits(1); if ( u0!=u1 ) { if ( UnitsUtil.isTimeLocation(u0) && UnitsUtil.isTimeLocation(u1) ) { throw new IllegalArgumentException("somehow the parser was misconfigured to have two different time units."); } if ( !u1.isConvertibleTo(u0.getOffsetUnits()) ) { // allow "s" to go with UTC throw new IllegalArgumentException("first two columns should have the same units, or second column should be offset (e.g. seconds) from first"); } } dep0.putProperty( QDataSet.UNITS, parser.getUnits(0) ); dep0.putProperty( QDataSet.BINS_1, QDataSet.VALUE_BINS_MIN_MAX ); if ( !eventListColumn.equals("") ) { column= eventListColumn; } } if ( ds.length()==0 ) { logger.info("==========================================="); logger.info("no records found when parsing ascii file!!!"); logger.info("==========================================="); // this may raise an exception in a future version. 
throw new NoDataInIntervalException("no records found"); } String group= getParam( "group", null ); if ( group!=null ) { vds= ArrayDataSet.copy( DataSetOps.unbundle( ds, group ) ); } else if (column != null) { if ( bundleDescriptor!=null ) { //vds = ArrayDataSet.copy(DataSetOps.slice1(ds, icol)); try { vds= ArrayDataSet.copy(DataSetOps.unbundle(ds,column)); } catch ( IllegalArgumentException ex ) { int icol = parser.getFieldIndex(column); if ( icol!=-1 ) { vds = ArrayDataSet.copy(DataSetOps.slice1(ds, icol)); vds.putProperty( QDataSet.CONTEXT_0, null ); vds.putProperty(QDataSet.UNITS, parser.getUnits(icol)); if ( column.length()>1 ) vds.putProperty( QDataSet.NAME, column ); vds.putProperty( QDataSet.LABEL, parser.getFieldNames()[icol] ); } else { //BUG2000: bundleDescriptor is supposed to be a AsciiHeadersParser.BundleDescriptor. Message is poor when wrong column name is used. //https://sourceforge.net/p/autoplot/bugs/1999/ if ( bundleDescriptor instanceof AsciiHeadersParser.BundleDescriptor ) { QDataSet _vds= AsciiHeadersParser.getInlineDataSet( bundleDescriptor, column ); if ( _vds==null ) { throw new IllegalArgumentException("No such dataset: " +column ); } else { vds= ArrayDataSet.maybeCopy(_vds); } } else { throw new IllegalArgumentException("No such dataset: " +column ); } } } } else { int icol = parser.getFieldIndex(column); if (icol == -1) { throw new IllegalArgumentException("bad column parameter: " + column + ", should be field1, or 1, or "); } vds = ArrayDataSet.copy(DataSetOps.slice1(ds, icol)); vds.putProperty( QDataSet.CONTEXT_0, null ); vds.putProperty(QDataSet.UNITS, parser.getUnits(icol)); if ( column.length()>1 ) vds.putProperty( QDataSet.NAME, column ); vds.putProperty( QDataSet.LABEL, parser.getFieldNames()[icol] ); } if (validMax != Double.POSITIVE_INFINITY) { vds.putProperty(QDataSet.VALID_MAX, validMax); } if (validMin != Double.NEGATIVE_INFINITY) { vds.putProperty(QDataSet.VALID_MIN, validMin); } } else if ( eventListColumn!=null ) { 
EnumerationUnits eu= EnumerationUnits.create("events"); vds= ArrayDataSet.maybeCopy( Ops.replicate( DataSetUtil.asDataSet(eu.createDatum("event")), ds.length() ) ); } if (depend0 != null) { int icol = parser.getFieldIndex(depend0); if (icol == -1) { throw new IllegalArgumentException("bad depend0 parameter: " + depend0 + ", should be field1, or 1, or "); } if ( ds.property(QDataSet.BUNDLE_1)!=null ) { dep0 = ArrayDataSet.copy(DataSetOps.unbundle(ds,icol)); // avoid warning message about slicing to unbundle } else { dep0 = ArrayDataSet.copy(DataSetOps.slice1(ds, icol)); } dep0.putProperty(QDataSet.UNITS, parser.getUnits(icol)); //String tf= params.get("timeFormat"); if ( UnitsUtil.isTimeLocation( parser.getUnits(icol) ) ) { dep0.putProperty(QDataSet.LABEL,null); dep0.putProperty(QDataSet.NAME,"time"); } if (DataSetUtil.isMonotonic(dep0)) { dep0.putProperty(QDataSet.MONOTONIC, Boolean.TRUE); } String intervalType= params.get( PARAM_INTERVAL_TAG ); if ( intervalType!=null && intervalType.equals("start") ) { QDataSet cadence= DataSetUtil.guessCadenceNew( dep0, null ); if ( cadence!=null && !"log".equals( cadence.property(QDataSet.SCALE_TYPE) ) ) { double add= cadence.value()/2; //DANGER--should really check units. 
logger.log( Level.FINE, "adding half-interval width to dep0 because of %s: %s", new Object[] { PARAM_INTERVAL_TAG, cadence } ); for ( int i=0; i1 ) dep0.putProperty( QDataSet.NAME, depend0 ); Units xunits= (Units) dep0.property(QDataSet.UNITS); if ( xunits==null || !UnitsUtil.isTimeLocation( xunits ) ) { if ( dep0.property(QDataSet.LABEL)==null ) { dep0.putProperty( QDataSet.LABEL, parser.getFieldNames()[icol] ); } } String dep0Units= getParam( "depend0Units", null ); if ( dep0Units!=null ) { dep0Units= dep0Units.replaceAll("\\+"," "); Units newDep0Units= Units.lookupUnits(dep0Units); if ( UnitsUtil.isTimeLocation( SemanticOps.getUnits(dep0) ) && UnitsUtil.isTimeLocation(newDep0Units) ) { dep0= ArrayDataSet.maybeCopy( Ops.convertUnitsTo( dep0, newDep0Units ) ); } else { dep0.putProperty( QDataSet.UNITS, newDep0Units ); } } } else { if ( ds.rank()==2 ) { MutablePropertyDataSet bds= (MutablePropertyDataSet) ds.property(QDataSet.BUNDLE_1); if ( bds!=null ) { for ( int i=0; i-1 ) { vds= ArrayDataSet.copy( Ops.bundle( Ops.slice1(dep0,0), Ops.slice1(dep0,1), Ops.slice1(ds,eventListColorColumn), vds ) ); ((MutablePropertyDataSet)vds.property(QDataSet.BUNDLE_1)).putProperty( QDataSet.FORMAT,2,"0x%06x" ); } vds.putProperty(QDataSet.RENDER_TYPE,"eventsBar"); } //if ( rcent!=null ) rcent.finished(vds); return vds; } // // } catch ( RuntimeException ex ) { // if ( rcent!=null ) rcent.exception(ex); // throw ex; // } catch ( IOException ex ) { // if ( rcent!=null ) rcent.exception(ex); // throw ex; // } finally { // mon.finished(); // } } /** * returns the rank 2 dataset produced by the ascii table reader. * @param mon note monitor is used twice, so the progress bar jumps back. 
* @return * @throws java.lang.NumberFormatException * @throws java.io.IOException * @throws java.io.FileNotFoundException */ private DDataSet doReadFile(final ProgressMonitor mon) throws NumberFormatException, IOException, FileNotFoundException { logger.finer("maybe download file"); String o; file = getFile(mon.getSubtaskMonitor("getFile")); logger.finer("got file"); if ( file.isDirectory() ) { throw new IOException("expected file but got directory"); } parser = new AsciiParser(); boolean fixedColumns = false; int columnCount; /** * if non-null, this is the delim we are using to parse the file. */ String delim; o = params.get("skip"); if (o != null) { parser.setSkipLines(Integer.parseInt(o)); } o = params.get("skipLines"); if (o != null) { parser.setSkipLines(Integer.parseInt(o)); } o = params.get("recCount"); if (o != null) { parser.setRecordCountLimit(Integer.parseInt(o)); } o= params.get("recStart"); if ( o!=null ) { parser.setRecordStart(Integer.parseInt(o)); } parser.setKeepFileHeader(true); o = params.get("comment"); if (o != null) { if ( o.equals("") ) { parser.setCommentPrefix(null); } else { parser.setCommentPrefix(o); } } o = params.get("headerDelim"); if (o != null) { parser.setHeaderDelimiter(o); } delim = params.get("delim"); String spattern= params.get("pattern"); String format= params.get("format"); if ( format!=null ) { spattern= AsciiParser.getRegexForFormat( format ); } String sFixedColumns = params.get("fixedColumns"); if ( spattern!=null ) { AsciiParser.RegexParser p = new AsciiParser.RegexParser(parser,spattern); parser.setRecordParser( p ); columnCount= p.fieldCount(); delim= " "; // this is because timeformats needs a delimiter } else if (sFixedColumns == null) { if (delim == null) { AsciiParser.DelimParser p = parser.guessSkipAndDelimParser(file.toString()); if ( p == null) { String cc= params.get("columnCount"); columnCount= ( cc==null ) ? 
2 : Integer.parseInt(cc); p= parser.getDelimParser( columnCount, "\\s+" ); } columnCount = p.fieldCount(); delim = p.getDelim(); p.setShowException(true); parser.setRecordParser( p ); } else { if ( delim.equals(",") ) delim="COMMA"; delim= delim.replaceAll("WHITESPACE", "\\s+"); delim= delim.replaceAll("SPACE", " "); delim= delim.replaceAll("COMMA", ","); delim= delim.replaceAll("SEMICOLON", ";"); delim= delim.replaceAll("COLON", ":"); delim= delim.replaceAll("TAB", "\t"); delim= delim.replaceAll("whitespace", "\\s+"); delim= delim.replaceAll("space", " "); delim= delim.replaceAll("comma", ","); delim= delim.replaceAll("semicolon", ";"); delim= delim.replaceAll("colon", ":"); delim= delim.replaceAll("tab", "\t"); if (delim.equals("+")) { delim = " "; } columnCount = parser.setDelimParser(file.toString(), delim).fieldCount(); } //parser.setPropertyPattern( Pattern.compile("^#\\s*(.+)\\s*\\:\\s*(.+)\\s*") ); parser.setPropertyPattern(AsciiParser.NAME_COLON_VALUE_PATTERN); } else { String s = sFixedColumns; AsciiParser.RecordParser p = parser.setFixedColumnsParser(file.toString(), "\\s+"); try { columnCount = Integer.parseInt(sFixedColumns); } catch ( NumberFormatException ex ) { if (sFixedColumns.equals("")) { columnCount = p.fieldCount(); } else { // 0-10,20-34 String[] ss = s.split(","); int[] starts = new int[ss.length]; int[] widths = new int[ss.length]; AsciiParser.FieldParser[] fparsers = new AsciiParser.FieldParser[ss.length]; for (int i = 0; i < ss.length; i++) { String[] ss2 = ss[i].split("-"); starts[i] = Integer.parseInt(ss2[0]); widths[i] = Integer.parseInt(ss2[1]) - starts[i] + 1; fparsers[i] = AsciiParser.DOUBLE_PARSER; } p = parser.setFixedColumnsParser(starts, widths, fparsers); columnCount= p.fieldCount(); } } parser.setPropertyPattern(null); // don't look for these for speed fixedColumns = true; delim = null; } o = params.get("columnCount"); if (columnCount == 0) { if (o != null) { columnCount = Integer.parseInt(o); } else { columnCount = 
AsciiParser.guessFieldCount(file.toString()); } } o = params.get("fill"); if (o != null) { parser.setFillValue(Double.parseDouble(o)); } o = params.get("validMin"); if (o != null) { this.validMin = Double.parseDouble(o); } o = params.get("validMax"); if (o != null) { this.validMax = Double.parseDouble(o); } /* recognize the column as parsable times, parse with slow general purpose time parser */ o = params.get("time"); if (o != null) { int i = parser.getFieldIndex(o); if (i == -1) { throw new IllegalArgumentException("field not found for time in column named \"" + o + "\""); } else { parser.setFieldParser(i, parser.UNITS_PARSER); parser.setUnits(i, AsciiParser.UNIT_UTC ); depend0 = o; timeColumn = i; } } o = params.get("timeFormat"); if (o != null) { String timeFormat=o; if ( ",".equals(delim) && !timeFormat.contains(",") ) { timeFormat= timeFormat.replaceAll("\\+",","); } if ( !timeFormat.contains(" ") ) { timeFormat = timeFormat.replaceAll("\\+", " "); } timeFormat = timeFormat.replaceAll("\\%", "\\$"); timeFormat = timeFormat.replaceAll("\\{", "("); timeFormat = timeFormat.replaceAll("\\}", ")"); String timeColumnName = params.get("time"); if ( timeColumnName==null ) { timeColumn= 0; } else { // future-proof against field0-field4, coming soon. 
int i= timeColumnName.indexOf("-"); if ( i>-1 ) { timeColumnName= timeColumnName.substring(0,i); } else { i= timeColumnName.indexOf(":"); if ( i>-1 ) { timeColumnName= timeColumnName.substring(0,i); } } timeColumn= parser.getFieldIndex(timeColumnName); } String timeFormatDelim= delim; if ( delim==null ) timeFormatDelim= " "; timeFormats= timeFormat.split(timeFormatDelim,-2); if (timeFormat.equals("ISO8601")) { String line = parser.readFirstParseableRecord(file.toString()); if (line == null) { throw new IllegalArgumentException("file contains no parseable records."); } String[] ss = new String[ parser.getRecordParser().fieldCount() ]; parser.getRecordParser().splitRecord(line,ss); int i = timeColumn; if (i == -1) { i = 0; } String atime = ss[i]; timeFormat = TimeParser.iso8601String(atime.trim()); timeParser = TimeParser.create(timeFormat); final Units u = AsciiParser.UNIT_UTC; parser.setUnits(i, u); AsciiParser.FieldParser timeFieldParser = new AsciiParser.FieldParser() { @Override public double parseField(String field, int fieldIndex) throws ParseException { return timeParser.parse(field).getTime(u); } }; parser.setFieldParser(i, timeFieldParser); } else if (delim != null && timeFormats.length > 1) { timeParser = TimeParser.create(timeFormat); // we've got a special case here: the time spans multiple columns, so we'll have to combine later. 
parser.setUnits(timeColumn, Units.dimensionless); //if ( true ) { final Units u= AsciiParser.UNIT_UTC; MultiFieldTimeParser timeFieldParser= new MultiFieldTimeParser( timeColumn, timeFormats, timeParser, u ); for ( int i=timeColumn; i0 && !o.equals("rank2") ) { column = o; timeColumn= 0; final Units u = AsciiParser.UNIT_UTC; parser.setUnits(0, u); depend0= "0"; } o = params.get("column"); if (o != null) { column = o; } o = params.get("depend0"); if (o != null) { depend0 = o; } o = params.get("Z"); if ( o!=null ) { column= o; } else { o = params.get("Y"); if ( o!=null ) { column= o; } } o = params.get("X"); if ( o!=null ) { depend0= o; } o = params.get("rank2"); if (o != null) { rank2 = parseRangeStr(o, columnCount); column = null; } o = params.get("bundle"); if (o != null) { bundle = parseRangeStr(o, columnCount); column = null; } o = params.get("arg_0"); if (o != null ) { if ( o.equals("rank2") ) { rank2 = new int[]{0, columnCount}; column = null; } else if ( o.equals("bundle") ) { bundle = new int[]{0, columnCount}; column = null; } } boolean haveColumn= column!=null; if (column == null && depend0 == null && rank2 == null) { if (parser.getFieldNames().length == 2) { depend0 = parser.getFieldNames()[0]; column = parser.getFieldNames()[1]; } else { column = parser.getFieldNames()[parser.getFieldNames().length-1]; } } o = params.get("depend1Labels"); if (o != null) { if ( o.contains(",") ) { depend1Label= o.split(","); } else { depend1Labels = parseRangeStr(o, columnCount); } } o = params.get("depend1Values"); if (o != null) { depend1Values = parseRangeStr(o, columnCount); } eventListColumn= params.get("eventListColumn"); // rfe https://sourceforge.net/p/autoplot/bugs/1425/: create events list automatically. 
if ( parser.getFieldLabels().length>=2 && parser.getFieldLabels().length <= 5 && UnitsUtil.isTimeLocation(parser.getUnits(0)) && UnitsUtil.isTimeLocation(parser.getUnits(1)) && !haveColumn ) { if ( parser.getFieldCount()>2 ) { eventListColumn= "field"+(parser.getFieldLabels().length-1); } else { eventListColumn= ""; depend0= null; column= null; } } // rfe https://sourceforge.net/p/autoplot/feature-requests/256: add support for HDMC's simple event list format, where the first two columns are start and stop times. if ( eventListColumn!=null ) { parser.setUnits( 0, AsciiParser.UNIT_UTC ); parser.setUnits( 1, AsciiParser.UNIT_UTC ); //if ( UnitsUtil.isTimeLocation( parser.getUnits(1) ) ) parser.setUnits(1,Units.us2000); // enough of a guess that this will find a good record. parser.setFieldParser(0, parser.UNITS_PARSER); parser.setFieldParser(1, parser.UNITS_PARSER); if ( !eventListColumn.equals("") ) { //"" means it is just two columns: st,en. int icol = parser.getFieldIndex(eventListColumn); EnumerationUnits eu= EnumerationUnits.create("events"); parser.setUnits(icol,eu); parser.setFieldParser(icol,parser.ENUMERATION_PARSER); if ( icol>2 ) { //get the RGB color as well. String[] fields = new String[parser.getRecordParser().fieldCount()]; String s= parser.readFirstParseableRecord(file.toString()); parser.getRecordParser().splitRecord(s,fields); if ( fields[2].startsWith("x") || fields[2].startsWith("0x" ) ) { // kludge for RGB color third column starts with x parser.setUnits(2,Units.dimensionless); parser.setFieldParser( 2, new FieldParser() { @Override public double parseField(String field, int columnIndex) throws ParseException { if ( field.startsWith("x") ) { return Integer.decode( "0"+field ); // kludge for Scott } else { return Integer.decode( field ); } } }); eventListColorColumn= 2; } } } } // check to see if the depend0 or data column appear to be times. I Promise I won't open the file again until it's read in. 
if ( timeColumn == -1 ) { String s = parser.readFirstParseableRecord(file.toString()); if (s != null) { String[] fields = new String[parser.getRecordParser().fieldCount()]; parser.getRecordParser().splitRecord(s,fields); if ( depend0!=null ) { int idep0 = parser.getFieldIndex(depend0); if (idep0 != -1) { // deal with -1 later String field = fields[idep0]; try { TimeUtil.parseTime(field); if ( new StringTokenizer( field, ":T-/" ).countTokens()>1 ) { parser.setUnits(idep0, AsciiParser.UNIT_UTC ); parser.setFieldParser(idep0, parser.UNITS_PARSER); } } catch (ParseException ex) { } } } if ( column!=null ) { int icol = parser.getFieldIndex(column); if (icol != -1) { // deal with -1 later String field = fields[icol]; try { field= field.trim(); if ( !UnitsUtil.isTimeLocation(parser.getUnits(icol)) && !field.startsWith("-") ) { TimeUtil.parseTime(field); if ( new StringTokenizer( field, ":T-/" ).countTokens()>2 ) { parser.setUnits(icol, AsciiParser.UNIT_UTC); parser.setFieldParser(icol, parser.UNITS_PARSER); } } } catch (ParseException ex) { } } } // check to see if first two columns look like times, and go ahead and handle these automatically for ( int icol= 0; icol2 ) { parser.setUnits(icol, AsciiParser.UNIT_UTC); parser.setFieldParser(icol, parser.UNITS_PARSER); } } } catch (ParseException ex) { } } } } o = params.get("units"); if (o != null) { String sunits = o; Units u; if ( sunits.equals("enum") ) { u = EnumerationUnits.create("default"); } else { u= Units.lookupUnits(sunits); } if (column != null) { int icol = parser.getFieldIndex(column); parser.setUnits(icol, u); if ( sunits.equals("enum") ) { parser.setFieldParser(icol, parser.ENUMERATION_PARSER); } else { parser.setFieldParser(icol, parser.UNITS_PARSER); } } } o= params.get("ordinal"); if (o!=null ) { String sunits = o; EnumerationUnits u = EnumerationUnits.create("default"); if ( sunits.trim().length()>0 ) { String[] ss= sunits.split(","); for ( String s : ss ) { u.createDatum(s); } } if (column != null) { int 
icol = parser.getFieldIndex(column); parser.setUnits(icol, u); parser.setFieldParser(icol,parser.ENUMERATION_PARSER); } } o= params.get("where"); if ( o!=null ) { String w= o; if ( w.length()>0 ) { Pattern p= Pattern.compile("\\.([nelg][qte])\\("); Matcher m= p.matcher(w); int ieq; if ( !m.find() ) { Pattern p2= Pattern.compile("\\.(within|matches)\\("); Matcher m2= p2.matcher(w); if ( !m2.find() ) { throw new IllegalArgumentException("where can only contain .eq,.ne,.ge,.gt,.le,.lt, .within, or .matches"); } else { ieq= m2.start(); String sop= m2.group(1); String sval= w.substring(ieq+sop.length()+2,w.length()-1); String sparm= w.substring(0,ieq); parser.setWhereConstraint( sparm, sop, DataSourceUtil.unescape(sval) ); } } else { ieq= m.start(); String op= m.group(1); String sval= w.substring(ieq+4,w.length()-1); String sparm= w.substring(0,ieq); parser.setWhereConstraint( sparm, op, sval ); } } } logger.fine("done process parameters and peeking at file"); // --- done configuration, now read --- DDataSet ds1; o = params.get("tail"); if (o != null) { ByteBuffer buff= new FileInputStream( file ).getChannel().map( MapMode.READ_ONLY, 0, file.length() ); int tailNum= Integer.parseInt(o); int tailCount=0; int ipos=(int)file.length(); boolean foundNonEOL= false; while ( tailCount0 ) { ipos--; byte ch= buff.get((int)ipos); switch (ch) { case 10: if ( ipos>1 && buff.get(ipos-1)==13 ) ipos=ipos-1; if ( foundNonEOL ) tailCount++; break; case 13: if ( foundNonEOL ) tailCount++; break; default: foundNonEOL= true; break; } } buff.position( tailCount getMetadata(ProgressMonitor mon) throws Exception { if (ds == null) { return new HashMap<>(); } @SuppressWarnings("unchecked") Map props = (Map) ds.property(QDataSet.USER_PROPERTIES); String header = (String) props.get("fileHeader"); if (header != null) { header = header.replaceAll("\t", "\\\\t"); props.put("fileHeader", header); } String firstRecord = (String) props.get("firstRecord"); if (firstRecord != null) { firstRecord = 
firstRecord.replaceAll("\t", "\\\\t"); props.put("firstRecord", firstRecord); } List remove= new ArrayList(); for ( Entry e: props.entrySet() ) { String k= e.getKey(); Object v= e.getValue(); if ( v==null ) continue; if ( !( v instanceof Number || v instanceof String || v instanceof org.das2.datum.Datum ) ) remove.add(k); } for ( String k: remove ) { props.remove(k); } return props; } /** * returns the field index of the name, which can be:
     * <ul>
     * <li>a column name</li>
     * <li>an implicit column name "field1"</li>
     * <li>a column index (0 is the first column)</li>
     * <li>a negative column index (-1 is the last column)</li>
     * </ul>
* @param name the column name * @param count the number of columns (for negative column numbers) * @return the index of the field. */ private int columnIndex( String name, int count ) { if ( Pattern.matches( "\\d+", name) ) { return Integer.parseInt(name); } else if ( Pattern.matches( "-\\d+", name) ) { return count + Integer.parseInt(name); } else if ( Pattern.matches( "field\\d+", name) ) { return Integer.parseInt( name.substring(5) ); } else { int idx= parser.getFieldIndex(name); return idx; } } /** * parse range strings like "3:6", "3:-5", and "Bx_gsm-Bz_gsm" * if the delimiter is colon, then the end is exclusive. If it is "-", * then it is inclusive. * @param o the string with the spec. * @param columnCount * @return two-element int array of the first and last indeces+1. * @throws java.lang.NumberFormatException */ private int[] parseRangeStr(String o, int columnCount) throws NumberFormatException { String s = o; int first = 0; int last = columnCount; if (s.contains(":")) { String[] ss = s.split(":",-2); if ( ss[0].length() > 0 ) { first = columnIndex(ss[0],columnCount); } if ( ss[1].length() > 0 ) { last = columnIndex(ss[1],columnCount); } } else if ( s.contains("--") ) { int isplit= s.indexOf("--",1); if ( isplit > 0 ) { first = columnIndex( s.substring(0,isplit),columnCount); } if ( isplit < s.length()-2 ) { last = 1 + columnIndex( s.substring(isplit+1),columnCount); } } else if ( s.contains("-") ) { String[] ss = s.split("-",-2); if ( ss[0].length() > 0 ) { first = columnIndex(ss[0],columnCount); } if ( ss[1].length() > 0 ) { last = 1 + columnIndex(ss[1],columnCount); } } return new int[]{first, last}; } }