package org.autoplot.cdaweb; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; import java.net.URLConnection; import java.text.ParseException; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; import javax.xml.xpath.XPathFactory; import org.das2.components.DasProgressPanel; import org.das2.datum.Datum; import org.das2.datum.DatumRange; import org.das2.datum.DatumRangeUtil; import org.das2.datum.TimeParser; import org.das2.datum.TimeUtil; import org.das2.datum.Units; import org.das2.fsm.FileStorageModel; import org.das2.util.AboutUtil; import org.das2.util.LoggerManager; import org.das2.util.filesystem.FileObject; import org.das2.util.filesystem.FileSystem; import org.das2.util.monitor.CancelledOperationException; import org.das2.util.monitor.NullProgressMonitor; import org.das2.util.monitor.ProgressMonitor; import org.das2.util.monitor.SubTaskMonitor; import org.autoplot.datasource.DataSetURI; import org.autoplot.datasource.DataSourceUtil; import org.w3c.dom.DOMException; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.xml.sax.InputSource; import org.xml.sax.SAXException; /** * Class for encapsulating the functions of the database * @author jbf */ public class CDAWebDB { private 
static final Logger logger= LoggerManager.getLogger("apdss.cdaweb"); private static CDAWebDB instance=null; public static final String CDAWeb; static { if ( System.getProperty("cdawebHttps","true").equals("false") ) { CDAWeb = "http://cdaweb.gsfc.nasa.gov/"; } else { // Note modern Javas are needed for https support. // https will be required by Spring 2017. CDAWeb = "https://cdaweb.gsfc.nasa.gov/"; } } public static final String dbloc= CDAWeb + "pub/catalogs/all.xml"; //public static final String dbloc= "https://cdaweb.sci.gsfc.nasa.gov/%7Ecgladney/all.xml"; //private String version; private Document document; // should consume ~ 2 MB private Map ids; // serviceproviderId,Id private long refreshTime=0; private final Map bases= new HashMap(); private final Map tmpls= new HashMap(); private Boolean online= null; public static synchronized CDAWebDB getInstance() { if ( instance==null ) { instance= new CDAWebDB(); } return instance; } /** * returns true if the CDAWeb is on line. * @return true if the CDAWeb is on line. */ public synchronized boolean isOnline() { if ( online==null ) { try { // get a file via http so we get a filesystem offline if we are at a hotel. // Note the file is small, and if the file is already downloaded, this will only result in a head request. DataSetURI.getFile( CDAWeb + "pub/software/cdawlib/AAREADME.txt", false, new NullProgressMonitor() ); online= true; } catch ( IOException ex ) { try { if ( !AboutUtil.isJreVersionAtLeast("1.8.0_102") ) { logger.warning("Java version is probably too old to connect to CDAWeb"); } } catch (ParseException ex1) { logger.warning("Java version may be too old to connect to CDAWeb"); } online= false; } } return online; } /** * refresh no more often than once per 10 minutes. We don't need to refresh * often. Note it only takes a few seconds to refresh, plus download time, * but we don't want to pound on the CDAWeb server needlessly. * @param mon * @throws java.io.IOException when reading dbloc file. 
*/ public synchronized void maybeRefresh( ProgressMonitor mon ) throws IOException { long t= System.currentTimeMillis(); if ( t - refreshTime > 600000 ) { // 10 minutes refresh(mon); refreshTime= t; } } // // public synchronized void refreshViaWebServices( ProgressMonitor mon ) throws IOException { // try { // DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); // mon.setProgressMessage("refreshing database");//TODO: is this working // mon.started(); // mon.setTaskSize(30); // mon.setProgressMessage("call WS for listing" ); // // } catch (SAXException ex) { // logger.log(Level.SEVERE, ex.getMessage(), ex); // } catch (ParserConfigurationException | URISyntaxException ex) { // logger.log(Level.SEVERE, ex.getMessage(), ex); // } // // } // /** * Download and parse the all.xml to create a database of available products. * @param mon progress monitor for the task * @throws IOException when reading dbloc file. */ public synchronized void refresh( ProgressMonitor mon ) throws IOException { try { DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); mon.setProgressMessage("refreshing database"); mon.started(); mon.setTaskSize(30); mon.setProgressMessage("downloading file "+dbloc ); //String lookfor= "ftp://cdaweb.gsfc.nasa.gov/pub/istp/"; //String lookfor2= "ftp://cdaweb.gsfc.nasa.gov/pub/cdaweb_data"; logger.log( Level.FINE, "downloading file {0}", dbloc); File f= DataSetURI.getFile( new URI(dbloc), SubTaskMonitor.create( mon, 0, 10 ) ) ; // bug 3055130 okay FileInputStream fin=null; InputStream altin= null; try { fin= new FileInputStream( f ); InputSource source = new InputSource( fin ); mon.setTaskProgress(10); mon.setProgressMessage("parsing file "+dbloc ); document = builder.parse(source); //XPath xp = XPathFactory.newInstance().newXPath(); //version= xp.evaluate( "/sites/datasite/@version", document ); mon.setTaskProgress(20); mon.setProgressMessage("reading IDs"); altin= 
CDAWebDB.class.getResourceAsStream("/org/autoplot/cdaweb/filenames_alt.txt") ; if ( altin==null ) { throw new RuntimeException("Unable to locate /org/autoplot/cdaweb/filenames_alt.txt"); } try (BufferedReader rr = new BufferedReader( new InputStreamReader( altin ) )) { String ss= rr.readLine(); while ( ss!=null ) { int i= ss.indexOf("#"); if ( i>-1 ) ss= ss.substring(0,i); if ( ss.trim().length()>0 ) { String[] sss= ss.split("\\s+"); String naming= sss[2]; naming= naming.replaceAll("\\%", "\\$"); naming= naming.replaceAll("\\?","."); //TODO: this . happens to match one character. This may change. tmpls.put( sss[0], naming ); if ( sss[1].length()>1 ) bases.put( sss[0], sss[1] ); } ss= rr.readLine(); } } refreshServiceProviderIds(mon.getSubtaskMonitor(20,30,"process document")); mon.setTaskProgress(30); } finally { if ( fin!=null ) fin.close(); if ( altin!=null ) altin.close(); mon.finished(); } //} catch (XPathExpressionException ex) { // logger.log(Level.SEVERE, ex.getMessage(), ex); } catch (SAXException ex) { logger.log(Level.SEVERE, ex.getMessage(), ex); } catch (ParserConfigurationException | URISyntaxException ex) { logger.log(Level.SEVERE, ex.getMessage(), ex); } } /** * isolate the code that resolves which files need to be accessed, so that * we can use the web service when it is available. * @param spid the service provider id, like "AC_H2_CRIS" * @param tr the timerange * @param useWebServiceHint null means no preference, or "T", or "F" means use file template found in all.xml. 
* @param mon progress monitor for the download
     * @return array of strings, with filename|startTime|endTime
     * @throws java.io.IOException
     * @throws org.das2.util.monitor.CancelledOperationException
     */
    public String[] getFiles( String spid, DatumRange tr, String useWebServiceHint, ProgressMonitor mon ) throws IOException, CancelledOperationException {
        // anything other than "F" (including null and "T") selects the web service.
        boolean useService= !( "F".equals(useWebServiceHint) );

        String[] result;

        logger.log(Level.FINE, "getFiles {0} {1} ws={2}", new Object[]{spid, tr, useService});

        if ( useService ) {
            String[] ff;
            try {
                ff = getOriginalFilesAndRangesFromWebService(spid, tr, mon );
            } catch ( IOException ex ) {
                // the web service failed, so fall back to the file template branch.
                return getFiles( spid, tr, "F", mon );
            }
            // keep only the entries whose start and end fields intersect the requested range.
            List resultList= new ArrayList(ff.length);
            for ( String ff1 : ff ) {
                try {
                    // fields are separated by "|"; fields 1 and 2 are parsed as the time range.
                    String[] ss = ff1.split("\\|");
                    DatumRange dr= DatumRangeUtil.parseTimeRange( ss[1]+ " to "+ ss[2] );
                    if (dr.intersects(tr)) {
                        resultList.add(ff1);
                    }
                }catch (ParseException ex) {
                    // an unparseable entry is logged and dropped.
                    // NOTE(review): this uses a logger looked up by class name rather than the class's "logger" field.
                    Logger.getLogger(CDAWebDB.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
            result= resultList.toArray(new String[resultList.size()]);
        } else {
            try {
                String tmpl= getNaming( spid );
                String base= getBaseUrl( spid );
                logger.log( Level.FINE, "tmpl={0}", tmpl);
                logger.log( Level.FINE, "base={0}", base);
                logger.log(Level.FINE, "{0}/{1}", new Object[]{base, tmpl});
                FileSystem fs= FileSystem.create( new URI( base ) ); // bug3055130 okay
                FileStorageModel fsm= FileStorageModel.create( fs, tmpl );
                String[] ff= fsm.getBestNamesFor(tr,mon);
                result= new String[ ff.length ];
                TimeParser tp= TimeParser.create("$Y-$m-$dT$H:$M:$SZ");
                // NOTE(review): the text from this loop header up to the signature of getServiceProviderIds
                // appears to have been truncated (everything between a "<" and a later ">" is missing),
                // so the rest of this method and the methods which followed it are not visible here.
                for ( int i=0; i r= new ArrayList<>(); for ( int i=0; i getServiceProviderIds() { return ids; }

    /**
     * return the list of IDs that this reader can consume.
     * We apply a number of constraints:
    * <ol>
    * <li>files must end in .cdf
    * <li>timerange_start and timerange_stop must be ISO8601 times.
    * <li>URL must start with a /, and may not be another website.
    * </ol>
* @throws IOException when the XPath expression for the dataset nodes cannot be evaluated.
     */
    private void refreshServiceProviderIds( ProgressMonitor mon ) throws IOException {
        if ( document==null ) {
            throw new IllegalArgumentException("document has not been read, refresh must be called first");
        }

        try {
            XPath xp = getXPathFactory().newXPath();

            // every dataset element under sites/datasite is a candidate.
            NodeList nodes = (NodeList) xp.evaluate( "//sites/datasite/dataset", document, XPathConstants.NODESET );

            // insertion-ordered, so the ids keep the order in which they are added.
            Map result= new LinkedHashMap<>();

            mon.setTaskSize(nodes.getLength());
            mon.started();

            // NOTE(review): the loop header and the declarations of node, attrs, st and en appear to have been
            // truncated here (text between a "<" and a later ">" is missing).  Presumably st and en are the
            // timerange_start and timerange_stop attributes -- confirm against the complete file.
            for ( int i=0; i1 && Character.isDigit(st.charAt(0))
                            && en.length()>1 && Character.isDigit(en.charAt(0))
                            //&& nssdc_ID.contains("None") ) {
                            ) {
                        String name= attrs.getNamedItem("serviceprovider_ID").getTextContent();
                        String url= getURL(name,node);
                        // only accept locations on the CDAWeb server itself.
                        if ( url!=null &&
                                ( url.startsWith( CDAWeb ) ||
                                url.startsWith("ftp://cdaweb.gsfc.nasa.gov" ) ) && !url.startsWith("/tower3/private" ) ) {
                            String filenaming= getFilenaming(node);
                            // s holds the same serviceprovider_ID text as name.
                            String s=attrs.getNamedItem("serviceprovider_ID").getTextContent();
                            if ( filenaming.endsWith(".cdf") ) {
                                String desc= getDescription(node);
                                //String sid=attrs.getNamedItem("ID").getTextContent();
                                result.put(s,desc);
                            } else if ( filenaming.endsWith(".nc" ) ) {
                                // NOTE(review): the message says the id is ignored, but the id is added to
                                // the result just below regardless -- confirm which is intended.
                                if ( !name.contains("FORMOSAT") ) { // GOLD_L2_ON2 missing visad library -- not sure why.
                                    logger.log(Level.FINE, "ignoring {0} because .nc file is not supported", s);
                                }
                                String desc= getDescription(node);
                                //String sid=attrs.getNamedItem("ID").getTextContent();
                                result.put(s,desc);
                            } else {
                                logger.log(Level.FINE, "ignoring {0} because files do not end in .cdf or .nc", s);
                            }
                        }
                    }
                } catch ( DOMException ex2 ) {
                    // logged, and processing continues after this catch.
                    logger.log( Level.WARNING, "exception", ex2 );
                }
            }
            mon.finished();

            // replace the ids only after the whole document has been processed.
            ids= result;

        } catch (XPathExpressionException ex) {
            logger.log( Level.WARNING, "serviceprovider_IDs exception", ex );
            // NOTE(review): the cause is logged above but is not attached to the IOException.
            throw new IOException("unable to read serviceprovider_IDs");
        }

    }

    /**
     * 4.2 seconds before getting description.  After too!
* @param args command line arguments, which are not used.
     * @throws IOException
     * @throws java.text.ParseException
     */
    public static void main( String [] args ) throws IOException, ParseException {
        CDAWebDB db= getInstance();

        long t0= System.currentTimeMillis();

        String[] files;

        db.refresh( DasProgressPanel.createFramed("refreshing database") );

        // look up the base and naming once, print them, then use them to list files.
        String baseUrl= db.getBaseUrl("AC_H3_CRIS");
        String naming= db.getNaming("AC_H3_CRIS");
        System.err.println( baseUrl );
        System.err.println( naming );

        FileStorageModel fsm= FileStorageModel.create( FileSystem.create(baseUrl), naming );
        files= fsm.getBestNamesFor( DatumRangeUtil.parseTimeRange( "20110601-20110701" ), new NullProgressMonitor() );
        for ( String s: files ) {
            System.err.println(s); //logger ok
        }

        db.getSampleTime("I1_AV_OTT");  // empty trailing folder 1984 caused problem before 20120525.
        db.getSampleTime("IA_K0_ENF");  // no files...

        files= db.getFilesAndRangesFromWebService( "AC_H0_MFI", DatumRangeUtil.parseTimeRange( "20010101T000000Z-20010131T000000Z" ) );
        for ( String s: files ) {
            System.err.println(s); //logger ok
        }

        files= db.getFilesAndRangesFromWebService( "TIMED_L1B_SABER", DatumRangeUtil.parseTimeRange( "2002-01-26" ) );
        for ( String s: files ) {
            System.err.println(s); //logger ok
        }

        // the type arguments are needed for the enhanced for loop over entrySet to compile.
        Map<String,String> ids= db.getServiceProviderIds( );

        for ( Entry<String,String> e: ids.entrySet() ) {
            System.err.println( e.getKey() + ":\t" + e.getValue() ); //logger ok
        }
        System.err.println( ids.size() ); //logger ok

        System.err.println( db.getNaming( "AC_H0_MFI" ) ); //logger ok
        System.err.println( db.getTimeRange( "AC_H0_MFI" ) ); //logger ok

        System.err.println( "Timer: " + ( System.currentTimeMillis() - t0 ) ); //logger ok

    }
}