1 |
|
//package eu.dnetlib.data.collector.plugins.schemaorg;
|
2 |
|
//
|
3 |
|
//import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapFileIterator;
|
4 |
|
//import eu.dnetlib.data.collector.plugins.schemaorg.sitemapindex.SitemapIndexIterator;
|
5 |
|
//import org.apache.commons.logging.Log;
|
6 |
|
//import org.apache.commons.logging.LogFactory;
|
7 |
|
//
|
8 |
|
//import java.net.URL;
|
9 |
|
//import java.util.Iterator;
|
10 |
|
//import java.util.concurrent.ArrayBlockingQueue;
|
11 |
|
//import java.util.concurrent.ExecutorService;
|
12 |
|
//import java.util.concurrent.Executors;
|
13 |
|
//
|
14 |
|
//public class SchemaOrgIterableOLD implements Iterable<String> {
|
15 |
|
// private static final Log log = LogFactory.getLog(SchemaOrgIterable.class);
|
16 |
|
//
|
17 |
|
// public static class Options {
|
18 |
|
// private SchemaOrgIterator.Options schemaOrgIteratorOptions;
|
19 |
|
// private SitemapIndexIterator.Options sitemapIndexIteratorOptions;
|
20 |
|
// private SitemapFileIterator.Options sitemapFileIteratorOptions;
|
21 |
|
// private EndpointAccessIterator.Options endpointAccessIteratorOptions;
|
22 |
|
// private DatasetMappingIterator.Options datasetMappingIteratorOptions;
|
23 |
|
//
|
24 |
|
// private int queueSize;
|
25 |
|
//
|
26 |
|
// public DatasetMappingIterator.Options getDatasetMappingIteratorOptions() {
|
27 |
|
// return datasetMappingIteratorOptions;
|
28 |
|
// }
|
29 |
|
//
|
30 |
|
// public void setDatasetMappingIteratorOptions(DatasetMappingIterator.Options datasetMappingIteratorOptions) {
|
31 |
|
// this.datasetMappingIteratorOptions = datasetMappingIteratorOptions;
|
32 |
|
// }
|
33 |
|
//
|
34 |
|
// public EndpointAccessIterator.Options getEndpointAccessIteratorOptions() {
|
35 |
|
// return endpointAccessIteratorOptions;
|
36 |
|
// }
|
37 |
|
//
|
38 |
|
// public void setEndpointAccessIteratorOptions(EndpointAccessIterator.Options endpointAccessIteratorOptions) {
|
39 |
|
// this.endpointAccessIteratorOptions = endpointAccessIteratorOptions;
|
40 |
|
// }
|
41 |
|
//
|
42 |
|
// public SitemapFileIterator.Options getSitemapFileIteratorOptions() {
|
43 |
|
// return sitemapFileIteratorOptions;
|
44 |
|
// }
|
45 |
|
//
|
46 |
|
// public void setSitemapFileIteratorOptions(SitemapFileIterator.Options sitemapFileIteratorOptions) {
|
47 |
|
// this.sitemapFileIteratorOptions = sitemapFileIteratorOptions;
|
48 |
|
// }
|
49 |
|
//
|
50 |
|
// public SitemapIndexIterator.Options getSitemapIndexIteratorOptions() {
|
51 |
|
// return sitemapIndexIteratorOptions;
|
52 |
|
// }
|
53 |
|
//
|
54 |
|
// public void setSitemapIndexIteratorOptions(SitemapIndexIterator.Options sitemapIndexIteratorOptions) {
|
55 |
|
// this.sitemapIndexIteratorOptions = sitemapIndexIteratorOptions;
|
56 |
|
// }
|
57 |
|
//
|
58 |
|
// public SchemaOrgIterator.Options getSchemaOrgIteratorOptions() {
|
59 |
|
// return schemaOrgIteratorOptions;
|
60 |
|
// }
|
61 |
|
//
|
62 |
|
// public void setSchemaOrgIteratorOptions(SchemaOrgIterator.Options schemaOrgIteratorOptions) {
|
63 |
|
// this.schemaOrgIteratorOptions = schemaOrgIteratorOptions;
|
64 |
|
// }
|
65 |
|
//
|
66 |
|
// public int getQueueSize() {
|
67 |
|
// return queueSize;
|
68 |
|
// }
|
69 |
|
//
|
70 |
|
// public void setQueueSize(int queueSize) {
|
71 |
|
// this.queueSize = queueSize;
|
72 |
|
// }
|
73 |
|
// }
|
74 |
|
//
|
75 |
|
// private Options options;
|
76 |
|
// private ArrayBlockingQueue<String> queue;
|
77 |
|
//
|
78 |
|
// public SchemaOrgIterable(Options options) {
|
79 |
|
// this.options = options;
|
80 |
|
// this.queue = new ArrayBlockingQueue<>(this.options.getQueueSize(), true);
|
81 |
|
// }
|
82 |
|
//
|
83 |
|
// public void bootstrap() {
|
84 |
|
// ExecutorService executor = Executors.newSingleThreadExecutor();
|
85 |
|
// executor.execute(new Harvester());
|
86 |
|
// executor.shutdown();
|
87 |
|
// }
|
88 |
|
//
|
89 |
|
// @Override
|
90 |
|
// public Iterator<String> iterator() {
|
91 |
|
// return new SchemaOrgIterator(this.options.getSchemaOrgIteratorOptions(), this.queue);
|
92 |
|
// }
|
93 |
|
//
|
94 |
|
// private class Harvester implements Runnable{
|
95 |
|
//
|
96 |
|
// @Override
|
97 |
|
// public void run() {
|
98 |
|
// this.execute();
|
99 |
|
// }
|
100 |
|
//
|
101 |
|
// private void execute(){
|
102 |
|
// try {
|
103 |
|
// SitemapIndexIterator sitemapIndexIterator = new SitemapIndexIterator(options.getSitemapIndexIteratorOptions());
|
104 |
|
// sitemapIndexIterator.bootstrap();
|
105 |
|
//
|
106 |
|
// while (sitemapIndexIterator.hasNext()) {
|
107 |
|
// String sitemapFile = sitemapIndexIterator.next();
|
108 |
|
// if(sitemapFile == null) continue;
|
109 |
|
//
|
110 |
|
// SitemapFileIterator.Options sitemapFileIteratorOptions = (SitemapFileIterator.Options)options.getSitemapFileIteratorOptions().clone();
|
111 |
|
// sitemapFileIteratorOptions.setFileUrl(new URL(sitemapFile));
|
112 |
|
// SitemapFileIterator sitemapFileIterator = new SitemapFileIterator(sitemapFileIteratorOptions);
|
113 |
|
// sitemapFileIterator.bootstrap();
|
114 |
|
//
|
115 |
|
// EndpointAccessIterator endpointAccessIterator = new EndpointAccessIterator(options.getEndpointAccessIteratorOptions(), sitemapFileIterator);
|
116 |
|
// DatasetMappingIterator datasetMappingIterator = new DatasetMappingIterator(options.getDatasetMappingIteratorOptions(), endpointAccessIterator);
|
117 |
|
//
|
118 |
|
// while (datasetMappingIterator.hasNext()) {
|
119 |
|
// String xml = datasetMappingIterator.next();
|
120 |
|
// if(xml == null) continue;
|
121 |
|
//
|
122 |
|
// queue.put(xml);
|
123 |
|
// }
|
124 |
|
// }
|
125 |
|
// }catch(Exception ex){
|
126 |
|
// log.error("problem execution harvesting", ex);
|
127 |
|
// }
|
128 |
|
// finally {
|
129 |
|
// try {
|
130 |
|
// queue.put(Conventions.TerminateHint);
|
131 |
|
// } catch (Exception ex) {
|
132 |
|
// log.fatal("could not add termination hint. the process will not terminate gracefully", ex);
|
133 |
|
// }
|
134 |
|
// }
|
135 |
|
// }
|
136 |
|
// }
|
137 |
|
//}
|
Deleted dead code. It shouldn't have been committed in the first place.