5 |
5 |
import java.io.IOException;
|
6 |
6 |
import java.net.MalformedURLException;
|
7 |
7 |
import java.net.URL;
|
|
8 |
import java.nio.file.DirectoryStream;
|
|
9 |
import java.nio.file.Files;
|
|
10 |
import java.nio.file.Path;
|
|
11 |
import java.nio.file.Paths;
|
8 |
12 |
import java.util.Iterator;
|
9 |
13 |
|
10 |
|
import org.apache.commons.io.FileUtils;
|
11 |
14 |
import org.apache.commons.io.IOUtils;
|
|
15 |
import org.apache.commons.lang.StringUtils;
|
12 |
16 |
import org.apache.commons.logging.Log;
|
13 |
17 |
import org.apache.commons.logging.LogFactory;
|
14 |
18 |
|
15 |
19 |
import com.google.common.base.Function;
|
16 |
|
import com.google.common.base.Splitter;
|
17 |
|
import com.google.common.collect.Iterables;
|
18 |
20 |
import com.google.common.collect.Iterators;
|
19 |
21 |
|
20 |
22 |
import eu.dnetlib.data.collector.rmi.CollectorServiceException;
|
... | ... | |
22 |
24 |
|
23 |
25 |
/**
|
24 |
26 |
* The Class FilesystemIterable.
|
25 |
|
*
|
|
27 |
*
|
26 |
28 |
* @author Sandro, Michele
|
27 |
29 |
*/
|
28 |
30 |
public class FilesystemIterable implements Iterable<String> {
|
... | ... | |
34 |
36 |
private File baseDir;
|
35 |
37 |
|
36 |
38 |
/** The extensions. */
|
37 |
|
private String[] extensions;
|
|
39 |
private String filter;
|
38 |
40 |
|
39 |
41 |
/**
|
40 |
42 |
* Instantiates a new filesystem iterable.
|
41 |
|
*
|
|
43 |
*
|
42 |
44 |
* @param descriptor
|
43 |
45 |
* the descriptor
|
44 |
46 |
* @throws CollectorServiceException
|
... | ... | |
50 |
52 |
URL basePath = new URL(baseUrl);
|
51 |
53 |
this.baseDir = new File(basePath.getPath());
|
52 |
54 |
if (!baseDir.exists()) throw new CollectorServiceException(String.format("The base ULR %s, does not exist", basePath.getPath()));
|
53 |
|
this.extensions = Iterables
|
54 |
|
.toArray(Splitter.on(",").omitEmptyStrings().trimResults().split(descriptor.getParams().get("extensions")), String.class);
|
|
55 |
// FIXME at the moment the extensions does not work
|
|
56 |
this.filter = descriptor.getParams().get("extensions");
|
55 |
57 |
} catch (MalformedURLException e) {
|
56 |
58 |
throw new CollectorServiceException("Filesystem collector failed! ", e);
|
57 |
59 |
}
|
... | ... | |
59 |
61 |
|
60 |
62 |
/**
|
61 |
63 |
* {@inheritDoc}
|
62 |
|
*
|
|
64 |
*
|
63 |
65 |
* @see java.lang.Iterable#iterator()
|
64 |
66 |
*/
|
65 |
67 |
@Override
|
66 |
68 |
public Iterator<String> iterator() {
|
67 |
|
return Iterators.transform(FileUtils.iterateFiles(baseDir, extensions, true), new Function<File, String>() {
|
|
69 |
try {
|
68 |
70 |
|
69 |
|
@Override
|
70 |
|
public String apply(final File input) {
|
71 |
|
FileInputStream fileInputStream = null;
|
72 |
|
try {
|
73 |
|
fileInputStream = new FileInputStream(input);
|
74 |
|
String s = IOUtils.toString(fileInputStream);
|
75 |
|
return s.startsWith("\uFEFF") ? s.substring(1) : s;
|
76 |
|
} catch (Exception e) {
|
77 |
|
log.error("Unable to read " + input.getPath());
|
78 |
|
return null;
|
79 |
|
} finally {
|
80 |
|
if (fileInputStream != null) {
|
81 |
|
try {
|
82 |
|
fileInputStream.close();
|
83 |
|
} catch (IOException e) {
|
84 |
|
log.error("Unable to close inputstream for " + input.getPath());
|
|
71 |
DirectoryStream<Path> directoryStreams = null;
|
|
72 |
if (StringUtils.isBlank(filter)) {
|
|
73 |
directoryStreams = Files.newDirectoryStream(Paths.get(baseDir.getAbsolutePath()), filter);
|
|
74 |
} else {
|
|
75 |
directoryStreams = Files.newDirectoryStream(Paths.get(baseDir.getAbsolutePath()));
|
|
76 |
}
|
|
77 |
return Iterators.transform(directoryStreams.iterator(), new Function<Path, String>() {
|
|
78 |
|
|
79 |
@Override
|
|
80 |
public String apply(final Path input) {
|
|
81 |
FileInputStream fileInputStream = null;
|
|
82 |
try {
|
|
83 |
fileInputStream = new FileInputStream(input.toString());
|
|
84 |
String s = IOUtils.toString(fileInputStream);
|
|
85 |
return s.startsWith("\uFEFF") ? s.substring(1) : s;
|
|
86 |
} catch (Exception e) {
|
|
87 |
log.error("Unable to read " + input.toString());
|
|
88 |
return null;
|
|
89 |
} finally {
|
|
90 |
if (fileInputStream != null) {
|
|
91 |
try {
|
|
92 |
fileInputStream.close();
|
|
93 |
} catch (IOException e) {
|
|
94 |
log.error("Unable to close inputstream for " + input.toString());
|
|
95 |
}
|
85 |
96 |
}
|
86 |
97 |
}
|
87 |
98 |
}
|
88 |
|
}
|
89 |
|
});
|
|
99 |
});
|
|
100 |
} catch (IOException e1) {
|
|
101 |
log.error(e1);
|
|
102 |
return null;
|
|
103 |
}
|
90 |
104 |
}
|
91 |
105 |
|
92 |
106 |
}
|
Implemented a new version of the FileSystem collector plugin, which uses the file system library (java.nio.file) introduced in Java 1.7.