Project

General

Profile

« Previous | Next » 

Revision 52054

Filter the collected metadata, and add a "filter" parameter to the template to specify what to filter out.

View differences:

modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HTTPWithFileName/HTTPWithFileNameCollectorPlugin.java
11 11

  
12 12
    @Override
13 13
    public Iterable<String> collect(InterfaceDescriptor interfaceDescriptor, String s, String s1) throws CollectorServiceException {
14
        return new HTTPWithFileNameCollectorIterable(interfaceDescriptor.getBaseUrl());
14
        return new HTTPWithFileNameCollectorIterable(interfaceDescriptor.getBaseUrl(), interfaceDescriptor.getParams().get("filter"));
15 15
    }
16 16
}
modules/dnet-collector-plugins/trunk/src/main/java/eu/dnetlib/data/collector/plugins/HTTPWithFileName/HTTPWithFileNameCollectorIterable.java
24 24

  
25 25
    private final ArrayList<String> urls = new ArrayList<>();
26 26
    private final ArrayList<String> metas = new ArrayList<String>();
27
    private String filter;
27 28

  
29
    public HTTPWithFileNameCollectorIterable(String startUrl, String filter){
28 30

  
29
    public HTTPWithFileNameCollectorIterable(String startUrl){
30

  
31 31
        urls.add(startUrl);
32
        this.filter = filter;
32 33
    }
33 34

  
35
    private boolean containsFilter(String meta){
36
        if (filter == null || filter.isEmpty())
37
            return false;
38
        String[] filter = this.filter.split(";");
39
        for(String item:filter){
40
            if (meta.contains(item))
41
                return true;
42
        }
43
        return false;
44
    }
45

  
34 46
    private String addFilePath(String meta,String url, boolean isJson){
35 47
        String path = url.replace("metadata", "pdf");
48

  
36 49
        try {
37 50
            if(isJson)
38 51
                meta = meta.substring(0, meta.length() - 1) + ",'downloadFileUrl':'" + path.substring(0, path.indexOf(".json")) + ".pdf'}";
39 52
            else{
40
                if (meta.startsWith("<!DOCTYPE"))
41
                    meta = meta.substring(meta.indexOf(">")+1);
42
                int index = meta.lastIndexOf("</");
43
                meta = meta.substring(0, index) + "<downloadFileUrl>" + path.substring(0, path.indexOf(".xml")) + ".pdf</downloadFileUrl>" + meta.substring(index);
53

  
54
                    if (meta.startsWith("<!DOCTYPE"))
55
                        meta = meta.substring(meta.indexOf(">")+1);
56
                    int index = meta.lastIndexOf("</");
57
                    meta = meta.substring(0, index) + "<downloadFileUrl>" + path.substring(0, path.indexOf(".xml")) + ".pdf</downloadFileUrl>" + meta.substring(index);
58

  
59

  
44 60
            }
45 61

  
46 62
        }catch(Exception ex){
......
91 107
                        if(c.isStatusOk()){
92 108
                            try {
93 109
                                String ret = c.getResponse();
94
                                if (ret != null && ret.length()>0)
110
                                if (ret != null && ret.length()>0 && !containsFilter(ret))
95 111
                                    queue.put(addFilePath(ret,url,url.endsWith(".json")));
96 112
                            } catch (InterruptedException e) {
97 113
                                log.error("not inserted in queue element associate to url " + url + " error: " + e.getMessage() );
modules/dnet-collector-plugins/trunk/src/main/resources/eu/dnetlib/data/collector/plugins/applicationContext-dnet-modular-collector-plugins.xml
36 36
		<property name="protocolDescriptor">
37 37

  
38 38
			<bean class="eu.dnetlib.data.collector.rmi.ProtocolDescriptor" p:name="HTTPWithFileName">
39
				<!--<property name="params">-->
40
				<!--<list>-->
41
				<!--<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter"-->
42
					  <!--p:name="baseURL" />-->
43
				<!--</list>-->
44
				<!--</property>-->
39
				<property name="params">
40
				<list>
41
				<bean class="eu.dnetlib.data.collector.rmi.ProtocolParameter"
42
					  p:name="filter" />
43
				</list>
44
				</property>
45 45
			</bean>
46 46
		</property>
47 47
	</bean>

Also available in: Unified diff