Project

General

Profile

« Previous | Next » 

Revision 62084

Partial cleanup of code related to HBase

View differences:

modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/hbase/PrepareCopyTableJobNode.java
1
package eu.dnetlib.msro.openaireplus.workflows.nodes.hbase;
2

  
3
import java.util.Map;
4
import java.util.Set;
5
import javax.annotation.Resource;
6

  
7
import com.google.common.collect.Sets;
8
import com.googlecode.sarasvati.Arc;
9
import com.googlecode.sarasvati.NodeToken;
10
import eu.dnetlib.data.hadoop.rmi.HadoopService;
11
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;
12
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
13
import eu.dnetlib.msro.rmi.MSROException;
14
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode;
15
import org.apache.commons.lang.StringUtils;
16
import org.apache.commons.logging.Log;
17
import org.apache.commons.logging.LogFactory;
18

  
19
/**
20
 * The PrepareCopyTableJobNode prepares the parameters needed to run the CopyTable job.
21
 */
22
public class PrepareCopyTableJobNode extends SimpleJobNode {
23

  
24
	/**
25
	 * The Constant ZOOKEEPER_ZNODE_PARENT.
26
	 */
27
	private static final String ZOOKEEPER_ZNODE_PARENT = "zookeeper.znode.parent";
28

  
29
	/**
30
	 * The Constant HBASE_ZOOKEEPER_CLIENT_PORT.
31
	 */
32
	private static final String HBASE_ZOOKEEPER_CLIENT_PORT = "hbase.zookeeper.client.port";
33

  
34
	/**
35
	 * The Constant HBASE_ZOOKEEPER_QUORUM.
36
	 */
37
	private static final String HBASE_ZOOKEEPER_QUORUM = "hbase.zookeeper.quorum";
38

  
39
	/**
40
	 * logger.
41
	 */
42
	private static final Log log = LogFactory.getLog(PrepareCopyTableJobNode.class);
43

  
44
	/**
45
	 * The source table.
46
	 */
47
	private String sourceCluster;
48

  
49
	/**
50
	 * The target cluster.
51
	 */
52
	private String targetCluster;
53

  
54
	private String sourceTable;
55

  
56
	private String targetTable;
57

  
58
	@Resource
59
	private UniqueServiceLocator serviceLocator;
60

  
61
	/*
62
	 * (non-Javadoc)
63
	 * 
64
	 * @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken)
65
	 */
66
	@Override
67
	protected String execute(final NodeToken token) throws Exception {
68

  
69
		checkNodeParams();
70

  
71
		final String outputQuorum = getOutputQuorum();
72
		log.info("build hbase quorum: " + outputQuorum);
73
		token.getEnv().setAttribute("peer.adr", outputQuorum);
74
		token.getEnv().setAttribute("sourceCluster", getSourceCluster());
75
		token.getEnv().setAttribute("sourceTable", getSourceTable());
76

  
77
		token.getEnv().setAttribute("targetCluster", getTargetCluster());
78
		token.getEnv().setAttribute("targetTable", getTargetTable());
79

  
80
		return Arc.DEFAULT_ARC;
81
	}
82

  
83
	/**
84
	 * Builds the output quorum.
85
	 *
86
	 * @return the output quorum
87
	 * @throws HadoopServiceException when cannot retrieve the clustr configuration
88
	 * @throws MSROException          when some of the needed properties is missing in the cluster configuration
89
	 */
90
	private String getOutputQuorum() throws HadoopServiceException, MSROException {
91
		Map<String, String> conf = serviceLocator.getService(HadoopService.class).getClusterConfiguration(getTargetCluster());
92
		log.debug(conf);
93

  
94
		String hbaseQuorum = conf.get(HBASE_ZOOKEEPER_QUORUM);
95
		String hbasePort = conf.get(HBASE_ZOOKEEPER_CLIENT_PORT);
96
		String znodeParent = conf.get(ZOOKEEPER_ZNODE_PARENT);
97

  
98
		checkParamExist(hbaseQuorum, String.format("unable to find property '%s' in cluster configuration: %s", HBASE_ZOOKEEPER_QUORUM, hbaseQuorum));
99
		checkParamExist(hbasePort, String.format("unable to find property '%s' in cluster configuration: %s", HBASE_ZOOKEEPER_CLIENT_PORT, hbasePort));
100
		checkParamExist(znodeParent, String.format("unable to find property '%s' in cluster configuration: %s", ZOOKEEPER_ZNODE_PARENT, znodeParent));
101

  
102
		String outputQuorum = String.format("%s:%s:%s", hbaseQuorum, hbasePort, znodeParent);
103
		return outputQuorum;
104
	}
105

  
106
	/**
107
	 * Checks the wf params.
108
	 *
109
	 * @throws MSROException          the MSRO exception
110
	 * @throws HadoopServiceException
111
	 */
112
	private void checkNodeParams() throws MSROException, HadoopServiceException {
113

  
114
		checkParamExist(getSourceCluster(), "source cluster must be set");
115
		checkParamExist(getTargetCluster(), "target cluster must be set");
116
		checkParamExist(getSourceTable(), "source table must be set");
117
		checkParamExist(getTargetTable(), "target table must be set");
118

  
119
		final HadoopService hadoop = serviceLocator.getService(HadoopService.class);
120

  
121
		Set<String> clusters = Sets.newHashSet(hadoop.listClusters());
122
		if (!clusters.contains(getSourceCluster())) { throw new MSROException(String.format("source cluster '%s' doesn not exists", getSourceCluster())); }
123
		if (!clusters.contains(getTargetCluster())) { throw new MSROException(String.format("target cluster '%s' doesn not exists", getTargetCluster())); }
124

  
125
		if (!hadoop.existHbaseTable(getSourceCluster(), getSourceTable())) {
126
			throw new MSROException(String.format(
127
					"source table '%s' doesn not exists on cluster '%s'", getSourceTable(), getSourceCluster()));
128
		}
129
	}
130

  
131
	/**
132
	 * Check parameter existence.
133
	 *
134
	 * @param param the param
135
	 * @param msg   the msg
136
	 * @throws MSROException the MSRO exception
137
	 */
138
	private void checkParamExist(final String param, final String msg) throws MSROException {
139
		if (StringUtils.isBlank(param)) { throw new MSROException(msg); }
140
	}
141

  
142
	public String getSourceCluster() {
143
		return sourceCluster;
144
	}
145

  
146
	public void setSourceCluster(final String sourceCluster) {
147
		this.sourceCluster = sourceCluster;
148
	}
149

  
150
	public String getTargetCluster() {
151
		return targetCluster;
152
	}
153

  
154
	public void setTargetCluster(final String targetCluster) {
155
		this.targetCluster = targetCluster;
156
	}
157

  
158
	public String getSourceTable() {
159
		return sourceTable;
160
	}
161

  
162
	public void setSourceTable(final String sourceTable) {
163
		this.sourceTable = sourceTable;
164
	}
165

  
166
	public String getTargetTable() {
167
		return targetTable;
168
	}
169

  
170
	public void setTargetTable(final String targetTable) {
171
		this.targetTable = targetTable;
172
	}
173

  
174
}
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryOrganizations.sql
1
-- Lists the rows of dsm_organizations together with the id and official name of the
-- service they were collected from, skipping the services excluded in the WHERE clause.
-- NOTE(review): the filter is on d.id, so organizations with no matching dsm_services row
-- (d.id IS NULL) are dropped as well - confirm this is intended.
SELECT
	o.id AS organizationid,
	o.legalshortname AS legalshortname,
	o.legalname AS legalname,
	o.websiteurl AS websiteurl,
	o.logourl AS logourl,
	o.ec_legalbody AS eclegalbody,
	o.ec_legalperson AS eclegalperson,
	o.ec_nonprofit AS ecnonprofit,
	o.ec_researchorganization AS ecresearchorganization,
	o.ec_highereducation AS echighereducation,
	o.ec_internationalorganizationeurinterests AS ecinternationalorganizationeurinterests,
	o.ec_internationalorganization AS ecinternationalorganization,
	o.ec_enterprise AS ecenterprise,
	o.ec_smevalidated AS ecsmevalidated,
	o.ec_nutscode AS ecnutscode,
	o.dateofcollection AS dateofcollection,
	o.lastupdate AS dateoftransformation,
	FALSE AS inferred,
	FALSE AS deletedbyinference,
	o.trust AS trust,
	'' AS inferenceprovenance,
	d.id AS collectedfromid,
	d.officialname AS collectedfromname,
	o.country || '@@@dnet:countries' AS country,
	'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
	ARRAY[]::text[] AS pid
FROM dsm_organizations o
LEFT JOIN dsm_services d ON d.id = o.collectedfrom
WHERE d.id NOT IN (
	'openaire____::dfgf',
	'openaire____::innoviris',
	'openaire____::sgov',
	'openaire____::conicytf',
	'openaire____::anr',
	'openaire____::gsrt',
	'openaire____::rif',
	'openaire____::rsf',
	'openaire____::miur'
)
41

  
42

  
43

  
44

  
45

  
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryProjects.sql
1
-- Selects every row of projects_mv except those collected from the excluded services.
-- The view is aliased as p because the WHERE clause qualifies the column with p;
-- without the alias the query references a table that is not in the FROM clause.
SELECT *
FROM projects_mv p
WHERE p.collectedfrom <> 'openaire____::dfgf'
AND p.collectedfrom <> 'openaire____::innoviris'
AND p.collectedfrom <> 'openaire____::sgov'
AND p.collectedfrom <> 'openaire____::conicytf'
AND p.collectedfrom <> 'openaire____::anr'
AND p.collectedfrom <> 'openaire____::gsrt'
AND p.collectedfrom <> 'openaire____::rif'
AND p.collectedfrom <> 'openaire____::rsf'
AND p.collectedfrom <> 'openaire____::miur'
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryPersons.sql
1
-- Lists persons that have a first name or second names and are the contact person of at
-- least one project_organization row with a responsible organization, one row per person,
-- with their identifiers aggregated into the pid array.
SELECT
	p.id AS personid,
	p.firstname AS firstname,
	p.secondnames AS secondnames,
	-- "secondnames, firstname" when second names are present, otherwise the first name only
	CASE
		WHEN length(p.secondnames) > 0 THEN trim(BOTH FROM p.secondnames || ', ' || p.firstname)
		ELSE p.firstname
	END AS fullname,
	p.fax AS fax,
	p.email AS email,
	p.phone AS phone,
	p.dateofcollection AS dateofcollection,
	p.inferred AS inferred,
	p.deletedbyinference AS deletedbyinference,
	p.trust AS trust,
	p.inferenceprovenance AS inferenceprovenance,
	dc.id AS collectedfromid,
	dc.officialname AS collectedfromname,
	nc.code || '@@@' || nc.name || '@@@' || ns.code || '@@@' || ns.name AS nationality,
	pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction,
	array_agg(DISTINCT i.pid || '###' || i.issuertypeclass) AS pid
FROM persons p
LEFT JOIN class nc ON nc.code = p.nationalityclass
LEFT JOIN scheme ns ON ns.code = p.nationalityscheme
LEFT JOIN class pac ON pac.code = p.provenanceactionclass
LEFT JOIN scheme pas ON pas.code = p.provenanceactionscheme
LEFT JOIN datasources dc ON dc.id = p.collectedfrom
LEFT JOIN personpids pp ON pp.person = p.id
LEFT JOIN identities i ON i.pid = pp.pid
LEFT JOIN project_organization po ON po.contactperson = p.id
WHERE (p.firstname IS NOT NULL OR p.secondnames IS NOT NULL)
	AND po.resporganization IS NOT NULL
GROUP BY
	p.id,
	p.firstname,
	p.secondnames,
	p.fax,
	p.email,
	p.phone,
	p.dateofcollection,
	p.inferred,
	p.deletedbyinference,
	p.trust,
	p.inferenceprovenance,
	dc.id,
	dc.officialname,
	nc.code,
	nc.name,
	ns.code,
	ns.name,
	pac.code,
	pac.name,
	pas.code,
	pas.name
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryClaimsRel.sql
1
-- Selects the relation claims of the two user-claim sets.
-- The set alternatives are grouped explicitly: AND binds tighter than OR, so without the
-- grouping the type filter would not apply to the 'userclaim_result_result' rows.
SELECT xml, provenance
FROM claims
WHERE type = 'rels2actions'
	AND (set = 'userclaim_result_project' OR set = 'userclaim_result_result')
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryClaimsUpdate.sql
1
-- Selects the 'updates2actions' claims of the 'userclaim_dmf' set, removing the first
-- occurrence of the XML declaration (matched case-insensitively) from the xml column.
SELECT regexp_replace(xml, '<\?xml version="1\.0" encoding="UTF-8"\?>', '', 'i') AS xml, provenance
FROM claims
WHERE set = 'userclaim_dmf'
	AND type = 'updates2actions'
6

  
7

  
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryProjectOrganization.sql
1
-- Lists the project/organization relations, skipping the projects whose identifier starts
-- with one of the excluded prefixes.
SELECT
	po.project AS project,
	po.resporganization AS resporganization,
	po.participantnumber AS participantnumber,
	po.contribution AS contribution,
	po.currency AS currency,
	NULL AS startdate,
	NULL AS enddate,
	FALSE AS inferred,
	FALSE AS deletedbyinference,
	po.trust AS trust,
	NULL AS inferenceprovenance,
	po.semanticclass || '@@@' || po.semanticclass || '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
	'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction
FROM project_organization po
WHERE po.project NOT LIKE ALL (ARRAY[
	'dfgf%',
	'innoviris%',
	'sgov%',
	'conicytf%',
	'anr%',
	'gsrt%',
	'rif%',
	'rsf%',
	'miur%'
])
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasources.sql
1
-- Lists the rows of dsm_services (one row per service) with their identifiers, APIs and
-- collecting service, skipping the services excluded in the WHERE clause.
SELECT
	d.id AS datasourceid,
	d.id || array_agg(DISTINCT di.pid) AS identities,
	d.officialname AS officialname,
	d.englishname AS englishname,
	-- The first branch whose levels are all found among the API compatibility levels wins.
	CASE
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire-cris_1.1']
			THEN 'openaire-cris_1.1@@@OpenAIRE CRIS v1.1@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire4.0']
			THEN 'openaire4.0@@@OpenAIRE 4.0@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['driver', 'openaire2.0']
			THEN 'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['driver']
			THEN 'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire2.0']
			THEN 'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire3.0']
			THEN 'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire2.0_data']
			THEN 'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['native']
			THEN 'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['hostedBy']
			THEN 'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		WHEN array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['notCompatible']
			THEN 'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
		ELSE 'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
	END AS openairecompatibility,
	d.websiteurl AS websiteurl,
	d.logourl AS logourl,
	-- Base URLs of the 'oai' APIs that have a last aggregation date; NULL for the other APIs.
	array_agg(DISTINCT CASE WHEN a.protocol = 'oai' AND last_aggregation_date IS NOT NULL THEN a.baseurl END) AS accessinfopackage,
	d.latitude AS latitude,
	d.longitude AS longitude,
	d.namespaceprefix AS namespaceprefix,
	NULL AS odnumberofitems,
	NULL AS odnumberofitemsdate,
	-- d.subjects is split on '@@'; each trimmed item gets the keyword qualifier appended.
	(
		SELECT array_agg(s || '###keywords@@@keywords@@@dnet:subject_classification_typologies@@@dnet:subject_classification_typologies')
		FROM unnest(
			ARRAY(
				SELECT trim(s)
				FROM unnest(string_to_array(d.subjects, '@@')) AS s
			)
		) AS s
	) AS subjects,
	d.description AS description,
	NULL AS odpolicies,
	-- d.languages is split on ',' and each item is trimmed.
	ARRAY(
		SELECT trim(s)
		FROM unnest(string_to_array(d.languages, ',')) AS s
	) AS odlanguages,
	FALSE AS inferred,
	FALSE AS deletedbyinference,
	0.9 AS trust,
	NULL AS inferenceprovenance,
	d.dateofcollection AS dateofcollection,
	d.dateofvalidation AS dateofvalidation,
	-- re3data fields
	d.releasestartdate AS releasestartdate,
	d.releaseenddate AS releaseenddate,
	d.missionstatementurl AS missionstatementurl,
	d.databaseaccesstype AS databaseaccesstype,
	d.datauploadtype AS datauploadtype,
	d.databaseaccessrestriction AS databaseaccessrestriction,
	d.datauploadrestriction AS datauploadrestriction,
	d.citationguidelineurl AS citationguidelineurl,
	d.pidsystems AS pidsystems,
	d.certificates AS certificates,
	ARRAY[]::text[] AS policies,
	dc.id AS collectedfromid,
	dc.officialname AS collectedfromname,
	-- Typology code followed by its label; unlisted typologies are labelled 'Other'.
	d.typology || '@@@' || CASE d.typology
		WHEN 'crissystem' THEN 'CRIS System'
		WHEN 'datarepository::unknown' THEN 'Data Repository'
		WHEN 'aggregator::datarepository' THEN 'Data Repository Aggregator'
		WHEN 'infospace' THEN 'Information Space'
		WHEN 'pubsrepository::institutional' THEN 'Institutional Repository'
		WHEN 'aggregator::pubsrepository::institutional' THEN 'Institutional Repository Aggregator'
		WHEN 'pubsrepository::journal' THEN 'Journal'
		WHEN 'aggregator::pubsrepository::journals' THEN 'Journal Aggregator/Publisher'
		WHEN 'pubsrepository::mock' THEN 'Other'
		WHEN 'pubscatalogue::unknown' THEN 'Publication Catalogue'
		WHEN 'pubsrepository::unknown' THEN 'Publication Repository'
		WHEN 'aggregator::pubsrepository::unknown' THEN 'Publication Repository Aggregator'
		WHEN 'entityregistry' THEN 'Registry'
		WHEN 'scholarcomminfra' THEN 'Scholarly Comm. Infrastructure'
		WHEN 'pubsrepository::thematic' THEN 'Thematic Repository'
		WHEN 'websource' THEN 'Web Source'
		WHEN 'entityregistry::projects' THEN 'Funder database'
		WHEN 'entityregistry::repositories' THEN 'Registry of repositories'
		WHEN 'softwarerepository' THEN 'Software Repository'
		WHEN 'aggregator::softwarerepository' THEN 'Software Repository Aggregator'
		WHEN 'orprepository' THEN 'Repository'
		ELSE 'Other'
	END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
	'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
	CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal
FROM dsm_services d
LEFT JOIN dsm_services dc ON d.collectedfrom = dc.id
LEFT JOIN dsm_api a ON d.id = a.service
LEFT JOIN dsm_servicepids di ON d.id = di.service
WHERE d.id NOT IN (
	'openaire____::dfgf',
	'openaire____::innoviris',
	'openaire____::sgov',
	'openaire____::conicytf',
	'openaire____::anr',
	'openaire____::gsrt',
	'openaire____::rif',
	'openaire____::rsf',
	'openaire____::miur'
)
GROUP BY
	d.id,
	d.officialname,
	d.englishname,
	d.websiteurl,
	d.logourl,
	d.contactemail,
	d.namespaceprefix,
	d.description,
	d.latitude,
	d.longitude,
	d.dateofcollection,
	d.dateofvalidation,
	d.releasestartdate,
	d.releaseenddate,
	d.missionstatementurl,
	d.databaseaccesstype,
	d.datauploadtype,
	d.databaseaccessrestriction,
	d.datauploadrestriction,
	d.citationguidelineurl,
	d.pidsystems,
	d.certificates,
	dc.id,
	dc.officialname,
	d.issn,
	d.eissn,
	d.lissn
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/querySimilarityFromOpenOrgsDB.sql
1
-- Produces (id1, id2) pairs from three sources: the 'is_similar' rows of openaire_simrels,
-- one pair per acronym and one pair per alternative name. For the last two, id2 is built
-- from 'openorgsmesh', the organization id from its 13th character on, and the md5 of the
-- acronym or name.
SELECT
	local_id AS id1,
	oa_original_id AS id2
FROM openaire_simrels
WHERE reltype = 'is_similar'

UNION ALL

SELECT
	o.id AS id1,
	'openorgsmesh' || substring(o.id, 13) || '-' || md5(a.acronym) AS id2
FROM acronyms a
LEFT JOIN organizations o ON a.id = o.id

UNION ALL

SELECT
	o.id AS id1,
	'openorgsmesh' || substring(o.id, 13) || '-' || md5(n.name) AS id2
FROM other_names n
LEFT JOIN organizations o ON n.id = o.id
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryOrganizationsFromOpenOrgsDB.sql
1
-- First branch: one row per organization, with its alternative names and identifiers
-- aggregated. Second branch: one extra row per (organization, alternative name), whose id
-- is built from 'openorgsmesh', the organization id from its 13th character on, and the
-- md5 of the name.
-- NOTE(review): (array_agg(...))[1] has no ORDER BY, so which acronym/url is picked is not
-- determined by the query - confirm that any value is acceptable.
SELECT
	o.id AS organizationid,
	coalesce((array_agg(a.acronym))[1], o.name) AS legalshortname,
	o.name AS legalname,
	array_agg(DISTINCT n.name) AS "alternativeNames",
	(array_agg(u.url))[1] AS websiteurl,
	o.modification_date AS dateoftransformation,
	FALSE AS inferred,
	FALSE AS deletedbyinference,
	0.95 AS trust,
	'' AS inferenceprovenance,
	'openaire____::openorgs' AS collectedfromid,
	'OpenOrgs Database' AS collectedfromname,
	o.country || '@@@dnet:countries' AS country,
	'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
	array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM organizations o
LEFT JOIN acronyms a ON a.id = o.id
LEFT JOIN urls u ON u.id = o.id
LEFT JOIN other_ids i ON i.id = o.id
LEFT JOIN other_names n ON n.id = o.id
GROUP BY
	o.id,
	o.name,
	o.modification_date,
	o.country

UNION ALL

SELECT
	'openorgsmesh' || substring(o.id, 13) || '-' || md5(n.name) AS organizationid,
	n.name AS legalshortname,
	n.name AS legalname,
	ARRAY[]::text[] AS "alternativeNames",
	(array_agg(u.url))[1] AS websiteurl,
	o.modification_date AS dateoftransformation,
	FALSE AS inferred,
	FALSE AS deletedbyinference,
	0.88 AS trust,
	'' AS inferenceprovenance,
	'openaire____::openorgs' AS collectedfromid,
	'OpenOrgs Database' AS collectedfromname,
	o.country || '@@@dnet:countries' AS country,
	'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
	array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM other_names n
LEFT JOIN organizations o ON n.id = o.id
LEFT JOIN urls u ON u.id = o.id
LEFT JOIN other_ids i ON i.id = o.id
GROUP BY
	o.id,
	o.modification_date,
	o.country,
	n.name
52

  
53

  
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasourcesForSize.sql
1
/*
 * Counts the rows of dsm_services, leaving out a fixed list of service ids.
 */
SELECT count(*)
FROM dsm_services d
WHERE d.id NOT IN (
	'openaire____::dfgf',
	'openaire____::innoviris',
	'openaire____::sgov',
	'openaire____::conicytf',
	'openaire____::anr',
	'openaire____::gsrt',
	'openaire____::rif',
	'openaire____::rsf',
	'openaire____::miur'
)
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasourceOrganization.sql
1
/*
 * Lists the service/organization pairs stored in dsm_service_organization,
 * decorated with constant relation attributes and with the provenance action
 * of the related dsm_services row. A fixed list of service ids is left out.
 */
SELECT
	dor.service      AS datasource,
	dor.organization AS organization,
	NULL             AS startdate,
	NULL             AS enddate,
	false            AS inferred,
	false            AS deletedbyinference,
	0.9              AS trust,
	NULL             AS inferenceprovenance,
	'providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS semantics,
	d.provenanceaction || '@@@' || d.provenanceaction || '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction
FROM dsm_service_organization dor
	LEFT OUTER JOIN dsm_services d ON dor.service = d.id
/* d.id is NULL for pairs without a matching dsm_services row, so this filter discards those pairs as well. */
WHERE d.id NOT IN (
	'openaire____::dfgf',
	'openaire____::innoviris',
	'openaire____::sgov',
	'openaire____::conicytf',
	'openaire____::anr',
	'openaire____::gsrt',
	'openaire____::rif',
	'openaire____::rsf',
	'openaire____::miur'
)
25

  
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importGridAC.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="ff25219c-e485-4440-8bc0-a8bbe42512ac_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
		<RESOURCE_KIND value="WorkflowDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2018-08-06T16:19:33+00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<WORKFLOW_NAME>Import GridAC</WORKFLOW_NAME>
11
		<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
12
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
		<CONFIGURATION start="manual">
14
			<NODE isStart="true" name="setInputPath" type="SetHdfsFile">
15
				<DESCRIPTION>set the hdfs input path</DESCRIPTION>
16
				<PARAMETERS>
17
					<PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/data/gridac</PARAM>
18
					<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM>
19
				</PARAMETERS>
20
				<ARCS>
21
					<ARC to="prepareActionSets"/>
22
				</ARCS>
23
			</NODE>
24
			<NODE name="prepareActionSets" type="PrepareActionSets">
25
				<DESCRIPTION>prepare action sets</DESCRIPTION>
26
				<PARAMETERS>
27
					<PARAM managedBy="system" name="sets" required="true" type="string">
28
						[
29
						{
30
						'set' : 'gridac-dump',
31
						'jobProperty' : 'export_action_set_gridac_dump',
32
						'enablingProperty' : 'active_gridac_dump',
33
						'enabled' : 'true'
34
						}
35
						]
36
					</PARAM>
37
				</PARAMETERS>
38
				<ARCS>
39
					<ARC to="extractOutputPath"/>
40
				</ARCS>
41
			</NODE>
42
			<NODE name="extractOutputPath" type="ExtractOutputPath">
43
				<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
44
				<PARAMETERS>
45
					<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
46
				</PARAMETERS>
47
				<ARCS>
48
					<ARC to="importActionSet"/>
49
				</ARCS>
50
			</NODE>
51
			<NODE isJoin="true" name="importActionSet" type="SubmitHadoopJob">
52
				<DESCRIPTION>submit the importGridAcJob Hadoop job</DESCRIPTION>
53
				<PARAMETERS>
54
					<PARAM managedBy="system" name="hadoopJob" required="true" type="string">importGridAcJob</PARAM>
55
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
56
					<PARAM managedBy="system" name="envParams" required="true" type="string">
57
						{
58
						'mapred.input.dir':'inputPath',
59
						'mapred.output.dir':'outputPath'
60
						}
61
					</PARAM>
62
					<PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM>
63
				</PARAMETERS>
64
				<ARCS>
65
					<ARC to="updateActionSets"/>
66
				</ARCS>
67
			</NODE>
68
			<NODE name="updateActionSets" type="UpdateActionSets">
69
				<DESCRIPTION>update action sets</DESCRIPTION>
70
				<PARAMETERS/>
71
				<ARCS>
72
					<ARC to="success"/>
73
				</ARCS>
74
			</NODE>
75
		</CONFIGURATION>
76
		<STATUS>
77
			<LAST_EXECUTION_ID>wf_20180807_092640_582</LAST_EXECUTION_ID>
78
			<LAST_EXECUTION_DATE>2018-08-07T09:29:23+00:00</LAST_EXECUTION_DATE>
79
			<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
80
			<LAST_EXECUTION_ERROR/>
81
		</STATUS>
82
	</BODY>
83
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importActionsFromHDFS.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
				value="cbfb388a-c184-4a88-be66-0f1d3bb61fe5_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
6
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
7
		<RESOURCE_KIND value="WorkflowDSResources"/>
8
		<RESOURCE_URI value=""/>
9
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
10
	</HEADER>
11
	<BODY>
12
		<WORKFLOW_NAME>Actions from HDFS</WORKFLOW_NAME>
13
		<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE>
14
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
15
		<CONFIGURATION start="manual">
16
			<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true">
17
				<DESCRIPTION>Set table name</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
20
					<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM>
21
					<PARAM required="true" type="string" name="table" managedBy="user"></PARAM>
22
				</PARAMETERS>
23
				<ARCS>
24
					<ARC to="importMapreduce"/>
25
				</ARCS>
26
			</NODE>
27
			<NODE name="setActionsPath" type="SetEnvParameter" isStart="true">
28
				<DESCRIPTION>Set the sequence file path on HDFS holding the exported Actions</DESCRIPTION>
29
				<PARAMETERS>
30
					<PARAM managedBy="system" name="parameterName" required="true" type="string">actionsPath</PARAM>
31
					<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/2016-03-25_iis_preprocessing/document_referencedDatasets/rawset_76cfeefd-1139-4a30-a394-f01bf67220bf_1458652954774</PARAM>
32
				</PARAMETERS>
33
				<ARCS>
34
					<ARC to="importMapreduce"/>
35
				</ARCS>
36
			</NODE>
37

  
38
			<NODE name="importMapreduce" type="SubmitHadoopJob" isJoin="true">
39
				<DESCRIPTION>Run M/R import Job</DESCRIPTION>
40
				<PARAMETERS>
41
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">actionsImportJob</PARAM>
42
					<PARAM required="true" type="string" name="envParams" managedBy="system">
43
						{
44
						'cluster' : 'cluster',
45
						'mapred.input.dir' : 'actionsPath',
46
						'hbase.mapred.outputtable' : 'tableName'
47
						}
48
					</PARAM>
49
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
50
				</PARAMETERS>
51
				<ARCS>
52
					<ARC to="success"/>
53
				</ARCS>
54
			</NODE>
55

  
56
		</CONFIGURATION>
57
		<STATUS/>
58
	</BODY>
59
</RESOURCE_PROFILE>
60

  
61

  
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importScholexplorer.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
				value="e03f256e-1e4d-4b3d-9c07-91faf5d25210_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
6
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
7
		<RESOURCE_KIND value="WorkflowDSResources"/>
8
		<RESOURCE_URI value=""/>
9
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
10
	</HEADER>
11
	<BODY>
12
		<WORKFLOW_NAME>Import ScholExplorer Links and entities</WORKFLOW_NAME>
13
		<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
14
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
15
		<CONFIGURATION start="manual">
16
            <NODE name="setInputPath" isStart="true" type="SetHdfsFile">
17
                <DESCRIPTION>set the hdfs input path</DESCRIPTION>
18
                <PARAMETERS>
19
                    <PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/dli/export/scolixDumpExport</PARAM>
20
                    <PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM>
21
                </PARAMETERS>
22
                <ARCS>
23
                    <ARC to="prepareActionSets"/>
24
                </ARCS>
25
            </NODE>
26
			<NODE name="prepareActionSets" type="PrepareActionSets">
27
				<DESCRIPTION>prepare action sets</DESCRIPTION>
28
				<PARAMETERS>
29
					<PARAM required="true" type="string" name="sets" managedBy="system">
30
						[
31
						{
32
						'set' : 'scholexplorer-dump',
33
						'jobProperty' : 'export_action_set_scholexplorer_dump',
34
						'enablingProperty' : 'active_scholexplorer_dump',
35
						'enabled' : 'true'
36
						}
37
						]
38
					</PARAM>
39
				</PARAMETERS>
40
				<ARCS>
41
					<ARC to="extractOutputPath"/>
42
				</ARCS>
43
			</NODE>
44

  
45
            <NODE name="extractOutputPath"  type="ExtractOutputPath">
46
                <DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
47
                <PARAMETERS>
48
                    <PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
49

  
50
                </PARAMETERS>
51
                <ARCS>
52
                    <ARC to="importActionSet"/>
53
                </ARCS>
54
            </NODE>
55

  
56
			<NODE name="importActionSet" type="SubmitHadoopJob" isJoin="true">
57
				<DESCRIPTION>submit the importScholexplorerJob Hadoop job</DESCRIPTION>
58
				<PARAMETERS>
59
					<PARAM required="true" type="string" name="hadoopJob" managedBy="system">importScholexplorerJob</PARAM>
60
					<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM>
61
					<PARAM required="true" type="string" name="envParams" managedBy="system">
62
						{
63
						'cluster' : 'cluster',
64
						'mapred.input.dir':'inputPath',
65
                        'mapred.output.dir':'outputPath'
66
						}
67
					</PARAM>
68
					<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM>
69
				</PARAMETERS>
70
				<ARCS>
71
					<ARC to="updateActionSets"/>
72
				</ARCS>
73
			</NODE>
74
			<NODE name="updateActionSets" type="UpdateActionSets">
75
				<DESCRIPTION>update action sets</DESCRIPTION>
76
				<PARAMETERS/>
77
				<ARCS>
78
					<ARC to="success"/>
79
				</ARCS>
80
			</NODE>
81
		</CONFIGURATION>
82
		<STATUS/>
83
	</BODY>
84
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/resetHbase.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="ce304c65-5836-4cf0-9a48-53472b9f6f35_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
		<RESOURCE_KIND value="WorkflowDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
	</HEADER>
10
	<BODY>
11
		<WORKFLOW_NAME>Reset HBase</WORKFLOW_NAME>
12
		<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE>
13
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
		<CONFIGURATION start="auto">
15
			<NODE name="checkTable" type="CheckHBaseTable" isStart="true">
16
				<DESCRIPTION>check hbase table</DESCRIPTION>
17
				<PARAMETERS>
18
					<PARAM name="hbaseTableProperty" type="string" managedBy="system" required="true">hbase.mapred.datatable</PARAM>
19
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
20
					<PARAM name="tableColumnsParamName" type="string" managedBy="system" required="true">hTableColumns</PARAM>
21
					<PARAM name="existOutNode" type="string" managedBy="system" required="true">drop</PARAM>
22
					<PARAM name="dontExistOutNode" type="string" required="true" managedBy="system">define</PARAM>
23
				</PARAMETERS>
24
				<ARCS>
25
					<ARC to="drop" name="drop"/>
26
					<ARC to="define" name="define"/>
27
				</ARCS>
28
			</NODE>
29
			<NODE name="drop" type="DropHBaseTable">
30
				<DESCRIPTION>drop hbase table</DESCRIPTION>
31
				<PARAMETERS>
32
					<PARAM name="hbaseTableProperty" type="string" managedBy="system" required="true">hbase.mapred.datatable</PARAM>
33
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
34
				</PARAMETERS>
35
				<ARCS>
36
					<ARC to="define"/>
37
				</ARCS>
38
			</NODE>
39
			<NODE name="define" type="DefineHBaseOpenaireSchema">
40
				<DESCRIPTION>define OpenAIRE hbase table</DESCRIPTION>
41
				<PARAMETERS>
42
					<PARAM name="tableColumnsParamName" type="string" managedBy="system" required="true">hTableColumns</PARAM>
43
					<PARAM name="hbaseTableProperty" type="string" managedBy="system" required="true">hbase.mapred.datatable</PARAM>
44
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
45
				</PARAMETERS>
46
				<ARCS>
47
					<ARC to="create"/>
48
				</ARCS>
49
			</NODE>
50
			<NODE name="create" type="CreateHBaseTable">
51
				<DESCRIPTION>create hbase table</DESCRIPTION>
52
				<PARAMETERS>
53
					<PARAM name="hbaseTableProperty" type="string" managedBy="system" required="true">hbase.mapred.datatable</PARAM>
54
					<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM>
55
					<PARAM name="tableColumnsParamName" type="string" managedBy="system" required="true">hTableColumns</PARAM>
56
				</PARAMETERS>
57
				<ARCS>
58
					<ARC to="success"/>
59
				</ARCS>
60
			</NODE>
61
		</CONFIGURATION>
62
		<STATUS/>
63
	</BODY>
64
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/actions2hbase.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER value="4b8dabb8-cef7-4910-a0ad-fd8e70d53b9e_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
5
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
		<RESOURCE_KIND value="WorkflowDSResources"/>
7
		<RESOURCE_URI value=""/>
8
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
9
	</HEADER>
10
	<BODY>
11
		<WORKFLOW_NAME>Actions to HBase</WORKFLOW_NAME>
12
		<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE>
13
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
14
		<CONFIGURATION start="auto">
15
			<NODE name="promoteActions" type="PromoteActions" isStart="true">
16
				<DESCRIPTION>Promote actions</DESCRIPTION>
17
				<PARAMETERS>
18
					<PARAM function="obtainValues('actionSets', {})" required="false" type="string" name="set" managedBy="user">ALL SETS</PARAM>
19
				</PARAMETERS>
20
				<ARCS>
21
					<ARC to="success"/>
22
				</ARCS>
23
			</NODE>
24
		</CONFIGURATION>
25
		<STATUS/>
26
	</BODY>
27
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/distcp.xml
1
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
2
    <HEADER>
3
        <RESOURCE_IDENTIFIER value="37a026fd-8707-4390-9d8c-b1566d2a1e3f_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
        <RESOURCE_KIND value="WorkflowDSResources"/>
6
        <RESOURCE_URI value=""/>
7
        <DATE_OF_CREATION value="2019-10-17T08:44:46+00:00"/>
8
    </HEADER>
9
    <BODY>
10
        <WORKFLOW_NAME>distcp</WORKFLOW_NAME>
11
        <WORKFLOW_TYPE>Export InfoSpace</WORKFLOW_TYPE>
12
        <WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
        <CONFIGURATION start="manual">
14

  
15
            <NODE isStart="true" name="setSourcePath" type="SetEnvParameter">
16
                <DESCRIPTION>Set the source path on HDFS (DM)</DESCRIPTION>
17
                <PARAMETERS>
18
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">sourcePath</PARAM>
19
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/db_openaireplus_services_beta.export.2019.08.17</PARAM>
20
                </PARAMETERS>
21
                <ARCS>
22
                    <ARC to="distcp"/>
23
                </ARCS>
24
            </NODE>
25
            <NODE isStart="true" name="setTargetPath" type="SetEnvParameter">
26
                <DESCRIPTION>Set the target path on HDFS (IIS)</DESCRIPTION>
27
                <PARAMETERS>
28
                    <PARAM managedBy="system" name="parameterName" required="true" type="string">targetPath</PARAM>
29
                    <PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp</PARAM>
30
                </PARAMETERS>
31
                <ARCS>
32
                    <ARC to="distcp"/>
33
                </ARCS>
34
            </NODE>
35
            <NODE isJoin="true" name="distcp" type="SubmitHadoopJob">
36
                <DESCRIPTION>distcp</DESCRIPTION>
37
                <PARAMETERS>
38
                    <PARAM managedBy="system" name="hadoopJob" required="true" type="string">distcpJob</PARAM>
39
                    <PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM>
40
                    <PARAM managedBy="system" name="envParams" required="true" type="string">
41
                        {
42
                        'sourcePath' : 'sourcePath',
43
                        'targetPath' : 'targetPath'
44
                        }
45
                    </PARAM>
46
                    <PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM>
47
                    <PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM>
48
                </PARAMETERS>
49
                <ARCS>
50
                    <ARC to="success"/>
51
                </ARCS>
52
            </NODE>
53

  
54
        </CONFIGURATION>
55
        <STATUS>
56
            <LAST_EXECUTION_ID>wf_20190805_085505_893</LAST_EXECUTION_ID>
57
            <LAST_EXECUTION_DATE>2019-08-07T05:19:02+00:00</LAST_EXECUTION_DATE>
58
            <LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
59
            <LAST_EXECUTION_ERROR/>
60
        </STATUS>
61
    </BODY>
62
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importAffiliationsDOIBoost.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="970c4826-cf6d-4dfe-850c-41e508d341fa_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
		<RESOURCE_KIND value="WorkflowDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2018-11-15T11:24:05+00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<WORKFLOW_NAME>Import Organizations from DOIboost</WORKFLOW_NAME>
11
		<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
12
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
		<CONFIGURATION start="manual">
14
			<NODE isStart="true" name="setOrgsOnly" type="SetEnvParameter">
15
				<DESCRIPTION>set the parameter to drive the import procedure behaviour so that it will import only organizations</DESCRIPTION>
16
				<PARAMETERS>
17
					<PARAM managedBy="system" name="parameterName" required="true" type="string">onlyOrganization</PARAM>
18
					<PARAM managedBy="user" name="parameterValue" required="false" type="string">true</PARAM>
19
				</PARAMETERS>
20
				<ARCS>
21
					<ARC to="importActionSet"/>
22
				</ARCS>
23
			</NODE>
24
			<NODE isStart="true" name="setInputPath" type="SetHdfsFile">
25
				<DESCRIPTION>set the hdfs input path</DESCRIPTION>
26
				<PARAMETERS>
27
					<PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/data/doiboost</PARAM>
28
					<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM>
29
				</PARAMETERS>
30
				<ARCS>
31
					<ARC to="prepareActionSets"/>
32
				</ARCS>
33
			</NODE>
34
			<NODE name="prepareActionSets" type="PrepareActionSets">
35
				<DESCRIPTION>prepare action sets</DESCRIPTION>
36
				<PARAMETERS>
37
					<PARAM managedBy="system" name="sets" required="true" type="string">
38
						[
39
						{
40
						'set' : 'doiboost-organizations',
41
						'jobProperty' : 'export_action_set_doiboost-organizations',
42
						'enablingProperty' : 'active_doiboost-organizations',
43
						'enabled' : 'true'
44
						}
45
						]
46
					</PARAM>
47
				</PARAMETERS>
48
				<ARCS>
49
					<ARC to="extractOutputPath"/>
50
				</ARCS>
51
			</NODE>
52
			<NODE name="extractOutputPath" type="ExtractOutputPath">
53
				<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION>
54
				<PARAMETERS>
55
					<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM>
56
				</PARAMETERS>
57
				<ARCS>
58
					<ARC to="importActionSet"/>
59
				</ARCS>
60
			</NODE>
61
			<NODE isJoin="true" name="importActionSet" type="SubmitHadoopJob">
62
				<DESCRIPTION>submit the importDOIBoostJob Hadoop job</DESCRIPTION>
63
				<PARAMETERS>
64
					<PARAM managedBy="system" name="hadoopJob" required="true" type="string">importDOIBoostJob</PARAM>
65
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
66
					<PARAM managedBy="system" name="envParams" required="true" type="string">
67
						{
68
						'mapred.input.dir':'inputPath',
69
						'mapred.output.dir':'outputPath',
70
						'onlyOrganization' : 'onlyOrganization'
71
						}
72
					</PARAM>
73
					<PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM>
74
				</PARAMETERS>
75
				<ARCS>
76
					<ARC to="updateActionSets"/>
77
				</ARCS>
78
			</NODE>
79
			<NODE name="updateActionSets" type="UpdateActionSets">
80
				<DESCRIPTION>update action sets</DESCRIPTION>
81
				<PARAMETERS/>
82
				<ARCS>
83
					<ARC to="success"/>
84
				</ARCS>
85
			</NODE>
86
		</CONFIGURATION>
87
		<STATUS>
88
			<LAST_EXECUTION_ID>wf_20181026_140552_878</LAST_EXECUTION_ID>
89
			<LAST_EXECUTION_DATE>2018-10-26T14:21:47+00:00</LAST_EXECUTION_DATE>
90
			<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
91
			<LAST_EXECUTION_ERROR/>
92
		</STATUS>
93
	</BODY>
94
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/oaf2hbase.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
	<HEADER>
4
		<RESOURCE_IDENTIFIER
5
				value="c7d7d775-2db3-474d-85ab-5173a582d515_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
6
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
7
		<RESOURCE_KIND value="WorkflowDSResources"/>
8
		<RESOURCE_URI value=""/>
9
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
10
	</HEADER>
11
	<BODY>
12
		<WORKFLOW_NAME>OAF to HBase</WORKFLOW_NAME>
13
		<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE>
14
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
15
		<CONFIGURATION start="manual">
16
			<NODE isStart="true" name="setTable" type="SetHBaseTable">
17
				<DESCRIPTION>set hbase table</DESCRIPTION>
18
				<PARAMETERS>
19
					<PARAM managedBy="user" name="table" required="true" type="string"></PARAM>
20
				</PARAMETERS>
21
				<ARCS>
22
					<ARC to="mapreduce"/>
23
				</ARCS>
24
			</NODE>
25
			<NODE isStart="true" name="prepareImport" type="PrepareMDStoreImport">
26
				<DESCRIPTION>Configure export to HDFS</DESCRIPTION>
27
				<PARAMETERS>
28
					<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">hdfsPath</PARAM>
29
					<PARAM managedBy="user"   name="hdfsPath"      required="true" type="string"></PARAM>
30
					<PARAM managedBy="system" name="mappingParam"  required="true" type="string">xslt</PARAM>
31
					<PARAM managedBy="user"   name="mapping"       required="true" type="string" function="obtainValues('oaf2hbaseMappings', {})"></PARAM>
32
				</PARAMETERS>
33
				<ARCS>
34
					<ARC to="reuseHdfsRecords"/>
35
				</ARCS>
36
			</NODE>
37
			<NODE name="reuseHdfsRecords" type="ReuseHdfsRecords">
38
				<DESCRIPTION>reuse mdstore records</DESCRIPTION>
39
				<PARAMETERS>
40
					<PARAM managedBy="user" name="reuseMdRecords" required="true" type="boolean"></PARAM>
41
				</PARAMETERS>
42
				<ARCS>
43
					<ARC name="true" to="doneExport"/>
44
					<ARC name="false" to="exportRecords"/>
45
				</ARCS>
46
			</NODE>
47
			<NODE name="exportRecords" type="MDStoreBatchExporter">
48
				<DESCRIPTION>Fetch mdstore records</DESCRIPTION>
49
				<PARAMETERS>
50
					<PARAM managedBy="user"   name="format"           required="true" type="string">OAF</PARAM>
51
					<PARAM managedBy="user"   name="layout"           required="true" type="string">store</PARAM>
52
					<PARAM managedBy="user"   name="interpretation"   required="true" type="string">cleaned</PARAM>
53
					<PARAM managedBy="system" name="outputEprParam"   required="true" type="string">records_epr</PARAM>
54
				</PARAMETERS>
55
				<ARCS>
56
					<ARC to="storeHdfsRecords"/>
57
				</ARCS>
58
			</NODE>
59
			<NODE name="storeHdfsRecords" type="StoreHdfsRecords">
60
				<DESCRIPTION>Store records to HDFS</DESCRIPTION>
61
				<PARAMETERS>
62
					<PARAM managedBy="system" name="inputEprParam" required="true" type="string">records_epr</PARAM>
63
					<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">hdfsPath</PARAM>
64
					<PARAM managedBy="system" name="cluster"       required="true" type="string">DM</PARAM>
65
				</PARAMETERS>
66
				<ARCS>
67
					<ARC to="doneExport"/>
68
				</ARCS>
69
			</NODE>
70
			<NODE name="doneExport">
71
				<DESCRIPTION/>
72
				<PARAMETERS/>
73
				<ARCS>
74
					<ARC to="mapreduce"/>
75
				</ARCS>
76
			</NODE>
77
			<NODE isJoin="true" name="mapreduce" type="SubmitHadoopJob">
78
				<DESCRIPTION>Run M/R import Job</DESCRIPTION>
79
				<PARAMETERS>
80
					<PARAM managedBy="system" name="cluster"    required="true" type="string">DM</PARAM>
81
					<PARAM managedBy="system" name="hadoopJob"  required="true" type="string">mdStoreHdfsImportJob</PARAM>
82
					<PARAM managedBy="user"   name="simulation" required="true" type="boolean">false</PARAM>
83
					<PARAM managedBy="system" name="envParams"  required="true" type="string">
84
						{
85
						'mapred.input.dir' : 'hdfsPath',
86
						'hbase.import.xslt' : 'xslt',
87
						'hbase.mapred.outputtable' : 'hbaseTable'
88
						}
89
					</PARAM>
90
				</PARAMETERS>
91
				<ARCS>
92
					<ARC to="success"/>
93
				</ARCS>
94
			</NODE>
95
		</CONFIGURATION>
96
		<STATUS/>
97
	</BODY>
98
</RESOURCE_PROFILE>
99

  
100

  
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/db2hbase.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="e92d2f81-ea98-4732-a306-07da87f35033_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
		<RESOURCE_KIND value="WorkflowDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/>
8
	</HEADER>
9
	<BODY>
10
		<WORKFLOW_NAME>DB to HBase</WORKFLOW_NAME>
11
		<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE>
12
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
		<CONFIGURATION start="manual">
14
			<NODE isStart="true" name="start">
15
				<DESCRIPTION>start</DESCRIPTION>
16
				<PARAMETERS/>
17
				<ARCS>
18
					<ARC to="queryDatasources"/>
19
				</ARCS>
20
			</NODE>
21
			<NODE name="queryDatasources" type="QueryDb">
22
				<DESCRIPTION>query Datasources</DESCRIPTION>
23
				<PARAMETERS>
24
					<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM>
25
					<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasources.sql
26
					</PARAM>
27
					<PARAM managedBy="system" name="sqlForSize" required="true" type="string">
28
						/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasourcesForSize.sql
29
					</PARAM>
30
					<PARAM managedBy="system" name="outputEprParam" required="true" type="string">dsEpr</PARAM>
31
				</PARAMETERS>
32
				<ARCS>
33
					<ARC to="storeDatasources"/>
34
				</ARCS>
35
			</NODE>
36
			<NODE name="storeDatasources" type="StoreHBase">
37
				<DESCRIPTION>Store Datasources to HBase</DESCRIPTION>
38
				<PARAMETERS>
39
					<PARAM managedBy="system" name="inputEprParam" required="true" type="string">dsEpr</PARAM>
40
					<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
41
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
42
					<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
43
				</PARAMETERS>
44
				<ARCS>
45
					<ARC to="queryDatasourceOrganization"/>
46
				</ARCS>
47
			</NODE>
48
			<NODE name="queryDatasourceOrganization" type="QueryDb">
49
				<DESCRIPTION>query relation Datasource_Organization</DESCRIPTION>
50
				<PARAMETERS>
51
					<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM>
52
					<PARAM managedBy="system" name="sql" required="true" type="string">
53
						/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasourceOrganization.sql
54
					</PARAM>
55
					<PARAM managedBy="system" name="outputEprParam" required="true" type="string">dsOrgEpr</PARAM>
56
				</PARAMETERS>
57
				<ARCS>
58
					<ARC to="storeDatasourceOrganization"/>
59
				</ARCS>
60
			</NODE>
61
			<NODE name="storeDatasourceOrganization" type="StoreHBase">
62
				<DESCRIPTION>Store relation Datasource_Organization</DESCRIPTION>
63
				<PARAMETERS>
64
					<PARAM managedBy="system" name="inputEprParam" required="true" type="string">dsOrgEpr</PARAM>
65
					<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
66
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
67
					<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
68
				</PARAMETERS>
69
				<ARCS>
70
					<ARC to="queryOrganizations"/>
71
				</ARCS>
72
			</NODE>
73
			<NODE name="queryOrganizations" type="QueryDb">
74
				<DESCRIPTION>query Organizations</DESCRIPTION>
75
				<PARAMETERS>
76
					<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM>
77
					<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryOrganizations.sql
78
					</PARAM>
79
					<PARAM managedBy="system" name="outputEprParam" required="true" type="string">orgEpr</PARAM>
80
				</PARAMETERS>
81
				<ARCS>
82
					<ARC to="storeOrganizations"/>
83
				</ARCS>
84
			</NODE>
85
			<NODE name="storeOrganizations" type="StoreHBase">
86
				<DESCRIPTION>Store Organizations to HBase</DESCRIPTION>
87
				<PARAMETERS>
88
					<PARAM managedBy="system" name="inputEprParam" required="true" type="string">orgEpr</PARAM>
89
					<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
90
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
91
					<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
92
				</PARAMETERS>
93
				<ARCS>
94
					<ARC to="queryOrganizationsFromOpenOrgsDB"/>
95
				</ARCS>
96
			</NODE>
97
			<NODE name="queryOrganizationsFromOpenOrgsDB" type="QueryDb">
98
				<DESCRIPTION>query Open Organizations</DESCRIPTION>
99
				<PARAMETERS>
100
					<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openorgs.db.name</PARAM>
101
					<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryOrganizationsFromOpenOrgsDB.sql
102
					</PARAM>
103
					<PARAM managedBy="system" name="outputEprParam" required="true" type="string">openOrgsEpr</PARAM>
104
				</PARAMETERS>
105
				<ARCS>
106
					<ARC to="storeOrganizationsFromOpenOrgsDB"/>
107
				</ARCS>
108
			</NODE>
109
			<NODE name="storeOrganizationsFromOpenOrgsDB" type="StoreHBase">
110
				<DESCRIPTION>Store Open Organizations to HBase</DESCRIPTION>
111
				<PARAMETERS>
112
					<PARAM managedBy="system" name="inputEprParam" required="true" type="string">openOrgsEpr</PARAM>
113
					<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
114
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
115
					<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
116
				</PARAMETERS>
117
				<ARCS>
118
					<ARC to="querySimilaritiesFromOpenOrgsDB"/>
119
				</ARCS>
120
			</NODE>
121
			<NODE name="querySimilaritiesFromOpenOrgsDB" type="QueryDb">
122
                <DESCRIPTION>query similarities (openorgs)</DESCRIPTION>
123
                <PARAMETERS>
124
                    <PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openorgs.db.name</PARAM>
125
                    <PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/querySimilarityFromOpenOrgsDB.sql</PARAM>
126
                    <PARAM managedBy="system" name="outputEprParam" required="true" type="string">openSimRelsEpr</PARAM>
127
                </PARAMETERS>
128
                <ARCS>
129
                    <ARC to="storeSimilaritiesFromOpenOrgsDB"/>
130
                </ARCS>
131
            </NODE>
132
            <NODE name="storeSimilaritiesFromOpenOrgsDB" type="StoreHBase">
133
                <DESCRIPTION>Store similarities to HBase (openorgs)</DESCRIPTION>
134
                <PARAMETERS>
135
                    <PARAM managedBy="system" name="inputEprParam" required="true" type="string">openSimRelsEpr</PARAM>
136
                    <PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
137
                    <PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
138
                    <PARAM function="obtainValues('dbmf2hbaseMappings', {})" managedBy="user" name="mapping" required="true" type="string"></PARAM>
139
                </PARAMETERS>
140
                <ARCS>
141
                    <ARC to="queryProjects"/>
142
                </ARCS>
143
            </NODE>
144
			<NODE name="queryProjects" type="QueryDb">
145
				<DESCRIPTION>query Projects</DESCRIPTION>
146
				<PARAMETERS>
147
					<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM>
148
					<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryProjects.sql</PARAM>
149
					<PARAM managedBy="system" name="outputEprParam" required="true" type="string">projEpr</PARAM>
150
				</PARAMETERS>
151
				<ARCS>
152
					<ARC to="storeProjects"/>
153
				</ARCS>
154
			</NODE>
155
			<NODE name="storeProjects" type="StoreHBase">
156
				<DESCRIPTION>Store Projects to HBase</DESCRIPTION>
157
				<PARAMETERS>
158
					<PARAM managedBy="system" name="inputEprParam" required="true" type="string">projEpr</PARAM>
159
					<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
160
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
161
					<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
162
				</PARAMETERS>
163
				<ARCS>
164
					<ARC to="queryProjectOrganization"/>
165
				</ARCS>
166
			</NODE>
167
			<NODE name="queryProjectOrganization" type="QueryDb">
168
				<DESCRIPTION>query relation Project_Organization</DESCRIPTION>
169
				<PARAMETERS>
170
					<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM>
171
					<PARAM managedBy="system" name="sql" required="true" type="string">
172
						/eu/dnetlib/msro/openaireplus/workflows/hbase/queryProjectOrganization.sql
173
					</PARAM>
174
					<PARAM managedBy="system" name="outputEprParam" required="true" type="string">projOrgEpr</PARAM>
175
				</PARAMETERS>
176
				<ARCS>
177
					<ARC to="storeProjectOrganization"/>
178
				</ARCS>
179
			</NODE>
180
			<NODE name="storeProjectOrganization" type="StoreHBase">
181
				<DESCRIPTION>Store relation Project_Organization</DESCRIPTION>
182
				<PARAMETERS>
183
					<PARAM managedBy="system" name="inputEprParam" required="true" type="string">projOrgEpr</PARAM>
184
					<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM>
185
					<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM>
186
					<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM>
187
				</PARAMETERS>
188
				<ARCS>
189
					<ARC to="success"/>
190
				</ARCS>
191
			</NODE>
192
		</CONFIGURATION>
193
		<STATUS>
194
			<LAST_EXECUTION_ID>wf_20140509_125739_830</LAST_EXECUTION_ID>
195
			<LAST_EXECUTION_DATE>2014-05-09T13:03:16+02:00</LAST_EXECUTION_DATE>
196
			<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS>
197
			<LAST_EXECUTION_ERROR/>
198
			<LAST_EXECUTION_OUTPUT name=""></LAST_EXECUTION_OUTPUT>
199
		</STATUS>
200
	</BODY>
201
</RESOURCE_PROFILE>
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importOrcid.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="7c8765af-1253-4bd7-8806-315b73bf7319_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/>
4
		<RESOURCE_TYPE value="WorkflowDSResourceType"/>
5
		<RESOURCE_KIND value="WorkflowDSResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2019-05-29T10:54:33+00:00"/>
8
	</HEADER>
9
	<BODY>
10
		<WORKFLOW_NAME>Import Orcid</WORKFLOW_NAME>
11
		<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE>
12
		<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY>
13
		<CONFIGURATION start="manual">
14
			<NODE isStart="true" name="setInputPath" type="SetHdfsFile">
15
				<DESCRIPTION>set the hdfs input path</DESCRIPTION>
16
				<PARAMETERS>
17
					<PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/data/orcid</PARAM>
18
					<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM>
19
				</PARAMETERS>
20
				<ARCS>
21
					<ARC to="prepareActionSets"/>
22
				</ARCS>
23
			</NODE>
24
			<NODE name="prepareActionSets" type="PrepareActionSets">
25
				<DESCRIPTION>prepare action sets</DESCRIPTION>
26
				<PARAMETERS>
27
					<PARAM managedBy="system" name="sets" required="true" type="string">
28
						[
29
						{
30
						'set' : 'orcidworks-no-doi',
31
						'jobProperty' : 'export_action_set_orcidworks_no_doi',
32
						'enablingProperty' : 'active_orcidworks_no_doi',
33
						'enabled' : 'true'
34
						}
35
						]
36
					</PARAM>
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff