Revision 62084
Added by Michele Artini over 2 years ago
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/hbase/PrepareCopyTableJobNode.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.hbase; |
|
2 |
|
|
3 |
import java.util.Map; |
|
4 |
import java.util.Set; |
|
5 |
import javax.annotation.Resource; |
|
6 |
|
|
7 |
import com.google.common.collect.Sets; |
|
8 |
import com.googlecode.sarasvati.Arc; |
|
9 |
import com.googlecode.sarasvati.NodeToken; |
|
10 |
import eu.dnetlib.data.hadoop.rmi.HadoopService; |
|
11 |
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException; |
|
12 |
import eu.dnetlib.enabling.locators.UniqueServiceLocator; |
|
13 |
import eu.dnetlib.msro.rmi.MSROException; |
|
14 |
import eu.dnetlib.msro.workflows.nodes.SimpleJobNode; |
|
15 |
import org.apache.commons.lang.StringUtils; |
|
16 |
import org.apache.commons.logging.Log; |
|
17 |
import org.apache.commons.logging.LogFactory; |
|
18 |
|
|
19 |
/** |
|
20 |
* The PrepareCopyTableJobNode prepares the parameters needed to run the CopyTable job. |
|
21 |
*/ |
|
22 |
public class PrepareCopyTableJobNode extends SimpleJobNode { |
|
23 |
|
|
24 |
/** |
|
25 |
* The Constant ZOOKEEPER_ZNODE_PARENT. |
|
26 |
*/ |
|
27 |
private static final String ZOOKEEPER_ZNODE_PARENT = "zookeeper.znode.parent"; |
|
28 |
|
|
29 |
/** |
|
30 |
* The Constant HBASE_ZOOKEEPER_CLIENT_PORT. |
|
31 |
*/ |
|
32 |
private static final String HBASE_ZOOKEEPER_CLIENT_PORT = "hbase.zookeeper.client.port"; |
|
33 |
|
|
34 |
/** |
|
35 |
* The Constant HBASE_ZOOKEEPER_QUORUM. |
|
36 |
*/ |
|
37 |
private static final String HBASE_ZOOKEEPER_QUORUM = "hbase.zookeeper.quorum"; |
|
38 |
|
|
39 |
/** |
|
40 |
* logger. |
|
41 |
*/ |
|
42 |
private static final Log log = LogFactory.getLog(PrepareCopyTableJobNode.class); |
|
43 |
|
|
44 |
/** |
|
45 |
* The source table. |
|
46 |
*/ |
|
47 |
private String sourceCluster; |
|
48 |
|
|
49 |
/** |
|
50 |
* The target cluster. |
|
51 |
*/ |
|
52 |
private String targetCluster; |
|
53 |
|
|
54 |
private String sourceTable; |
|
55 |
|
|
56 |
private String targetTable; |
|
57 |
|
|
58 |
@Resource |
|
59 |
private UniqueServiceLocator serviceLocator; |
|
60 |
|
|
61 |
/* |
|
62 |
* (non-Javadoc) |
|
63 |
* |
|
64 |
* @see eu.dnetlib.msro.workflows.nodes.SimpleJobNode#execute(com.googlecode.sarasvati.NodeToken) |
|
65 |
*/ |
|
66 |
@Override |
|
67 |
protected String execute(final NodeToken token) throws Exception { |
|
68 |
|
|
69 |
checkNodeParams(); |
|
70 |
|
|
71 |
final String outputQuorum = getOutputQuorum(); |
|
72 |
log.info("build hbase quorum: " + outputQuorum); |
|
73 |
token.getEnv().setAttribute("peer.adr", outputQuorum); |
|
74 |
token.getEnv().setAttribute("sourceCluster", getSourceCluster()); |
|
75 |
token.getEnv().setAttribute("sourceTable", getSourceTable()); |
|
76 |
|
|
77 |
token.getEnv().setAttribute("targetCluster", getTargetCluster()); |
|
78 |
token.getEnv().setAttribute("targetTable", getTargetTable()); |
|
79 |
|
|
80 |
return Arc.DEFAULT_ARC; |
|
81 |
} |
|
82 |
|
|
83 |
/** |
|
84 |
* Builds the output quorum. |
|
85 |
* |
|
86 |
* @return the output quorum |
|
87 |
* @throws HadoopServiceException when cannot retrieve the clustr configuration |
|
88 |
* @throws MSROException when some of the needed properties is missing in the cluster configuration |
|
89 |
*/ |
|
90 |
private String getOutputQuorum() throws HadoopServiceException, MSROException { |
|
91 |
Map<String, String> conf = serviceLocator.getService(HadoopService.class).getClusterConfiguration(getTargetCluster()); |
|
92 |
log.debug(conf); |
|
93 |
|
|
94 |
String hbaseQuorum = conf.get(HBASE_ZOOKEEPER_QUORUM); |
|
95 |
String hbasePort = conf.get(HBASE_ZOOKEEPER_CLIENT_PORT); |
|
96 |
String znodeParent = conf.get(ZOOKEEPER_ZNODE_PARENT); |
|
97 |
|
|
98 |
checkParamExist(hbaseQuorum, String.format("unable to find property '%s' in cluster configuration: %s", HBASE_ZOOKEEPER_QUORUM, hbaseQuorum)); |
|
99 |
checkParamExist(hbasePort, String.format("unable to find property '%s' in cluster configuration: %s", HBASE_ZOOKEEPER_CLIENT_PORT, hbasePort)); |
|
100 |
checkParamExist(znodeParent, String.format("unable to find property '%s' in cluster configuration: %s", ZOOKEEPER_ZNODE_PARENT, znodeParent)); |
|
101 |
|
|
102 |
String outputQuorum = String.format("%s:%s:%s", hbaseQuorum, hbasePort, znodeParent); |
|
103 |
return outputQuorum; |
|
104 |
} |
|
105 |
|
|
106 |
/** |
|
107 |
* Checks the wf params. |
|
108 |
* |
|
109 |
* @throws MSROException the MSRO exception |
|
110 |
* @throws HadoopServiceException |
|
111 |
*/ |
|
112 |
private void checkNodeParams() throws MSROException, HadoopServiceException { |
|
113 |
|
|
114 |
checkParamExist(getSourceCluster(), "source cluster must be set"); |
|
115 |
checkParamExist(getTargetCluster(), "target cluster must be set"); |
|
116 |
checkParamExist(getSourceTable(), "source table must be set"); |
|
117 |
checkParamExist(getTargetTable(), "target table must be set"); |
|
118 |
|
|
119 |
final HadoopService hadoop = serviceLocator.getService(HadoopService.class); |
|
120 |
|
|
121 |
Set<String> clusters = Sets.newHashSet(hadoop.listClusters()); |
|
122 |
if (!clusters.contains(getSourceCluster())) { throw new MSROException(String.format("source cluster '%s' doesn not exists", getSourceCluster())); } |
|
123 |
if (!clusters.contains(getTargetCluster())) { throw new MSROException(String.format("target cluster '%s' doesn not exists", getTargetCluster())); } |
|
124 |
|
|
125 |
if (!hadoop.existHbaseTable(getSourceCluster(), getSourceTable())) { |
|
126 |
throw new MSROException(String.format( |
|
127 |
"source table '%s' doesn not exists on cluster '%s'", getSourceTable(), getSourceCluster())); |
|
128 |
} |
|
129 |
} |
|
130 |
|
|
131 |
/** |
|
132 |
* Check parameter existence. |
|
133 |
* |
|
134 |
* @param param the param |
|
135 |
* @param msg the msg |
|
136 |
* @throws MSROException the MSRO exception |
|
137 |
*/ |
|
138 |
private void checkParamExist(final String param, final String msg) throws MSROException { |
|
139 |
if (StringUtils.isBlank(param)) { throw new MSROException(msg); } |
|
140 |
} |
|
141 |
|
|
142 |
public String getSourceCluster() { |
|
143 |
return sourceCluster; |
|
144 |
} |
|
145 |
|
|
146 |
public void setSourceCluster(final String sourceCluster) { |
|
147 |
this.sourceCluster = sourceCluster; |
|
148 |
} |
|
149 |
|
|
150 |
public String getTargetCluster() { |
|
151 |
return targetCluster; |
|
152 |
} |
|
153 |
|
|
154 |
public void setTargetCluster(final String targetCluster) { |
|
155 |
this.targetCluster = targetCluster; |
|
156 |
} |
|
157 |
|
|
158 |
public String getSourceTable() { |
|
159 |
return sourceTable; |
|
160 |
} |
|
161 |
|
|
162 |
public void setSourceTable(final String sourceTable) { |
|
163 |
this.sourceTable = sourceTable; |
|
164 |
} |
|
165 |
|
|
166 |
public String getTargetTable() { |
|
167 |
return targetTable; |
|
168 |
} |
|
169 |
|
|
170 |
public void setTargetTable(final String targetTable) { |
|
171 |
this.targetTable = targetTable; |
|
172 |
} |
|
173 |
|
|
174 |
} |
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryOrganizations.sql | ||
---|---|---|
1 |
/*
 * Organizations together with the service they were collected from.
 * Rows joined to one of the listed service ids are skipped; rows without a matching
 * service are skipped too, because comparing a NULL d.id never yields true.
 */
SELECT
    o.id                                       AS organizationid,
    o.legalshortname                           AS legalshortname,
    o.legalname                                AS legalname,
    o.websiteurl                               AS websiteurl,
    o.logourl                                  AS logourl,
    o.ec_legalbody                             AS eclegalbody,
    o.ec_legalperson                           AS eclegalperson,
    o.ec_nonprofit                             AS ecnonprofit,
    o.ec_researchorganization                  AS ecresearchorganization,
    o.ec_highereducation                       AS echighereducation,
    o.ec_internationalorganizationeurinterests AS ecinternationalorganizationeurinterests,
    o.ec_internationalorganization             AS ecinternationalorganization,
    o.ec_enterprise                            AS ecenterprise,
    o.ec_smevalidated                          AS ecsmevalidated,
    o.ec_nutscode                              AS ecnutscode,
    o.dateofcollection                         AS dateofcollection,
    o.lastupdate                               AS dateoftransformation,
    false                                      AS inferred,
    false                                      AS deletedbyinference,
    o.trust                                    AS trust,
    ''                                         AS inferenceprovenance,
    d.id                                       AS collectedfromid,
    d.officialname                             AS collectedfromname,

    o.country || '@@@dnet:countries'           AS country,
    'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,

    ARRAY[]::text[]                            AS pid
FROM dsm_organizations o
    LEFT JOIN dsm_services d ON (d.id = o.collectedfrom)
WHERE d.id NOT IN (
    'openaire____::dfgf',
    'openaire____::innoviris',
    'openaire____::sgov',
    'openaire____::conicytf',
    'openaire____::anr',
    'openaire____::gsrt',
    'openaire____::rif',
    'openaire____::rsf',
    'openaire____::miur'
)
|
41 |
|
|
42 |
|
|
43 |
|
|
44 |
|
|
45 |
|
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryProjects.sql | ||
---|---|---|
1 |
/*
 * All columns of projects_mv, skipping the rows collected from the listed service ids.
 * The relation is aliased as "p" because the filter refers to p.collectedfrom, which
 * cannot be resolved unless that alias is declared in the FROM clause.
 */
SELECT *
FROM projects_mv p
WHERE p.collectedfrom NOT IN (
    'openaire____::dfgf',
    'openaire____::innoviris',
    'openaire____::sgov',
    'openaire____::conicytf',
    'openaire____::anr',
    'openaire____::gsrt',
    'openaire____::rif',
    'openaire____::rsf',
    'openaire____::miur'
)
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryPersons.sql | ||
---|---|---|
1 |
/*
 * Persons that have at least one name part and are the contact person of a
 * project_organization row with a responsible organization, one row per person
 * with the distinct identifiers aggregated into pid.
 */
SELECT
    p.id                  AS personid,
    p.firstname           AS firstname,
    p.secondnames         AS secondnames,
    CASE
        WHEN length(p.secondnames) > 0
            THEN trim(BOTH FROM p.secondnames || ', ' || p.firstname)
        ELSE p.firstname
    END                   AS fullname,
    p.fax                 AS fax,
    p.email               AS email,
    p.phone               AS phone,
    p.dateofcollection    AS dateofcollection,
    p.inferred            AS inferred,
    p.deletedbyinference  AS deletedbyinference,
    p.trust               AS trust,
    p.inferenceprovenance AS inferenceprovenance,
    dc.id                 AS collectedfromid,
    dc.officialname       AS collectedfromname,

    nc.code || '@@@' || nc.name || '@@@' || ns.code || '@@@' || ns.name     AS nationality,
    pac.code || '@@@' || pac.name || '@@@' || pas.code || '@@@' || pas.name AS provenanceaction,

    array_agg(DISTINCT i.pid || '###' || i.issuertypeclass) AS pid

FROM persons p
    /* nationality vocabulary */
    LEFT JOIN class nc   ON (nc.code = p.nationalityclass)
    LEFT JOIN scheme ns  ON (ns.code = p.nationalityscheme)
    /* provenance action vocabulary */
    LEFT JOIN class pac  ON (pac.code = p.provenanceactionclass)
    LEFT JOIN scheme pas ON (pas.code = p.provenanceactionscheme)

    LEFT JOIN datasources dc ON (dc.id = p.collectedfrom)

    LEFT JOIN personpids pp ON (pp.person = p.id)
    LEFT JOIN identities i  ON (i.pid = pp.pid)

    LEFT JOIN project_organization po ON (po.contactperson = p.id)

WHERE po.resporganization IS NOT NULL
  AND NOT (p.firstname IS NULL AND p.secondnames IS NULL)

GROUP BY
    p.id,
    p.firstname,
    p.secondnames,
    p.fax,
    p.email,
    p.phone,
    p.dateofcollection,
    p.inferred,
    p.deletedbyinference,
    p.trust,
    p.inferenceprovenance,
    dc.id,
    dc.officialname,
    nc.code, nc.name, ns.code, ns.name,
    pac.code, pac.name, pas.code, pas.name
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryClaimsRel.sql | ||
---|---|---|
1 |
/*
 * Claims of type 'rels2actions' belonging to either of the two relationship sets.
 * The set alternatives are grouped explicitly: AND binds tighter than OR, so an
 * ungrouped "type = .. AND set = .. OR set = .." returns the second set for any type.
 */
SELECT xml, provenance
FROM claims
WHERE type = 'rels2actions'
  AND set IN ('userclaim_result_project', 'userclaim_result_result')
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryClaimsUpdate.sql | ||
---|---|---|
1 |
/*
 * Claims of type 'updates2actions' in the 'userclaim_dmf' set.
 * The first XML declaration found in the payload is removed (case-insensitive match).
 */
SELECT regexp_replace(xml, '<\?xml version="1\.0" encoding="UTF-8"\?>', '', 'i') AS xml,
       provenance
FROM claims
WHERE set = 'userclaim_dmf'
  AND type = 'updates2actions'
|
6 |
|
|
7 |
|
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryProjectOrganization.sql | ||
---|---|---|
1 |
/*
 * Project/organization relations, skipping the projects whose identifier starts
 * with one of the listed prefixes.
 */
SELECT
    po.project           AS project,
    po.resporganization  AS resporganization,
    po.participantnumber AS participantnumber,
    po.contribution      AS contribution,
    po.currency          AS currency,
    NULL                 AS startdate,
    NULL                 AS enddate,
    false                AS inferred,
    false                AS deletedbyinference,
    po.trust             AS trust,
    NULL                 AS inferenceprovenance,

    po.semanticclass || '@@@' || po.semanticclass || '@@@dnet:project_organization_relations@@@dnet:project_organization_relations' AS semantics,
    'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction

FROM project_organization po
WHERE NOT (
       po.project LIKE 'dfgf%'
    OR po.project LIKE 'innoviris%'
    OR po.project LIKE 'sgov%'
    OR po.project LIKE 'conicytf%'
    OR po.project LIKE 'anr%'
    OR po.project LIKE 'gsrt%'
    OR po.project LIKE 'rif%'
    OR po.project LIKE 'rsf%'
    OR po.project LIKE 'miur%'
)
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasources.sql | ||
---|---|---|
1 |
/*
 * One row per service of dsm_services (the listed service ids are skipped), with
 * its identifiers, the compatibility level derived from its APIs, the typology
 * label and the service it was collected from.
 */
SELECT
    d.id                                         AS datasourceid,
    d.id || array_agg(DISTINCT di.pid)           AS identities,
    d.officialname                               AS officialname,
    d.englishname                                AS englishname,
    /* the first matching branch wins, so the order of the WHEN clauses matters */
    CASE
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire-cris_1.1'])
            THEN 'openaire-cris_1.1@@@OpenAIRE CRIS v1.1@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire4.0'])
            THEN 'openaire4.0@@@OpenAIRE 4.0@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['driver', 'openaire2.0'])
            THEN 'driver-openaire2.0@@@OpenAIRE 2.0+ (DRIVER OA, EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['driver'])
            THEN 'driver@@@OpenAIRE Basic (DRIVER OA)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire2.0'])
            THEN 'openaire2.0@@@OpenAIRE 2.0 (EC funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire3.0'])
            THEN 'openaire3.0@@@OpenAIRE 3.0 (OA, funding)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['openaire2.0_data'])
            THEN 'openaire2.0_data@@@OpenAIRE Data (funded, referenced datasets)@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['native'])
            THEN 'native@@@proprietary@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['hostedBy'])
            THEN 'hostedBy@@@collected from a compatible aggregator@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        WHEN (array_agg(DISTINCT COALESCE(a.compatibility_override, a.compatibility)::TEXT) @> ARRAY['notCompatible'])
            THEN 'notCompatible@@@under validation@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
        ELSE 'UNKNOWN@@@not available@@@dnet:datasourceCompatibilityLevel@@@dnet:datasourceCompatibilityLevel'
    END                                          AS openairecompatibility,
    d.websiteurl                                 AS websiteurl,
    d.logourl                                    AS logourl,
    /* base urls of the OAI apis that have a last aggregation date */
    array_agg(DISTINCT CASE WHEN a.protocol = 'oai' AND last_aggregation_date IS NOT NULL THEN a.baseurl END) AS accessinfopackage,
    d.latitude                                   AS latitude,
    d.longitude                                  AS longitude,
    d.namespaceprefix                            AS namespaceprefix,
    NULL                                         AS odnumberofitems,
    NULL                                         AS odnumberofitemsdate,

    /* d.subjects is split on '@@'; each trimmed entry gets the keyword classification suffix */
    (SELECT array_agg(s || '###keywords@@@keywords@@@dnet:subject_classification_typologies@@@dnet:subject_classification_typologies')
     FROM UNNEST(
         ARRAY(
             SELECT trim(s)
             FROM unnest(string_to_array(d.subjects, '@@')) AS s)) AS s) AS subjects,

    d.description                                AS description,
    NULL                                         AS odpolicies,
    ARRAY(SELECT trim(s)
          FROM unnest(string_to_array(d.languages, ',')) AS s) AS odlanguages,
    false                                        AS inferred,
    false                                        AS deletedbyinference,
    0.9                                          AS trust,
    NULL                                         AS inferenceprovenance,
    d.dateofcollection                           AS dateofcollection,
    d.dateofvalidation                           AS dateofvalidation,
    /* re3data fields */
    d.releasestartdate                           AS releasestartdate,
    d.releaseenddate                             AS releaseenddate,
    d.missionstatementurl                        AS missionstatementurl,
    d.databaseaccesstype                         AS databaseaccesstype,
    d.datauploadtype                             AS datauploadtype,
    d.databaseaccessrestriction                  AS databaseaccessrestriction,
    d.datauploadrestriction                      AS datauploadrestriction,
    d.citationguidelineurl                       AS citationguidelineurl,
    d.pidsystems                                 AS pidsystems,
    d.certificates                               AS certificates,
    ARRAY[]::text[]                              AS policies,
    dc.id                                        AS collectedfromid,
    dc.officialname                              AS collectedfromname,
    d.typology || '@@@' || CASE d.typology
        WHEN 'crissystem'                                THEN 'CRIS System'
        WHEN 'datarepository::unknown'                   THEN 'Data Repository'
        WHEN 'aggregator::datarepository'                THEN 'Data Repository Aggregator'
        WHEN 'infospace'                                 THEN 'Information Space'
        WHEN 'pubsrepository::institutional'             THEN 'Institutional Repository'
        WHEN 'aggregator::pubsrepository::institutional' THEN 'Institutional Repository Aggregator'
        WHEN 'pubsrepository::journal'                   THEN 'Journal'
        WHEN 'aggregator::pubsrepository::journals'      THEN 'Journal Aggregator/Publisher'
        WHEN 'pubsrepository::mock'                      THEN 'Other'
        WHEN 'pubscatalogue::unknown'                    THEN 'Publication Catalogue'
        WHEN 'pubsrepository::unknown'                   THEN 'Publication Repository'
        WHEN 'aggregator::pubsrepository::unknown'       THEN 'Publication Repository Aggregator'
        WHEN 'entityregistry'                            THEN 'Registry'
        WHEN 'scholarcomminfra'                          THEN 'Scholarly Comm. Infrastructure'
        WHEN 'pubsrepository::thematic'                  THEN 'Thematic Repository'
        WHEN 'websource'                                 THEN 'Web Source'
        WHEN 'entityregistry::projects'                  THEN 'Funder database'
        WHEN 'entityregistry::repositories'              THEN 'Registry of repositories'
        WHEN 'softwarerepository'                        THEN 'Software Repository'
        WHEN 'aggregator::softwarerepository'            THEN 'Software Repository Aggregator'
        WHEN 'orprepository'                             THEN 'Repository'
        ELSE 'Other'
    END || '@@@dnet:datasource_typologies@@@dnet:datasource_typologies' AS datasourcetype,
    'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
    CONCAT(d.issn, '@@@', d.eissn, '@@@', d.lissn) AS journal

FROM dsm_services d

    LEFT JOIN dsm_services dc    ON (d.collectedfrom = dc.id)
    LEFT JOIN dsm_api a          ON (d.id = a.service)
    LEFT JOIN dsm_servicepids di ON (d.id = di.service)

WHERE d.id NOT IN (
    'openaire____::dfgf',
    'openaire____::innoviris',
    'openaire____::sgov',
    'openaire____::conicytf',
    'openaire____::anr',
    'openaire____::gsrt',
    'openaire____::rif',
    'openaire____::rsf',
    'openaire____::miur'
)

GROUP BY
    d.id,
    d.officialname,
    d.englishname,
    d.websiteurl,
    d.logourl,
    d.contactemail,
    d.namespaceprefix,
    d.description,
    d.latitude,
    d.longitude,
    d.dateofcollection,
    d.dateofvalidation,
    d.releasestartdate,
    d.releaseenddate,
    d.missionstatementurl,
    d.databaseaccesstype,
    d.datauploadtype,
    d.databaseaccessrestriction,
    d.datauploadrestriction,
    d.citationguidelineurl,
    d.pidsystems,
    d.certificates,
    dc.id,
    dc.officialname,
    d.issn,
    d.eissn,
    d.lissn
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/querySimilarityFromOpenOrgsDB.sql | ||
---|---|---|
1 |
/*
 * Pairs of similar organization identifiers, concatenated from three sources:
 *   1. the 'is_similar' rows of openaire_simrels;
 *   2. each acronym, paired with an identifier derived from the organization id and md5(acronym);
 *   3. each other name, paired with an identifier derived from the organization id and md5(name).
 */
SELECT
    local_id       AS id1,
    oa_original_id AS id2
FROM openaire_simrels
WHERE reltype = 'is_similar'

UNION ALL

SELECT
    o.id AS id1,
    'openorgsmesh'||substring(o.id, 13)||'-'||md5(a.acronym) AS id2
FROM acronyms a
    LEFT JOIN organizations o ON (a.id = o.id)

UNION ALL

SELECT
    o.id AS id1,
    'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS id2
FROM other_names n
    LEFT JOIN organizations o ON (n.id = o.id)
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryOrganizationsFromOpenOrgsDB.sql | ||
---|---|---|
1 |
/*
 * Organizations of the OpenOrgs database, in two parts concatenated by UNION ALL:
 *   1. one row per organization (trust 0.95), with its alternative names and identifiers aggregated;
 *   2. one row per (organization, other name) pair (trust 0.88), identified by a key derived
 *      from the organization id and md5(name).
 */
SELECT
    o.id                                            AS organizationid,
    coalesce((array_agg(a.acronym))[1], o.name)     AS legalshortname,
    o.name                                          AS legalname,
    array_agg(DISTINCT n.name)                      AS "alternativeNames",
    (array_agg(u.url))[1]                           AS websiteurl,
    o.modification_date                             AS dateoftransformation,
    false                                           AS inferred,
    false                                           AS deletedbyinference,
    0.95                                            AS trust,
    ''                                              AS inferenceprovenance,
    'openaire____::openorgs'                        AS collectedfromid,
    'OpenOrgs Database'                             AS collectedfromname,
    o.country || '@@@dnet:countries'                AS country,
    'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
    array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM organizations o
    LEFT JOIN acronyms a    ON (a.id = o.id)
    LEFT JOIN urls u        ON (u.id = o.id)
    LEFT JOIN other_ids i   ON (i.id = o.id)
    LEFT JOIN other_names n ON (n.id = o.id)
GROUP BY
    o.id,
    o.name,
    o.modification_date,
    o.country

UNION ALL

SELECT
    'openorgsmesh'||substring(o.id, 13)||'-'||md5(n.name) AS organizationid,
    n.name                                          AS legalshortname,
    n.name                                          AS legalname,
    ARRAY[]::text[]                                 AS "alternativeNames",
    (array_agg(u.url))[1]                           AS websiteurl,
    o.modification_date                             AS dateoftransformation,
    false                                           AS inferred,
    false                                           AS deletedbyinference,
    0.88                                            AS trust,
    ''                                              AS inferenceprovenance,
    'openaire____::openorgs'                        AS collectedfromid,
    'OpenOrgs Database'                             AS collectedfromname,
    o.country || '@@@dnet:countries'                AS country,
    'sysimport:crosswalk:entityregistry@@@sysimport:crosswalk:entityregistry@@@dnet:provenance_actions@@@dnet:provenance_actions' AS provenanceaction,
    array_agg(DISTINCT i.otherid || '###' || i.type || '@@@dnet:pid_types') AS pid
FROM other_names n
    LEFT JOIN organizations o ON (n.id = o.id)
    LEFT JOIN urls u          ON (u.id = o.id)
    LEFT JOIN other_ids i     ON (i.id = o.id)
GROUP BY
    o.id,
    o.modification_date,
    o.country,
    n.name
|
52 |
|
|
53 |
|
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasourcesForSize.sql | ||
---|---|---|
1 |
/* Number of rows of dsm_services, not counting the listed service ids. */
SELECT count(*)
FROM dsm_services d
WHERE d.id NOT IN (
    'openaire____::dfgf',
    'openaire____::innoviris',
    'openaire____::sgov',
    'openaire____::conicytf',
    'openaire____::anr',
    'openaire____::gsrt',
    'openaire____::rif',
    'openaire____::rsf',
    'openaire____::miur'
)
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasourceOrganization.sql | ||
---|---|---|
1 |
/*
 * Service/organization relations with the fixed 'providedBy' semantics.
 * Rows joined to one of the listed service ids are skipped; rows without a matching
 * service are skipped too, because comparing a NULL d.id never yields true.
 */
SELECT
    dor.service      AS datasource,
    dor.organization AS organization,
    NULL             AS startdate,
    NULL             AS enddate,
    false            AS inferred,
    false            AS deletedbyinference,
    0.9              AS trust,
    NULL             AS inferenceprovenance,

    'providedBy@@@provided by@@@dnet:datasources_organizations_typologies@@@dnet:datasources_organizations_typologies' AS semantics,
    d.provenanceaction || '@@@' || d.provenanceaction || '@@@dnet:provenanceActions@@@dnet:provenanceActions' AS provenanceaction

FROM dsm_service_organization dor
    LEFT JOIN dsm_services d ON (dor.service = d.id)
WHERE d.id NOT IN (
    'openaire____::dfgf',
    'openaire____::innoviris',
    'openaire____::sgov',
    'openaire____::conicytf',
    'openaire____::anr',
    'openaire____::gsrt',
    'openaire____::rif',
    'openaire____::rsf',
    'openaire____::miur'
)
|
25 |
|
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importGridAC.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="ff25219c-e485-4440-8bc0-a8bbe42512ac_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2018-08-06T16:19:33+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<WORKFLOW_NAME>Import GridAC</WORKFLOW_NAME> |
|
11 |
<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE> |
|
12 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
13 |
<CONFIGURATION start="manual"> |
|
14 |
<NODE isStart="true" name="setInputPath" type="SetHdfsFile"> |
|
15 |
<DESCRIPTION>set the hdfs output path</DESCRIPTION> |
|
16 |
<PARAMETERS> |
|
17 |
<PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/data/gridac</PARAM> |
|
18 |
<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM> |
|
19 |
</PARAMETERS> |
|
20 |
<ARCS> |
|
21 |
<ARC to="prepareActionSets"/> |
|
22 |
</ARCS> |
|
23 |
</NODE> |
|
24 |
<NODE name="prepareActionSets" type="PrepareActionSets"> |
|
25 |
<DESCRIPTION>prepare action sets</DESCRIPTION> |
|
26 |
<PARAMETERS> |
|
27 |
<PARAM managedBy="system" name="sets" required="true" type="string"> |
|
28 |
[ |
|
29 |
{ |
|
30 |
'set' : 'gridac-dump', |
|
31 |
'jobProperty' : 'export_action_set_gridac_dump', |
|
32 |
'enablingProperty' : 'active_gridac_dump', |
|
33 |
'enabled' : 'true' |
|
34 |
} |
|
35 |
] |
|
36 |
</PARAM> |
|
37 |
</PARAMETERS> |
|
38 |
<ARCS> |
|
39 |
<ARC to="extractOutputPath"/> |
|
40 |
</ARCS> |
|
41 |
</NODE> |
|
42 |
<NODE name="extractOutputPath" type="ExtractOutputPath"> |
|
43 |
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION> |
|
44 |
<PARAMETERS> |
|
45 |
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM> |
|
46 |
</PARAMETERS> |
|
47 |
<ARCS> |
|
48 |
<ARC to="importActionSet"/> |
|
49 |
</ARCS> |
|
50 |
</NODE> |
|
51 |
<NODE isJoin="true" name="importActionSet" type="SubmitHadoopJob"> |
|
52 |
<DESCRIPTION>IIS main</DESCRIPTION> |
|
53 |
<PARAMETERS> |
|
54 |
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">importGridAcJob</PARAM> |
|
55 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
56 |
<PARAM managedBy="system" name="envParams" required="true" type="string"> |
|
57 |
{ |
|
58 |
'mapred.input.dir':'inputPath', |
|
59 |
'mapred.output.dir':'outputPath' |
|
60 |
} |
|
61 |
</PARAM> |
|
62 |
<PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM> |
|
63 |
</PARAMETERS> |
|
64 |
<ARCS> |
|
65 |
<ARC to="updateActionSets"/> |
|
66 |
</ARCS> |
|
67 |
</NODE> |
|
68 |
<NODE name="updateActionSets" type="UpdateActionSets"> |
|
69 |
<DESCRIPTION>update action sets</DESCRIPTION> |
|
70 |
<PARAMETERS/> |
|
71 |
<ARCS> |
|
72 |
<ARC to="success"/> |
|
73 |
</ARCS> |
|
74 |
</NODE> |
|
75 |
</CONFIGURATION> |
|
76 |
<STATUS> |
|
77 |
<LAST_EXECUTION_ID>wf_20180807_092640_582</LAST_EXECUTION_ID> |
|
78 |
<LAST_EXECUTION_DATE>2018-08-07T09:29:23+00:00</LAST_EXECUTION_DATE> |
|
79 |
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS> |
|
80 |
<LAST_EXECUTION_ERROR/> |
|
81 |
</STATUS> |
|
82 |
</BODY> |
|
83 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importActionsFromHDFS.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="cbfb388a-c184-4a88-be66-0f1d3bb61fe5_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
6 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
7 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
8 |
<RESOURCE_URI value=""/> |
|
9 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<WORKFLOW_NAME>Actions from HDFS</WORKFLOW_NAME> |
|
13 |
<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE> |
|
14 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
15 |
<CONFIGURATION start="manual"> |
|
16 |
<NODE name="hadoopConfig" type="SetClusterAndTable" isStart="true"> |
|
17 |
<DESCRIPTION>Set table name</DESCRIPTION> |
|
18 |
<PARAMETERS> |
|
19 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
|
20 |
<PARAM required="true" type="string" name="tableParam" managedBy="system">tableName</PARAM> |
|
21 |
<PARAM required="true" type="string" name="table" managedBy="user"></PARAM> |
|
22 |
</PARAMETERS> |
|
23 |
<ARCS> |
|
24 |
<ARC to="importMapreduce"/> |
|
25 |
</ARCS> |
|
26 |
</NODE> |
|
27 |
<NODE name="setActionsPath" type="SetEnvParameter" isStart="true"> |
|
28 |
<DESCRIPTION>Set the sequence file path on HDFS holding the exported Actions</DESCRIPTION> |
|
29 |
<PARAMETERS> |
|
30 |
<PARAM managedBy="system" name="parameterName" required="true" type="string">actionsPath</PARAM> |
|
31 |
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/2016-03-25_iis_preprocessing/document_referencedDatasets/rawset_76cfeefd-1139-4a30-a394-f01bf67220bf_1458652954774</PARAM> |
|
32 |
</PARAMETERS> |
|
33 |
<ARCS> |
|
34 |
<ARC to="importMapreduce"/> |
|
35 |
</ARCS> |
|
36 |
</NODE> |
|
37 |
|
|
38 |
<NODE name="importMapreduce" type="SubmitHadoopJob" isJoin="true"> |
|
39 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
40 |
<PARAMETERS> |
|
41 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">actionsImportJob</PARAM> |
|
42 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
43 |
{ |
|
44 |
'cluster' : 'cluster', |
|
45 |
'mapred.input.dir' : 'actionsPath', |
|
46 |
'hbase.mapred.outputtable' : 'tableName' |
|
47 |
} |
|
48 |
</PARAM> |
|
49 |
<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM> |
|
50 |
</PARAMETERS> |
|
51 |
<ARCS> |
|
52 |
<ARC to="success"/> |
|
53 |
</ARCS> |
|
54 |
</NODE> |
|
55 |
|
|
56 |
</CONFIGURATION> |
|
57 |
<STATUS/> |
|
58 |
</BODY> |
|
59 |
</RESOURCE_PROFILE> |
|
60 |
|
|
61 |
|
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importScholexplorer.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="e03f256e-1e4d-4b3d-9c07-91faf5d25210_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
6 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
7 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
8 |
<RESOURCE_URI value=""/> |
|
9 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<WORKFLOW_NAME>Import ScholExplorer Links and entities</WORKFLOW_NAME> |
|
13 |
<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE> |
|
14 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
15 |
<CONFIGURATION start="manual"> |
|
16 |
<NODE name="setInputPath" isStart="true" type="SetHdfsFile"> |
|
17 |
<DESCRIPTION>set the hdfs input path</DESCRIPTION> |
|
18 |
<PARAMETERS> |
|
19 |
<PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/dli/export/scolixDumpExport</PARAM> |
|
20 |
<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM> |
|
21 |
</PARAMETERS> |
|
22 |
<ARCS> |
|
23 |
<ARC to="prepareActionSets"/> |
|
24 |
</ARCS> |
|
25 |
</NODE> |
|
26 |
<NODE name="prepareActionSets" type="PrepareActionSets"> |
|
27 |
<DESCRIPTION>prepare action sets</DESCRIPTION> |
|
28 |
<PARAMETERS> |
|
29 |
<PARAM required="true" type="string" name="sets" managedBy="system"> |
|
30 |
[ |
|
31 |
{ |
|
32 |
'set' : 'scholexplorer-dump', |
|
33 |
'jobProperty' : 'export_action_set_scholexplorer_dump', |
|
34 |
'enablingProperty' : 'active_scholexplorer_dump', |
|
35 |
'enabled' : 'true' |
|
36 |
} |
|
37 |
] |
|
38 |
</PARAM> |
|
39 |
</PARAMETERS> |
|
40 |
<ARCS> |
|
41 |
<ARC to="extractOutputPath"/> |
|
42 |
</ARCS> |
|
43 |
</NODE> |
|
44 |
|
|
45 |
<NODE name="extractOutputPath" type="ExtractOutputPath"> |
|
46 |
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION> |
|
47 |
<PARAMETERS> |
|
48 |
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM> |
|
49 |
|
|
50 |
</PARAMETERS> |
|
51 |
<ARCS> |
|
52 |
<ARC to="importActionSet"/> |
|
53 |
</ARCS> |
|
54 |
</NODE> |
|
55 |
|
|
56 |
<NODE name="importActionSet" type="SubmitHadoopJob" isJoin="true"> |
|
57 |
<DESCRIPTION>Run the ScholExplorer import job</DESCRIPTION> |
|
58 |
<PARAMETERS> |
|
59 |
<PARAM required="true" type="string" name="hadoopJob" managedBy="system">importScholexplorerJob</PARAM> |
|
60 |
<PARAM required="true" type="string" name="cluster" managedBy="system">DM</PARAM> |
|
61 |
<PARAM required="true" type="string" name="envParams" managedBy="system"> |
|
62 |
{ |
|
63 |
'cluster' : 'cluster', |
|
64 |
'mapred.input.dir':'inputPath', |
|
65 |
'mapred.output.dir':'outputPath' |
|
66 |
} |
|
67 |
</PARAM> |
|
68 |
<PARAM required="true" type="boolean" name="simulation" managedBy="user">false</PARAM> |
|
69 |
</PARAMETERS> |
|
70 |
<ARCS> |
|
71 |
<ARC to="updateActionSets"/> |
|
72 |
</ARCS> |
|
73 |
</NODE> |
|
74 |
<NODE name="updateActionSets" type="UpdateActionSets"> |
|
75 |
<DESCRIPTION>update action sets</DESCRIPTION> |
|
76 |
<PARAMETERS/> |
|
77 |
<ARCS> |
|
78 |
<ARC to="success"/> |
|
79 |
</ARCS> |
|
80 |
</NODE> |
|
81 |
</CONFIGURATION> |
|
82 |
<STATUS/> |
|
83 |
</BODY> |
|
84 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/resetHbase.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="ce304c65-5836-4cf0-9a48-53472b9f6f35_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<WORKFLOW_NAME>Reset HBase</WORKFLOW_NAME> |
|
12 |
<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE> |
|
13 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
14 |
<CONFIGURATION start="auto"> |
|
15 |
<NODE name="checkTable" type="CheckHBaseTable" isStart="true"> |
|
16 |
<DESCRIPTION>check hbase table</DESCRIPTION> |
|
17 |
<PARAMETERS> |
|
18 |
<PARAM name="hbaseTableProperty" type="string" managedBy="system" required="true">hbase.mapred.datatable</PARAM> |
|
19 |
<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM> |
|
20 |
<PARAM name="tableColumnsParamName" type="string" managedBy="system" required="true">hTableColumns</PARAM> |
|
21 |
<PARAM name="existOutNode" type="string" managedBy="system" required="true">drop</PARAM> |
|
22 |
<PARAM name="dontExistOutNode" type="string" required="true" managedBy="system">define</PARAM> |
|
23 |
</PARAMETERS> |
|
24 |
<ARCS> |
|
25 |
<ARC to="drop" name="drop"/> |
|
26 |
<ARC to="define" name="define"/> |
|
27 |
</ARCS> |
|
28 |
</NODE> |
|
29 |
<NODE name="drop" type="DropHBaseTable"> |
|
30 |
<DESCRIPTION>drop hbase table</DESCRIPTION> |
|
31 |
<PARAMETERS> |
|
32 |
<PARAM name="hbaseTableProperty" type="string" managedBy="system" required="true">hbase.mapred.datatable</PARAM> |
|
33 |
<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM> |
|
34 |
</PARAMETERS> |
|
35 |
<ARCS> |
|
36 |
<ARC to="define"/> |
|
37 |
</ARCS> |
|
38 |
</NODE> |
|
39 |
<NODE name="define" type="DefineHBaseOpenaireSchema"> |
|
40 |
<DESCRIPTION>define OpenAIRE hbase table</DESCRIPTION> |
|
41 |
<PARAMETERS> |
|
42 |
<PARAM name="tableColumnsParamName" type="string" managedBy="system" required="true">hTableColumns</PARAM> |
|
43 |
<PARAM name="hbaseTableProperty" type="string" managedBy="system" required="true">hbase.mapred.datatable</PARAM> |
|
44 |
<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM> |
|
45 |
</PARAMETERS> |
|
46 |
<ARCS> |
|
47 |
<ARC to="create"/> |
|
48 |
</ARCS> |
|
49 |
</NODE> |
|
50 |
<NODE name="create" type="CreateHBaseTable"> |
|
51 |
<DESCRIPTION>create hbase table</DESCRIPTION> |
|
52 |
<PARAMETERS> |
|
53 |
<PARAM name="hbaseTableProperty" type="string" managedBy="system" required="true">hbase.mapred.datatable</PARAM> |
|
54 |
<PARAM name="cluster" type="string" managedBy="system" required="true">DM</PARAM> |
|
55 |
<PARAM name="tableColumnsParamName" type="string" managedBy="system" required="true">hTableColumns</PARAM> |
|
56 |
</PARAMETERS> |
|
57 |
<ARCS> |
|
58 |
<ARC to="success"/> |
|
59 |
</ARCS> |
|
60 |
</NODE> |
|
61 |
</CONFIGURATION> |
|
62 |
<STATUS/> |
|
63 |
</BODY> |
|
64 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/actions2hbase.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER value="4b8dabb8-cef7-4910-a0ad-fd8e70d53b9e_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
5 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
6 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
7 |
<RESOURCE_URI value=""/> |
|
8 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/> |
|
9 |
</HEADER> |
|
10 |
<BODY> |
|
11 |
<WORKFLOW_NAME>Actions to HBase</WORKFLOW_NAME> |
|
12 |
<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE> |
|
13 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
14 |
<CONFIGURATION start="auto"> |
|
15 |
<NODE name="promoteActions" type="PromoteActions" isStart="true"> |
|
16 |
<DESCRIPTION>Promote actions</DESCRIPTION> |
|
17 |
<PARAMETERS> |
|
18 |
<PARAM function="obtainValues('actionSets', {})" required="false" type="string" name="set" managedBy="user">ALL SETS</PARAM> |
|
19 |
</PARAMETERS> |
|
20 |
<ARCS> |
|
21 |
<ARC to="success"/> |
|
22 |
</ARCS> |
|
23 |
</NODE> |
|
24 |
</CONFIGURATION> |
|
25 |
<STATUS/> |
|
26 |
</BODY> |
|
27 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/distcp.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="37a026fd-8707-4390-9d8c-b1566d2a1e3f_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2019-10-17T08:44:46+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<WORKFLOW_NAME>distcp</WORKFLOW_NAME> |
|
11 |
<WORKFLOW_TYPE>Export InfoSpace</WORKFLOW_TYPE> |
|
12 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
13 |
<CONFIGURATION start="manual"> |
|
14 |
|
|
15 |
<NODE isStart="true" name="setSourcePath" type="SetEnvParameter"> |
|
16 |
<DESCRIPTION>Set the source path on HDFS (DM)</DESCRIPTION> |
|
17 |
<PARAMETERS> |
|
18 |
<PARAM managedBy="system" name="parameterName" required="true" type="string">sourcePath</PARAM> |
|
19 |
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp/db_openaireplus_services_beta.export.2019.08.17</PARAM> |
|
20 |
</PARAMETERS> |
|
21 |
<ARCS> |
|
22 |
<ARC to="distcp"/> |
|
23 |
</ARCS> |
|
24 |
</NODE> |
|
25 |
<NODE isStart="true" name="setTargetPath" type="SetEnvParameter"> |
|
26 |
<DESCRIPTION>Set the target path on HDFS (IIS)</DESCRIPTION> |
|
27 |
<PARAMETERS> |
|
28 |
<PARAM managedBy="system" name="parameterName" required="true" type="string">targetPath</PARAM> |
|
29 |
<PARAM managedBy="user" name="parameterValue" required="true" type="string">/tmp</PARAM> |
|
30 |
</PARAMETERS> |
|
31 |
<ARCS> |
|
32 |
<ARC to="distcp"/> |
|
33 |
</ARCS> |
|
34 |
</NODE> |
|
35 |
<NODE isJoin="true" name="distcp" type="SubmitHadoopJob"> |
|
36 |
<DESCRIPTION>distcp</DESCRIPTION> |
|
37 |
<PARAMETERS> |
|
38 |
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">distcpJob</PARAM> |
|
39 |
<PARAM managedBy="system" name="cluster" required="true" type="string">IIS</PARAM> |
|
40 |
<PARAM managedBy="system" name="envParams" required="true" type="string"> |
|
41 |
{ |
|
42 |
'sourcePath' : 'sourcePath', |
|
43 |
'targetPath' : 'targetPath' |
|
44 |
} |
|
45 |
</PARAM> |
|
46 |
<PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM> |
|
47 |
<PARAM managedBy="system" name="oozieReportActionsCsv" required="true" type="string">build-report</PARAM> |
|
48 |
</PARAMETERS> |
|
49 |
<ARCS> |
|
50 |
<ARC to="success"/> |
|
51 |
</ARCS> |
|
52 |
</NODE> |
|
53 |
|
|
54 |
</CONFIGURATION> |
|
55 |
<STATUS> |
|
56 |
<LAST_EXECUTION_ID>wf_20190805_085505_893</LAST_EXECUTION_ID> |
|
57 |
<LAST_EXECUTION_DATE>2019-08-07T05:19:02+00:00</LAST_EXECUTION_DATE> |
|
58 |
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS> |
|
59 |
<LAST_EXECUTION_ERROR/> |
|
60 |
</STATUS> |
|
61 |
</BODY> |
|
62 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importAffiliationsDOIBoost.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="970c4826-cf6d-4dfe-850c-41e508d341fa_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2018-11-15T11:24:05+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<WORKFLOW_NAME>Import Organizations from DOIboost</WORKFLOW_NAME> |
|
11 |
<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE> |
|
12 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
13 |
<CONFIGURATION start="manual"> |
|
14 |
<NODE isStart="true" name="setOrgsOnly" type="SetEnvParameter"> |
|
15 |
<DESCRIPTION>set the parameter to drive the import procedure behaviour so that it will import only organizations</DESCRIPTION> |
|
16 |
<PARAMETERS> |
|
17 |
<PARAM managedBy="system" name="parameterName" required="true" type="string">onlyOrganization</PARAM> |
|
18 |
<PARAM managedBy="user" name="parameterValue" required="false" type="string">true</PARAM> |
|
19 |
</PARAMETERS> |
|
20 |
<ARCS> |
|
21 |
<ARC to="importActionSet"/> |
|
22 |
</ARCS> |
|
23 |
</NODE> |
|
24 |
<NODE isStart="true" name="setInputPath" type="SetHdfsFile"> |
|
25 |
<DESCRIPTION>set the hdfs input path</DESCRIPTION> |
|
26 |
<PARAMETERS> |
|
27 |
<PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/data/doiboost</PARAM> |
|
28 |
<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM> |
|
29 |
</PARAMETERS> |
|
30 |
<ARCS> |
|
31 |
<ARC to="prepareActionSets"/> |
|
32 |
</ARCS> |
|
33 |
</NODE> |
|
34 |
<NODE name="prepareActionSets" type="PrepareActionSets"> |
|
35 |
<DESCRIPTION>prepare action sets</DESCRIPTION> |
|
36 |
<PARAMETERS> |
|
37 |
<PARAM managedBy="system" name="sets" required="true" type="string"> |
|
38 |
[ |
|
39 |
{ |
|
40 |
'set' : 'doiboost-organizations', |
|
41 |
'jobProperty' : 'export_action_set_doiboost-organizations', |
|
42 |
'enablingProperty' : 'active_doiboost-organizations', |
|
43 |
'enabled' : 'true' |
|
44 |
} |
|
45 |
] |
|
46 |
</PARAM> |
|
47 |
</PARAMETERS> |
|
48 |
<ARCS> |
|
49 |
<ARC to="extractOutputPath"/> |
|
50 |
</ARCS> |
|
51 |
</NODE> |
|
52 |
<NODE name="extractOutputPath" type="ExtractOutputPath"> |
|
53 |
<DESCRIPTION>extract the hdfs output path generated in the previous node</DESCRIPTION> |
|
54 |
<PARAMETERS> |
|
55 |
<PARAM managedBy="system" name="hdfsOutputPathParam" required="true" type="string">outputPath</PARAM> |
|
56 |
</PARAMETERS> |
|
57 |
<ARCS> |
|
58 |
<ARC to="importActionSet"/> |
|
59 |
</ARCS> |
|
60 |
</NODE> |
|
61 |
<NODE isJoin="true" name="importActionSet" type="SubmitHadoopJob"> |
|
62 |
<DESCRIPTION>Run the DOIBoost import job</DESCRIPTION> |
|
63 |
<PARAMETERS> |
|
64 |
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">importDOIBoostJob</PARAM> |
|
65 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
66 |
<PARAM managedBy="system" name="envParams" required="true" type="string"> |
|
67 |
{ |
|
68 |
'mapred.input.dir':'inputPath', |
|
69 |
'mapred.output.dir':'outputPath', |
|
70 |
'onlyOrganization' : 'onlyOrganization' |
|
71 |
} |
|
72 |
</PARAM> |
|
73 |
<PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM> |
|
74 |
</PARAMETERS> |
|
75 |
<ARCS> |
|
76 |
<ARC to="updateActionSets"/> |
|
77 |
</ARCS> |
|
78 |
</NODE> |
|
79 |
<NODE name="updateActionSets" type="UpdateActionSets"> |
|
80 |
<DESCRIPTION>update action sets</DESCRIPTION> |
|
81 |
<PARAMETERS/> |
|
82 |
<ARCS> |
|
83 |
<ARC to="success"/> |
|
84 |
</ARCS> |
|
85 |
</NODE> |
|
86 |
</CONFIGURATION> |
|
87 |
<STATUS> |
|
88 |
<LAST_EXECUTION_ID>wf_20181026_140552_878</LAST_EXECUTION_ID> |
|
89 |
<LAST_EXECUTION_DATE>2018-10-26T14:21:47+00:00</LAST_EXECUTION_DATE> |
|
90 |
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS> |
|
91 |
<LAST_EXECUTION_ERROR/> |
|
92 |
</STATUS> |
|
93 |
</BODY> |
|
94 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/oaf2hbase.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<RESOURCE_PROFILE> |
|
3 |
<HEADER> |
|
4 |
<RESOURCE_IDENTIFIER |
|
5 |
value="c7d7d775-2db3-474d-85ab-5173a582d515_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
6 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
7 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
8 |
<RESOURCE_URI value=""/> |
|
9 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/> |
|
10 |
</HEADER> |
|
11 |
<BODY> |
|
12 |
<WORKFLOW_NAME>OAF to HBase</WORKFLOW_NAME> |
|
13 |
<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE> |
|
14 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
15 |
<CONFIGURATION start="manual"> |
|
16 |
<NODE isStart="true" name="setTable" type="SetHBaseTable"> |
|
17 |
<DESCRIPTION>set hbase table</DESCRIPTION> |
|
18 |
<PARAMETERS> |
|
19 |
<PARAM managedBy="user" name="table" required="true" type="string"></PARAM> |
|
20 |
</PARAMETERS> |
|
21 |
<ARCS> |
|
22 |
<ARC to="mapreduce"/> |
|
23 |
</ARCS> |
|
24 |
</NODE> |
|
25 |
<NODE isStart="true" name="prepareImport" type="PrepareMDStoreImport"> |
|
26 |
<DESCRIPTION>Configure export to HDFS</DESCRIPTION> |
|
27 |
<PARAMETERS> |
|
28 |
<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">hdfsPath</PARAM> |
|
29 |
<PARAM managedBy="user" name="hdfsPath" required="true" type="string"></PARAM> |
|
30 |
<PARAM managedBy="system" name="mappingParam" required="true" type="string">xslt</PARAM> |
|
31 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('oaf2hbaseMappings', {})"></PARAM> |
|
32 |
</PARAMETERS> |
|
33 |
<ARCS> |
|
34 |
<ARC to="reuseHdfsRecords"/> |
|
35 |
</ARCS> |
|
36 |
</NODE> |
|
37 |
<NODE name="reuseHdfsRecords" type="ReuseHdfsRecords"> |
|
38 |
<DESCRIPTION>reuse mdstore records</DESCRIPTION> |
|
39 |
<PARAMETERS> |
|
40 |
<PARAM managedBy="user" name="reuseMdRecords" required="true" type="boolean"></PARAM> |
|
41 |
</PARAMETERS> |
|
42 |
<ARCS> |
|
43 |
<ARC name="true" to="doneExport"/> |
|
44 |
<ARC name="false" to="exportRecords"/> |
|
45 |
</ARCS> |
|
46 |
</NODE> |
|
47 |
<NODE name="exportRecords" type="MDStoreBatchExporter"> |
|
48 |
<DESCRIPTION>Fetch mdstore records</DESCRIPTION> |
|
49 |
<PARAMETERS> |
|
50 |
<PARAM managedBy="user" name="format" required="true" type="string">OAF</PARAM> |
|
51 |
<PARAM managedBy="user" name="layout" required="true" type="string">store</PARAM> |
|
52 |
<PARAM managedBy="user" name="interpretation" required="true" type="string">cleaned</PARAM> |
|
53 |
<PARAM managedBy="system" name="outputEprParam" required="true" type="string">records_epr</PARAM> |
|
54 |
</PARAMETERS> |
|
55 |
<ARCS> |
|
56 |
<ARC to="storeHdfsRecords"/> |
|
57 |
</ARCS> |
|
58 |
</NODE> |
|
59 |
<NODE name="storeHdfsRecords" type="StoreHdfsRecords"> |
|
60 |
<DESCRIPTION>Store records to HDFS</DESCRIPTION> |
|
61 |
<PARAMETERS> |
|
62 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">records_epr</PARAM> |
|
63 |
<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">hdfsPath</PARAM> |
|
64 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
65 |
</PARAMETERS> |
|
66 |
<ARCS> |
|
67 |
<ARC to="doneExport"/> |
|
68 |
</ARCS> |
|
69 |
</NODE> |
|
70 |
<NODE name="doneExport"> |
|
71 |
<DESCRIPTION/> |
|
72 |
<PARAMETERS/> |
|
73 |
<ARCS> |
|
74 |
<ARC to="mapreduce"/> |
|
75 |
</ARCS> |
|
76 |
</NODE> |
|
77 |
<NODE isJoin="true" name="mapreduce" type="SubmitHadoopJob"> |
|
78 |
<DESCRIPTION>Run M/R import Job</DESCRIPTION> |
|
79 |
<PARAMETERS> |
|
80 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
81 |
<PARAM managedBy="system" name="hadoopJob" required="true" type="string">mdStoreHdfsImportJob</PARAM> |
|
82 |
<PARAM managedBy="user" name="simulation" required="true" type="boolean">false</PARAM> |
|
83 |
<PARAM managedBy="system" name="envParams" required="true" type="string"> |
|
84 |
{ |
|
85 |
'mapred.input.dir' : 'hdfsPath', |
|
86 |
'hbase.import.xslt' : 'xslt', |
|
87 |
'hbase.mapred.outputtable' : 'hbaseTable' |
|
88 |
} |
|
89 |
</PARAM> |
|
90 |
</PARAMETERS> |
|
91 |
<ARCS> |
|
92 |
<ARC to="success"/> |
|
93 |
</ARCS> |
|
94 |
</NODE> |
|
95 |
</CONFIGURATION> |
|
96 |
<STATUS/> |
|
97 |
</BODY> |
|
98 |
</RESOURCE_PROFILE> |
|
99 |
|
|
100 |
|
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/db2hbase.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="e92d2f81-ea98-4732-a306-07da87f35033_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2006-05-04T18:13:51.0Z"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<WORKFLOW_NAME>DB to HBase</WORKFLOW_NAME> |
|
11 |
<WORKFLOW_TYPE>Data Load</WORKFLOW_TYPE> |
|
12 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
13 |
<CONFIGURATION start="manual"> |
|
14 |
<NODE isStart="true" name="start"> |
|
15 |
<DESCRIPTION>start</DESCRIPTION> |
|
16 |
<PARAMETERS/> |
|
17 |
<ARCS> |
|
18 |
<ARC to="queryDatasources"/> |
|
19 |
</ARCS> |
|
20 |
</NODE> |
|
21 |
<NODE name="queryDatasources" type="QueryDb"> |
|
22 |
<DESCRIPTION>query Datasources</DESCRIPTION> |
|
23 |
<PARAMETERS> |
|
24 |
<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM> |
|
25 |
<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasources.sql |
|
26 |
</PARAM> |
|
27 |
<PARAM managedBy="system" name="sqlForSize" required="true" type="string"> |
|
28 |
/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasourcesForSize.sql |
|
29 |
</PARAM> |
|
30 |
<PARAM managedBy="system" name="outputEprParam" required="true" type="string">dsEpr</PARAM> |
|
31 |
</PARAMETERS> |
|
32 |
<ARCS> |
|
33 |
<ARC to="storeDatasources"/> |
|
34 |
</ARCS> |
|
35 |
</NODE> |
|
36 |
<NODE name="storeDatasources" type="StoreHBase"> |
|
37 |
<DESCRIPTION>Store Datasources to HBase</DESCRIPTION> |
|
38 |
<PARAMETERS> |
|
39 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">dsEpr</PARAM> |
|
40 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
|
41 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
42 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
43 |
</PARAMETERS> |
|
44 |
<ARCS> |
|
45 |
<ARC to="queryDatasourceOrganization"/> |
|
46 |
</ARCS> |
|
47 |
</NODE> |
|
48 |
<NODE name="queryDatasourceOrganization" type="QueryDb"> |
|
49 |
<DESCRIPTION>query relation Datasource_Organization</DESCRIPTION> |
|
50 |
<PARAMETERS> |
|
51 |
<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM> |
|
52 |
<PARAM managedBy="system" name="sql" required="true" type="string"> |
|
53 |
/eu/dnetlib/msro/openaireplus/workflows/hbase/queryDatasourceOrganization.sql |
|
54 |
</PARAM> |
|
55 |
<PARAM managedBy="system" name="outputEprParam" required="true" type="string">dsOrgEpr</PARAM> |
|
56 |
</PARAMETERS> |
|
57 |
<ARCS> |
|
58 |
<ARC to="storeDatasourceOrganization"/> |
|
59 |
</ARCS> |
|
60 |
</NODE> |
|
61 |
<NODE name="storeDatasourceOrganization" type="StoreHBase"> |
|
62 |
<DESCRIPTION>Store relation Datasource_Organization</DESCRIPTION> |
|
63 |
<PARAMETERS> |
|
64 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">dsOrgEpr</PARAM> |
|
65 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
|
66 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
67 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
68 |
</PARAMETERS> |
|
69 |
<ARCS> |
|
70 |
<ARC to="queryOrganizations"/> |
|
71 |
</ARCS> |
|
72 |
</NODE> |
|
73 |
<NODE name="queryOrganizations" type="QueryDb"> |
|
74 |
<DESCRIPTION>query Organizations</DESCRIPTION> |
|
75 |
<PARAMETERS> |
|
76 |
<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM> |
|
77 |
<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryOrganizations.sql |
|
78 |
</PARAM> |
|
79 |
<PARAM managedBy="system" name="outputEprParam" required="true" type="string">orgEpr</PARAM> |
|
80 |
</PARAMETERS> |
|
81 |
<ARCS> |
|
82 |
<ARC to="storeOrganizations"/> |
|
83 |
</ARCS> |
|
84 |
</NODE> |
|
85 |
<NODE name="storeOrganizations" type="StoreHBase"> |
|
86 |
<DESCRIPTION>Store Organizations to HBase</DESCRIPTION> |
|
87 |
<PARAMETERS> |
|
88 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">orgEpr</PARAM> |
|
89 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
|
90 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
91 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
92 |
</PARAMETERS> |
|
93 |
<ARCS> |
|
94 |
<ARC to="queryOrganizationsFromOpenOrgsDB"/> |
|
95 |
</ARCS> |
|
96 |
</NODE> |
|
97 |
<NODE name="queryOrganizationsFromOpenOrgsDB" type="QueryDb"> |
|
98 |
<DESCRIPTION>query Open Organizations</DESCRIPTION> |
|
99 |
<PARAMETERS> |
|
100 |
<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openorgs.db.name</PARAM> |
|
101 |
<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryOrganizationsFromOpenOrgsDB.sql |
|
102 |
</PARAM> |
|
103 |
<PARAM managedBy="system" name="outputEprParam" required="true" type="string">openOrgsEpr</PARAM> |
|
104 |
</PARAMETERS> |
|
105 |
<ARCS> |
|
106 |
<ARC to="storeOrganizationsFromOpenOrgsDB"/> |
|
107 |
</ARCS> |
|
108 |
</NODE> |
|
109 |
<NODE name="storeOrganizationsFromOpenOrgsDB" type="StoreHBase"> |
|
110 |
<DESCRIPTION>Store Open Organizations to HBase</DESCRIPTION> |
|
111 |
<PARAMETERS> |
|
112 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">openOrgsEpr</PARAM> |
|
113 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
|
114 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
115 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
116 |
</PARAMETERS> |
|
117 |
<ARCS> |
|
118 |
<ARC to="querySimilaritiesFromOpenOrgsDB"/> |
|
119 |
</ARCS> |
|
120 |
</NODE> |
|
121 |
<NODE name="querySimilaritiesFromOpenOrgsDB" type="QueryDb"> |
|
122 |
<DESCRIPTION>query similarities (openorgs)</DESCRIPTION> |
|
123 |
<PARAMETERS> |
|
124 |
<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openorgs.db.name</PARAM> |
|
125 |
<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/querySimilarityFromOpenOrgsDB.sql</PARAM> |
|
126 |
<PARAM managedBy="system" name="outputEprParam" required="true" type="string">openSimRelsEpr</PARAM> |
|
127 |
</PARAMETERS> |
|
128 |
<ARCS> |
|
129 |
<ARC to="storeSimilaritiesFromOpenOrgsDB"/> |
|
130 |
</ARCS> |
|
131 |
</NODE> |
|
132 |
<NODE name="storeSimilaritiesFromOpenOrgsDB" type="StoreHBase"> |
|
133 |
<DESCRIPTION>Store similarities to HBase (openorgs)</DESCRIPTION> |
|
134 |
<PARAMETERS> |
|
135 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">openSimRelsEpr</PARAM> |
|
136 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
|
137 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
138 |
<PARAM function="obtainValues('dbmf2hbaseMappings', {})" managedBy="user" name="mapping" required="true" type="string"></PARAM> |
|
139 |
</PARAMETERS> |
|
140 |
<ARCS> |
|
141 |
<ARC to="queryProjects"/> |
|
142 |
</ARCS> |
|
143 |
</NODE> |
|
144 |
<NODE name="queryProjects" type="QueryDb"> |
|
145 |
<DESCRIPTION>query Projects</DESCRIPTION> |
|
146 |
<PARAMETERS> |
|
147 |
<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM> |
|
148 |
<PARAM managedBy="system" name="sql" required="true" type="string">/eu/dnetlib/msro/openaireplus/workflows/hbase/queryProjects.sql</PARAM> |
|
149 |
<PARAM managedBy="system" name="outputEprParam" required="true" type="string">projEpr</PARAM> |
|
150 |
</PARAMETERS> |
|
151 |
<ARCS> |
|
152 |
<ARC to="storeProjects"/> |
|
153 |
</ARCS> |
|
154 |
</NODE> |
|
155 |
<NODE name="storeProjects" type="StoreHBase"> |
|
156 |
<DESCRIPTION>Store Projects to HBase</DESCRIPTION> |
|
157 |
<PARAMETERS> |
|
158 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">projEpr</PARAM> |
|
159 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
|
160 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
161 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
162 |
</PARAMETERS> |
|
163 |
<ARCS> |
|
164 |
<ARC to="queryProjectOrganization"/> |
|
165 |
</ARCS> |
|
166 |
</NODE> |
|
167 |
<NODE name="queryProjectOrganization" type="QueryDb"> |
|
168 |
<DESCRIPTION>query relation Project_Organization</DESCRIPTION> |
|
169 |
<PARAMETERS> |
|
170 |
<PARAM managedBy="system" name="dbProperty" required="true" type="string">dnet.openaire.db.name</PARAM> |
|
171 |
<PARAM managedBy="system" name="sql" required="true" type="string"> |
|
172 |
/eu/dnetlib/msro/openaireplus/workflows/hbase/queryProjectOrganization.sql |
|
173 |
</PARAM> |
|
174 |
<PARAM managedBy="system" name="outputEprParam" required="true" type="string">projOrgEpr</PARAM> |
|
175 |
</PARAMETERS> |
|
176 |
<ARCS> |
|
177 |
<ARC to="storeProjectOrganization"/> |
|
178 |
</ARCS> |
|
179 |
</NODE> |
|
180 |
<NODE name="storeProjectOrganization" type="StoreHBase"> |
|
181 |
<DESCRIPTION>Store relation Project_Organization</DESCRIPTION> |
|
182 |
<PARAMETERS> |
|
183 |
<PARAM managedBy="system" name="inputEprParam" required="true" type="string">projOrgEpr</PARAM> |
|
184 |
<PARAM managedBy="system" name="hbaseTableProperty" required="true" type="string">hbase.mapred.datatable</PARAM> |
|
185 |
<PARAM managedBy="system" name="cluster" required="true" type="string">DM</PARAM> |
|
186 |
<PARAM managedBy="user" name="mapping" required="true" type="string" function="obtainValues('dbmf2hbaseMappings', {})"></PARAM> |
|
187 |
</PARAMETERS> |
|
188 |
<ARCS> |
|
189 |
<ARC to="success"/> |
|
190 |
</ARCS> |
|
191 |
</NODE> |
|
192 |
</CONFIGURATION> |
|
193 |
<STATUS> |
|
194 |
<LAST_EXECUTION_ID>wf_20140509_125739_830</LAST_EXECUTION_ID> |
|
195 |
<LAST_EXECUTION_DATE>2014-05-09T13:03:16+02:00</LAST_EXECUTION_DATE> |
|
196 |
<LAST_EXECUTION_STATUS>SUCCESS</LAST_EXECUTION_STATUS> |
|
197 |
<LAST_EXECUTION_ERROR/> |
|
198 |
<LAST_EXECUTION_OUTPUT name=""></LAST_EXECUTION_OUTPUT> |
|
199 |
</STATUS> |
|
200 |
</BODY> |
|
201 |
</RESOURCE_PROFILE> |
modules/dnet-openaireplus-workflows/branches/eosc_services/src/main/resources/eu/dnetlib/test/profiles/openaireplus/workflows/hbase/importOrcid.xml | ||
---|---|---|
1 |
<RESOURCE_PROFILE> |
|
2 |
<HEADER> |
|
3 |
<RESOURCE_IDENTIFIER value="7c8765af-1253-4bd7-8806-315b73bf7319_V29ya2Zsb3dEU1Jlc291cmNlcy9Xb3JrZmxvd0RTUmVzb3VyY2VUeXBl"/> |
|
4 |
<RESOURCE_TYPE value="WorkflowDSResourceType"/> |
|
5 |
<RESOURCE_KIND value="WorkflowDSResources"/> |
|
6 |
<RESOURCE_URI value=""/> |
|
7 |
<DATE_OF_CREATION value="2019-05-29T10:54:33+00:00"/> |
|
8 |
</HEADER> |
|
9 |
<BODY> |
|
10 |
<WORKFLOW_NAME>Import Orcid</WORKFLOW_NAME> |
|
11 |
<WORKFLOW_TYPE>Import InfoSpace</WORKFLOW_TYPE> |
|
12 |
<WORKFLOW_PRIORITY>30</WORKFLOW_PRIORITY> |
|
13 |
<CONFIGURATION start="manual"> |
|
14 |
<NODE isStart="true" name="setInputPath" type="SetHdfsFile"> |
|
15 |
<DESCRIPTION>set the hdfs output path</DESCRIPTION> |
|
16 |
<PARAMETERS> |
|
17 |
<PARAM managedBy="user" name="hdfsPath" required="true" type="string">/usr/dnet/data/orcid</PARAM> |
|
18 |
<PARAM managedBy="system" name="hdfsPathParam" required="true" type="string">inputPath</PARAM> |
|
19 |
</PARAMETERS> |
|
20 |
<ARCS> |
|
21 |
<ARC to="prepareActionSets"/> |
|
22 |
</ARCS> |
|
23 |
</NODE> |
|
24 |
<NODE name="prepareActionSets" type="PrepareActionSets"> |
|
25 |
<DESCRIPTION>prepare action sets</DESCRIPTION> |
|
26 |
<PARAMETERS> |
|
27 |
<PARAM managedBy="system" name="sets" required="true" type="string"> |
|
28 |
[ |
|
29 |
{ |
|
30 |
'set' : 'orcidworks-no-doi', |
|
31 |
'jobProperty' : 'export_action_set_orcidworks_no_doi', |
|
32 |
'enablingProperty' : 'active_orcidworks_no_doi', |
|
33 |
'enabled' : 'true' |
|
34 |
} |
|
35 |
] |
|
36 |
</PARAM> |
Also available in: Unified diff
partial cleaning of code related to hbase