Revision 35763
Added by Claudio Atzori over 9 years ago
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/test/java/eu/dnetlib/data/mapreduce/util/OafRelDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import org.junit.Before; |
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
10 |
|
|
11 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
12 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
13 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
14 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
15 |
|
|
16 |
public class OafRelDecoderTest { |
|
17 |
|
|
18 |
private OafRel oafRel; |
|
19 |
|
|
20 |
@Before |
|
21 |
public void setUp() { |
|
22 |
oafRel = OafTest.getPersonResult("ID_1", "ID_2", "1", "isAuthor"); |
|
23 |
} |
|
24 |
|
|
25 |
@Test |
|
26 |
public void testSetClass() { |
|
27 |
|
|
28 |
OafRelDecoder d1 = OafRelDecoder.decode(oafRel); |
|
29 |
|
|
30 |
assertNotNull(d1); |
|
31 |
assertEquals("isAuthor", d1.getRelClass()); |
|
32 |
|
|
33 |
OafRelDecoder d2 = OafRelDecoder.decode(d1.setClassId("hasAuthor").build()); |
|
34 |
|
|
35 |
assertEquals("hasAuthor", d2.getRelClass()); |
|
36 |
assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassid()); |
|
37 |
assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassname()); |
|
38 |
|
|
39 |
FieldDescriptor fd = Authorship.getDescriptor().findFieldByName("ranking"); |
|
40 |
assertEquals(d1.getSubRel().getField(fd), d2.getSubRel().getField(fd)); |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void testGetCF() { |
|
45 |
assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCFQ(RelType.personResult, SubRelType.authorship, Authorship.RelName.isAuthorOf)); |
|
46 |
assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCFQ(RelType.personResult, SubRelType.authorship, "isAuthorOf")); |
|
47 |
} |
|
48 |
|
|
49 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/main/resources/eu/dnetlib/actionmanager/xslt/datacite2updateActions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
3 |
xmlns:dnet="eu.dnetlib.actionmanager.actions.infopackage.DataciteInfoPackageToHbaseXsltFunctions" |
|
4 |
xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
5 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
6 |
xmlns:date="eu.dnetlib.miscutils.datetime.DateUtils" |
|
7 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:exslt="http://exslt.org/common" |
|
8 |
extension-element-prefixes="exslt" exclude-result-prefixes="xsl dnet exslt oaf dr dri date"> |
|
9 |
|
|
10 |
<xsl:output omit-xml-declaration="yes" indent="yes"/> |
|
11 |
|
|
12 |
<xsl:param name="trust" select="string('0.9')"/> |
|
13 |
<xsl:param name="provenance" select="string('UNKNOWN')"/> |
|
14 |
<xsl:param name="namespaceprefix" select="string('datacite____')"/> |
|
15 |
|
|
16 |
<xsl:template match="/*">
	<!-- record/about/datainfo, addressed by local name so the element namespaces do not matter. -->
	<xsl:variable name="dataInfo" select="/*[local-name() = 'record']/*[local-name() = 'about']/*[local-name() = 'datainfo']"/>
	<!-- record/metadata/resource; an empty selection produces an empty ACTIONS element below. -->
	<xsl:variable name="metadata" select="exslt:node-set(/*[local-name()='record']/*[local-name()='metadata']/*[local-name()='resource'])"/>
	<!-- The namespaceprefix parameter when it is non-empty, otherwise the record's oaf:datasourceprefix. -->
	<xsl:variable name="rightNSPrefix">
		<xsl:choose>
			<xsl:when test="$namespaceprefix">
				<xsl:value-of select="$namespaceprefix"/>
			</xsl:when>
			<xsl:otherwise>
				<xsl:value-of select="//oaf:datasourceprefix"/>
			</xsl:otherwise>
		</xsl:choose>
	</xsl:variable>

	<xsl:choose>
		<xsl:when test="count($metadata) = 0">
			<ACTIONS/>
		</xsl:when>
		<xsl:otherwise>
			<!-- Identifiers -->
			<xsl:variable name="originalId" select="//*[local-name() = 'identifier' and ./@identifierType='DOI']"/>
			<xsl:variable name="resultId" select="dnet:oafSimpleId('result', //dri:objIdentifier)"/>

			<!-- Descriptive fields, selected by local name anywhere in the record -->
			<xsl:variable name="creators" select="//*[local-name() = 'creator']"/>
			<xsl:variable name="titles" select="//*[local-name() = 'title']"/>
			<xsl:variable name="subjects" select="//*[local-name() = 'subject']"/>
			<xsl:variable name="publisher" select="//*[local-name() = 'publisher']"/>
			<xsl:variable name="descriptions" select="//*[local-name() = 'description']"/>
			<xsl:variable name="dates" select="//*[local-name() = 'date']"/>
			<xsl:variable name="dateaccepted" select="//oaf:dateAccepted"/>
			<xsl:variable name="resourceType" select="//*[local-name() = 'resourceType']"/>
			<xsl:variable name="formats" select="//*[local-name() = 'format']"/>
			<xsl:variable name="sizes" select="//*[local-name() = 'size']"/>
			<xsl:variable name="rights" select="//oaf:accessrights"/>
			<xsl:variable name="version" select="//*[local-name() = 'version']"/>
			<xsl:variable name="instanceURI" select="concat('http://dx.doi.org','/',//*[local-name() = 'resource']/*[local-name() = 'identifier'])"/>

			<!-- Hosting and collecting datasource -->
			<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)"/>
			<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name"/>
			<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)"/>
			<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name"/>
			<xsl:variable name="dateOfCollection" select="//dr:dateOfCollection"/>
			<xsl:variable name="language" select="//oaf:language"/>
			<xsl:variable name="cobjcategory" select="//dr:CobjCategory"/>

			<!-- One update action on the result row; the column name is 'update_' followed by date:now(). -->
			<ACTIONS>
				<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="{concat('update_', date:now())}">
					<xsl:value-of select="dnet:oafDataCiteResultFromInfoPackage(
						$resultId, $dataInfo, $metadata, $titles,
						$subjects, $publisher, $descriptions, $dates, $dateaccepted, $resourceType,
						$formats, $sizes, $language, $cobjcategory, $rights, $version,
						$provenance, $trust, $hostedbyid, $hostedbyname,
						$collectedfromid, $collectedfromname, $originalId, $instanceURI, $dateOfCollection)"/>
				</ACTION>
			</ACTIONS>
		</xsl:otherwise>
	</xsl:choose>
</xsl:template>
|
75 |
</xsl:stylesheet> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/pom.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
3 |
<parent> |
|
4 |
<groupId>eu.dnetlib</groupId> |
|
5 |
<artifactId>dnet-hadoop-parent</artifactId> |
|
6 |
<version>1.0.0</version> |
|
7 |
<relativePath /> |
|
8 |
</parent> |
|
9 |
<modelVersion>4.0.0</modelVersion> |
|
10 |
<groupId>eu.dnetlib</groupId> |
|
11 |
<artifactId>dnet-openaireplus-mapping-utils</artifactId> |
|
12 |
<packaging>jar</packaging> |
|
13 |
<version>3.0.1</version> |
|
14 |
<scm> |
|
15 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1</developerConnection> |
|
16 |
</scm> |
|
17 |
<dependencies> |
|
18 |
<dependency> |
|
19 |
<groupId>com.google.guava</groupId> |
|
20 |
<artifactId>guava</artifactId> |
|
21 |
<version>${google.guava.version}</version> |
|
22 |
</dependency> |
|
23 |
<dependency> |
|
24 |
<groupId>junit</groupId> |
|
25 |
<artifactId>junit</artifactId> |
|
26 |
<version>${junit.version}</version> |
|
27 |
<scope>test</scope> |
|
28 |
</dependency> |
|
29 |
<dependency> |
|
30 |
<groupId>commons-codec</groupId> |
|
31 |
<artifactId>commons-codec</artifactId> |
|
32 |
<version>${commons.codec.version}</version> |
|
33 |
</dependency> |
|
34 |
<dependency> |
|
35 |
<groupId>dom4j</groupId> |
|
36 |
<artifactId>dom4j</artifactId> |
|
37 |
<version>${dom4j.version}</version> |
|
38 |
</dependency> |
|
39 |
<dependency> |
|
40 |
<groupId>eu.dnetlib</groupId> |
|
41 |
<artifactId>dnet-openaire-data-protos</artifactId> |
|
42 |
<version>[3.0.0,4.0.0)</version> |
|
43 |
</dependency> |
|
44 |
<dependency> |
|
45 |
<groupId>eu.dnetlib</groupId> |
|
46 |
<artifactId>dnet-pace-core</artifactId> |
|
47 |
<version>[1.1.0,2.0.0)</version> |
|
48 |
</dependency> |
|
49 |
<dependency> |
|
50 |
<groupId>eu.dnetlib</groupId> |
|
51 |
<artifactId>cnr-misc-utils</artifactId> |
|
52 |
<version>[1.0.0,2.0.0)</version> |
|
53 |
</dependency> |
|
54 |
<dependency> |
|
55 |
<groupId>eu.dnetlib</groupId> |
|
56 |
<artifactId>dnet-hadoop-commons</artifactId> |
|
57 |
<version>[1.0.0,2.0.0)</version> |
|
58 |
</dependency> |
|
59 |
<dependency> |
|
60 |
<groupId>eu.dnetlib</groupId> |
|
61 |
<artifactId>dnet-index-solr-common</artifactId> |
|
62 |
<version>[1.0.0,2.0.0)</version> |
|
63 |
</dependency> |
|
64 |
<dependency> |
|
65 |
<groupId>com.googlecode.protobuf-java-format</groupId> |
|
66 |
<artifactId>protobuf-java-format</artifactId> |
|
67 |
<version>1.2</version> |
|
68 |
<scope>test</scope> |
|
69 |
</dependency> |
|
70 |
</dependencies> |
|
71 |
</project> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/main/resources/eu/dnetlib/actionmanager/xslt/dmf2updateActions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions" |
|
7 |
xmlns:date="eu.dnetlib.miscutils.datetime.DateUtils" |
|
8 |
xmlns:exslt="http://exslt.org/common" xmlns:action="http://namespace.openaire.eu/action" |
|
9 |
extension-element-prefixes="exslt" |
|
10 |
exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt date"> |
|
11 |
|
|
12 |
<xsl:output omit-xml-declaration="yes" indent="yes" /> |
|
13 |
|
|
14 |
<xsl:param name="trust" select="string('0.9')" /> |
|
15 |
<xsl:param name="provenance" select="string('UNKNOWN')" /> |
|
16 |
<xsl:param name="namespaceprefix" select="string('unknown_____')" /> |
|
17 |
|
|
18 |
<xsl:template match="/*">
	<!-- record/about/datainfo, addressed by local name so the element namespaces do not matter. -->
	<xsl:variable name="dataInfo" select="/*[local-name() = 'record']/*[local-name() = 'about']/*[local-name() = 'datainfo']"/>
	<xsl:variable name="dateofcollection" select="//dr:dateOfCollection"/>

	<!-- Children of the metadata element; an empty selection produces an empty ACTIONS element below. -->
	<xsl:variable name="metadata" select="exslt:node-set(//*[local-name()='metadata']/*)" />

	<!-- oaf:collectedDatasourceid when non-empty, otherwise the literal string UNKNOWN. -->
	<xsl:variable name="collectedDatasourceid">
		<xsl:choose>
			<xsl:when test="string-length(//oaf:collectedDatasourceid) &gt; 0">
				<xsl:value-of select="//oaf:collectedDatasourceid" />
			</xsl:when>
			<xsl:otherwise>
				<!-- The inner quotes make this an XPath string literal; an unquoted UNKNOWN is a
				     location path selecting a child element of that name and yields an empty string. -->
				<xsl:value-of select="'UNKNOWN'" />
			</xsl:otherwise>
		</xsl:choose>
	</xsl:variable>

	<xsl:choose>
		<xsl:when test="count($metadata) = 0">
			<ACTIONS />
		</xsl:when>
		<xsl:otherwise>

			<!-- NOTE(review): unlike dataInfo above, this path names 'record' without a namespace-agnostic
			     test, so it only matches a root element in no namespace - confirm that is intended. -->
			<xsl:variable name="objidentifier" select="/record/*[local-name() = 'header']/*[local-name() = 'objIdentifier']" />

			<xsl:variable name="resultId" select="dnet:oafSimpleId('result', $objidentifier)" />

			<xsl:variable name="hostedbyid" select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)" />
			<xsl:variable name="hostedbyname" select="//oaf:hostedBy/@name" />

			<xsl:variable name="collectedfromid" select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)" />
			<xsl:variable name="collectedfromname" select="//oaf:collectedFrom/@name" />

			<!-- <xsl:variable name="country" select="substring(//dr:repositoryCountry, 1, 200)" /> -->
			<!-- <xsl:variable name="accessmode" select="substring(//oaf:accessrights, 1, 200)" /> -->

			<xsl:variable name="result" select="dnet:oafResultFromInfoPackage($resultId, $dataInfo, $provenance, $trust, $hostedbyid, $hostedbyname, $collectedfromid, $collectedfromname, $objidentifier, $dateofcollection, $metadata)" />

			<!-- One update action on the result row; the column name is 'update_' followed by date:now(). -->
			<ACTIONS>
				<ACTION targetKey="{$resultId}" targetColumnFamily="result" targetColumn="{concat('update_', date:now())}">
					<xsl:value-of select="$result" />
				</ACTION>
			</ACTIONS>
		</xsl:otherwise>
	</xsl:choose>
</xsl:template>
|
63 |
|
|
64 |
</xsl:stylesheet> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/main/java/eu/dnetlib/data/transform/xml/DmfToHbaseXsltFunctions.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import org.apache.commons.lang.StringUtils; |
|
7 |
import org.w3c.dom.NodeList; |
|
8 |
|
|
9 |
import com.google.common.collect.Iterables; |
|
10 |
import com.google.common.collect.Lists; |
|
11 |
import com.google.protobuf.Descriptors.Descriptor; |
|
12 |
|
|
13 |
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; |
|
14 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
15 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
16 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
17 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
18 |
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson; |
|
19 |
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson.CoAuthorship; |
|
20 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
21 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult; |
|
22 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
23 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
|
24 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
25 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
26 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject; |
|
27 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome; |
|
28 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
29 |
import eu.dnetlib.data.proto.ResultProtos.Result.Context; |
|
30 |
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference; |
|
31 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
32 |
import eu.dnetlib.data.proto.ResultProtos.Result.Journal; |
|
33 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult; |
|
34 |
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset; |
|
35 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
36 |
|
|
37 |
public class DmfToHbaseXsltFunctions extends AbstractDNetOafXsltFunctions { |
|
38 |
|
|
39 |
// dnet:oafPersonResultFromDMF($resultId, $oafPerson, position(), "sysimport:crosswalk:repository", "0.9") |
|
40 |
public static String oafPersonResult_Authorship_FromDMF(final String source, |
|
41 |
final String target, |
|
42 |
final int rank, |
|
43 |
final String relClass, |
|
44 |
final String provenanceAction, |
|
45 |
final String trust, |
|
46 |
final NodeList dataInfo) { |
|
47 |
try { |
|
48 |
final String eSource = OafRowKeyDecoder.decode(source).getKey(); |
|
49 |
final String eTarget = OafRowKeyDecoder.decode(target).getKey(); |
|
50 |
|
|
51 |
final Authorship.Builder auth = Authorship.newBuilder().setRanking("" + rank) |
|
52 |
.setRelMetadata(RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:personroles"))); |
|
53 |
|
|
54 |
final OafRel.Builder rel = getRel(eSource, eTarget, RelType.personResult, SubRelType.authorship, relClass, false).setPersonResult( |
|
55 |
PersonResult.newBuilder().setAuthorship(auth)); |
|
56 |
|
|
57 |
return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
58 |
} catch (final Throwable e) { |
|
59 |
System.err.println("source: " + source); |
|
60 |
System.err.println("target: " + target); |
|
61 |
System.err.println("provenanceAction: " + provenanceAction); |
|
62 |
System.err.println("trust: " + trust); |
|
63 |
System.err.println("rank: " + rank); |
|
64 |
e.printStackTrace(); |
|
65 |
throw new RuntimeException(e); |
|
66 |
} |
|
67 |
} |
|
68 |
|
|
69 |
// dnet:oafPersonPersonFromMDStore($personId, $coauthorId) |
|
70 |
public static String oafPersonPerson_CoAuthorship_FromDMF(final String source, |
|
71 |
final String target, |
|
72 |
final String relClass, |
|
73 |
final String provenanceAction, |
|
74 |
final String trust, |
|
75 |
final NodeList dataInfo) { |
|
76 |
try { |
|
77 |
final String eSource = OafRowKeyDecoder.decode(source).getKey(); |
|
78 |
final String eTarget = OafRowKeyDecoder.decode(target).getKey(); |
|
79 |
|
|
80 |
final PersonPerson.Builder pp = PersonPerson.newBuilder(); |
|
81 |
final CoAuthorship.Builder coauth = CoAuthorship.newBuilder().setRelMetadata( |
|
82 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:personroles"))); |
|
83 |
|
|
84 |
final OafRel.Builder rel = getRel(eSource, eTarget, RelType.personPerson, SubRelType.coauthorship, relClass, false).setPersonPerson( |
|
85 |
pp.setCoauthorship(coauth)); |
|
86 |
|
|
87 |
return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
88 |
} catch (final Throwable e) { |
|
89 |
System.err.println("source: " + source); |
|
90 |
System.err.println("target: " + target); |
|
91 |
System.err.println("provenanceAction: " + provenanceAction); |
|
92 |
System.err.println("trust: " + trust); |
|
93 |
e.printStackTrace(); |
|
94 |
throw new RuntimeException(e); |
|
95 |
} |
|
96 |
} |
|
97 |
|
|
98 |
// dnet:oafPersonFromDMF($personId, ., "sysimport:crosswalk:repository", "0.9") |
|
99 |
public static String oafPerson_FromDMF(final String personId, |
|
100 |
final String fullname, |
|
101 |
final String provenanceAction, |
|
102 |
final String trust, |
|
103 |
final NodeList dataInfo, |
|
104 |
final String collectedFromId, |
|
105 |
final String collectedFromName, |
|
106 |
final String originalId, |
|
107 |
final String dateOfCollection) { |
|
108 |
try { |
|
109 |
final String entityId = OafRowKeyDecoder.decode(personId).getKey(); |
|
110 |
|
|
111 |
final Person.Builder person = Person.newBuilder(); |
|
112 |
final Person.Metadata.Builder metadata = Person.Metadata.newBuilder(); |
|
113 |
|
|
114 |
final String eFullname = escapeXml(fullname); |
|
115 |
|
|
116 |
metadata.setFullname(sf(eFullname)); |
|
117 |
|
|
118 |
final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(eFullname, false); |
|
119 |
if (p.isAccurate()) { |
|
120 |
metadata.setFirstname(sf(p.getNormalisedFirstName())); |
|
121 |
metadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname())); |
|
122 |
metadata.setFullname(sf(p.getNormalisedFullname())); |
|
123 |
} |
|
124 |
|
|
125 |
// metadata.setNationality(getSimpleQualifier("UNKNOWN", "dnet:countries")); |
|
126 |
final List<StructuredProperty> pids = Lists.newArrayList(getStructuredProperty(originalId, "oai", "oai", "dnet:pid_types", "dnet:pid_types")); |
|
127 |
final OafEntity.Builder entity = |
|
128 |
getEntity(Type.person, entityId, getKV(collectedFromId, escapeXml(collectedFromName)), originalId, dateOfCollection, pids) |
|
129 |
.setPerson(person.setMetadata(metadata)); |
|
130 |
|
|
131 |
return base64(getOaf(entity, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
132 |
} catch (final Throwable e) { |
|
133 |
System.err.println("personId: " + personId); |
|
134 |
System.err.println("fullname: " + fullname); |
|
135 |
System.err.println("provenanceAction: " + provenanceAction); |
|
136 |
System.err.println("trust: " + trust); |
|
137 |
System.err.println("collectedFromId: " + collectedFromId); |
|
138 |
System.err.println("collectedFromName: " + collectedFromName); |
|
139 |
System.err.println("originalId: " + originalId); |
|
140 |
System.err.println("dateOfCollection: " + dateOfCollection); |
|
141 |
e.printStackTrace(); |
|
142 |
throw new RuntimeException(e); |
|
143 |
} |
|
144 |
} |
|
145 |
|
|
146 |
// dnet:oafResultProjectFromDMF($resultId, $projectId, "sysimport:crosswalk:repository", "0.9") |
|
147 |
public static String oafResultProject_Outcome_FromDMF(final String source, |
|
148 |
final String target, |
|
149 |
final String relClass, |
|
150 |
final String provenanceAction, |
|
151 |
final String trust, |
|
152 |
final NodeList dataInfo) { |
|
153 |
try { |
|
154 |
final String eSource = OafRowKeyDecoder.decode(source).getKey(); |
|
155 |
final String eTarget = OafRowKeyDecoder.decode(target).getKey(); |
|
156 |
|
|
157 |
final Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata( |
|
158 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:result_project_relations"))); |
|
159 |
|
|
160 |
final ResultProject.Builder rp = ResultProject.newBuilder().setOutcome(outcome); |
|
161 |
|
|
162 |
final OafRel.Builder rel = getRel(eSource, eTarget, RelType.resultProject, SubRelType.outcome, relClass, false).setResultProject(rp); |
|
163 |
|
|
164 |
return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
165 |
} catch (final Throwable e) { |
|
166 |
System.err.println("source: " + source); |
|
167 |
System.err.println("target: " + target); |
|
168 |
System.err.println("provenanceAction: " + provenanceAction); |
|
169 |
System.err.println("trust: " + trust); |
|
170 |
|
|
171 |
e.printStackTrace(); |
|
172 |
throw new RuntimeException(e); |
|
173 |
} |
|
174 |
} |
|
175 |
|
|
176 |
// dnet:oafResultProjectFromDMF($resultId, $projectId, "sysimport:crosswalk:repository", "0.9") |
|
177 |
public static String oafResultProject_Outcome_FromDMF(final String source, |
|
178 |
final String target, |
|
179 |
final String relClass, |
|
180 |
final String provenanceAction, |
|
181 |
final String trust) { |
|
182 |
return oafResultProject_Outcome_FromDMF(source, target, relClass, provenanceAction, trust, null); |
|
183 |
} |
|
184 |
|
|
185 |
public static String oafResultResult_PublicationDataset_FromDMF(final String source, |
|
186 |
final String target, |
|
187 |
final String relClass, |
|
188 |
final String provenanceAction, |
|
189 |
final String trust, |
|
190 |
final NodeList dataInfo) { |
|
191 |
try { |
|
192 |
final String eSource = OafRowKeyDecoder.decode(source).getKey(); |
|
193 |
final String eTarget = OafRowKeyDecoder.decode(target).getKey(); |
|
194 |
|
|
195 |
final PublicationDataset.Builder pd = PublicationDataset.newBuilder().setRelMetadata( |
|
196 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(relClass, "dnet:result_result_relations"))); |
|
197 |
|
|
198 |
final ResultResult.Builder rr = ResultResult.newBuilder().setPublicationDataset(pd); |
|
199 |
|
|
200 |
final OafRel.Builder rel = getRel(eSource, eTarget, RelType.resultResult, SubRelType.publicationDataset, relClass, false).setResultResult(rr); |
|
201 |
|
|
202 |
return base64(getOaf(rel, getDataInfo(dataInfo, provenanceAction, trust, false, false)).toByteArray()); |
|
203 |
} catch (final Throwable e) { |
|
204 |
System.err.println("source: " + source); |
|
205 |
System.err.println("target: " + target); |
|
206 |
System.err.println("provenanceAction: " + provenanceAction); |
|
207 |
System.err.println("trust: " + trust); |
|
208 |
|
|
209 |
e.printStackTrace(); |
|
210 |
throw new RuntimeException(e); |
|
211 |
} |
|
212 |
} |
|
213 |
|
|
214 |
public static String oafResult_FromDMF(final String resultId, |
|
215 |
final String provenanceAction, |
|
216 |
final String trust, |
|
217 |
final NodeList dataInfo, |
|
218 |
final String hostedbyId, |
|
219 |
final String hostedbyName, |
|
220 |
final String collectedFromId, |
|
221 |
final String collectedFromName, |
|
222 |
final String originalId, |
|
223 |
final String dateOfCollection, |
|
224 |
final NodeList nodelist) { |
|
225 |
try { |
|
226 |
final String entityId = OafRowKeyDecoder.decode(resultId).getKey(); |
|
227 |
|
|
228 |
final Result.Builder result = Result.newBuilder(); |
|
229 |
|
|
230 |
final ValueMap values = ValueMap.parseNodeList(nodelist); |
|
231 |
|
|
232 |
final Result.Metadata.Builder metadata = Result.Metadata.newBuilder(); |
|
233 |
final Descriptor mDesc = Result.Metadata.getDescriptor(); |
|
234 |
|
|
235 |
addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject").listValues(), "keyword", "dnet:result_subject"); |
|
236 |
addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title").listValues(), "main title", "dnet:dataCite_title"); |
|
237 |
|
|
238 |
for (final String fieldname : Lists.newArrayList("description", "source")) { |
|
239 |
if (values.get(fieldname) != null) { |
|
240 |
for (final String s : values.get(fieldname).listValues()) { |
|
241 |
addField(metadata, mDesc.findFieldByName(fieldname), s); |
|
242 |
} |
|
243 |
} |
|
244 |
} |
|
245 |
|
|
246 |
addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues())); |
|
247 |
addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues()); |
|
248 |
addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues()); |
|
249 |
addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues()); |
|
250 |
addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues()); |
|
251 |
|
|
252 |
addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier("publication", "dnet:result_typologies")); |
|
253 |
|
|
254 |
addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues()); |
|
255 |
addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues()); |
|
256 |
|
|
257 |
// addField(metadata, Result.Metadata.getDescriptor().findFieldByName("provenanceaction"), |
|
258 |
// getSimpleQualifier("sysimport:crosswalk:repository", "dnet:provenanceActions").build()); |
|
259 |
|
|
260 |
if (values.get("concept") != null) { |
|
261 |
for (final Element e : values.get("concept")) { |
|
262 |
final String id = e.getAttributes().get("id"); |
|
263 |
if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank"); |
|
264 |
metadata.addContext(Context.newBuilder().setId(id)); |
|
265 |
} |
|
266 |
} |
|
267 |
|
|
268 |
if (values.get("journal") != null) { |
|
269 |
for (final Element e : values.get("journal")) { |
|
270 |
|
|
271 |
final Journal.Builder journal = Journal.newBuilder(); |
|
272 |
if (e.getText() != null) { |
|
273 |
journal.setName(e.getText()); |
|
274 |
} |
|
275 |
|
|
276 |
final Map<String, String> attr = e.getAttributes(); |
|
277 |
if (attr != null) { |
|
278 |
if (attr.get("issn") != null) { |
|
279 |
journal.setIssnPrinted(attr.get("issn")); |
|
280 |
} |
|
281 |
if (attr.get("eissn") != null) { |
|
282 |
journal.setIssnOnline(attr.get("eissn")); |
|
283 |
} |
|
284 |
if (attr.get("lissn") != null) { |
|
285 |
journal.setIssnLinking(attr.get("lissn")); |
|
286 |
} |
|
287 |
} |
|
288 |
metadata.setJournal(journal.build()); |
|
289 |
} |
|
290 |
} |
|
291 |
|
|
292 |
final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(hostedbyId, escapeXml(hostedbyName))); |
|
293 |
|
|
294 |
addField(instance, Instance.getDescriptor().findFieldByName("licence"), |
|
295 |
setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues())); |
|
296 |
addField(instance, Instance.getDescriptor().findFieldByName("instancetype"), |
|
297 |
setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues())); |
|
298 |
|
|
299 |
if (values.get("identifier") != null) { |
|
300 |
addField(instance, Instance.getDescriptor().findFieldByName("url"), |
|
301 |
Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter))); |
|
302 |
} |
|
303 |
|
|
304 |
result.addInstance(instance); |
|
305 |
|
|
306 |
final List<Element> extrefs = values.get("reference"); |
|
307 |
if (!extrefs.isEmpty()) { |
|
308 |
final Descriptor extDesc = ExternalReference.getDescriptor(); |
|
309 |
for (final Element element : extrefs) { |
|
310 |
final ExternalReference.Builder extref = ExternalReference.newBuilder(); |
|
311 |
addField(extref, extDesc.findFieldByName("url"), element.getText()); |
|
312 |
addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source")); |
|
313 |
addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier")); |
|
314 |
addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title")); |
|
315 |
addField(extref, extDesc.findFieldByName("qualifier"), |
|
316 |
setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type"))) |
|
317 |
.build()); |
|
318 |
|
|
319 |
result.addExternalReference(extref); |
|
320 |
} |
|
321 |
} |
|
322 |
|
|
323 |
final List<StructuredProperty> pids = Lists.newArrayList(); |
|
324 |
pids.addAll(parsePids(nodelist)); |
|
325 |
pids.add(getStructuredProperty(originalId, "oai", getClassName("oai"), "dnet:pid_types", "dnet:pid_types")); |
|
326 |
|
|
327 |
final OafEntity.Builder entity = |
|
328 |
getEntity(Type.result, entityId, getKV(collectedFromId, escapeXml(collectedFromName)), originalId, dateOfCollection, pids) |
|
329 |
.setResult(result.setMetadata(metadata)); |
|
330 |
|
|
331 |
final Oaf oaf = getOaf(entity, getDataInfo(dataInfo, provenanceAction, trust, false, false)); |
|
332 |
return base64(oaf.toByteArray()); |
|
333 |
} catch (final Throwable e) { |
|
334 |
System.err.println("resultId: " + resultId); |
|
335 |
System.err.println("hostedbyId: " + hostedbyId); |
|
336 |
System.err.println("hostedbyName: " + hostedbyName); |
|
337 |
System.err.println("provenanceAction: " + provenanceAction); |
|
338 |
System.err.println("trust: " + trust); |
|
339 |
System.err.println("collectedFromId: " + collectedFromId); |
|
340 |
System.err.println("collectedFromName: " + collectedFromName); |
|
341 |
System.err.println("originalId: " + originalId); |
|
342 |
System.err.println("dateOfCollection: " + dateOfCollection); |
|
343 |
e.printStackTrace(); |
|
344 |
throw new RuntimeException(e); |
|
345 |
} |
|
346 |
} |
|
347 |
|
|
348 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/main/resources/eu/dnetlib/data/transform/datacite_2_hbase.xsl | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet version="1.0" |
|
3 |
xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
4 |
xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:dri="http://www.driver-repository.eu/namespace/dri" |
|
5 |
xmlns:oaa="http://namespace.openaire.eu/oaa" xmlns:oaf="http://namespace.openaire.eu/oaf" |
|
6 |
xmlns:dnet="eu.dnetlib.data.transform.xml.DNetMdStoreDataCiteToHbaseXsltFunctions" |
|
7 |
xmlns:exslt="http://exslt.org/common" extension-element-prefixes="exslt" |
|
8 |
exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt"> |
|
9 |
|
|
10 |
<xsl:output omit-xml-declaration="yes" indent="yes" /> |
|
11 |
<xsl:template match="/*"> |
|
12 |
<xsl:variable name="dataInfo" select="/*[local-name() = 'record']/*[local-name() = 'about']/*[local-name() = 'datainfo']"/> |
|
13 |
<xsl:variable name="dateofcollection" select="//dri:dateOfCollection" /> |
|
14 |
<xsl:variable name="metadata" |
|
15 |
select="exslt:node-set(//*[local-name()='metadata']/*)" /> |
|
16 |
<xsl:variable name="namespaceprefix"> |

17 |
<xsl:choose> |

18 |
<!-- TODO check namespaceprefix length is 12. Uses oaf:datasourceprefix when it is non-empty; otherwise falls back to the 12-character string literal 'unknown_____' (quoted, so XPath treats it as a string and not as a child element name). --> |

19 |
<xsl:when test="string-length(//oaf:datasourceprefix) > 0"> |

20 |
<xsl:value-of select="//oaf:datasourceprefix" /> |

21 |
</xsl:when> |

22 |
<xsl:otherwise> |

23 |
<xsl:value-of select="'unknown_____'" /> |

24 |
</xsl:otherwise> |

25 |
</xsl:choose> |

26 |
</xsl:variable> |
|
27 |
|
|
28 |
<xsl:choose> |
|
29 |
<xsl:when |
|
30 |
test="count($metadata) = 0 or normalize-space(//oaf:skipRecord)= 'true'"> |
|
31 |
<ROWS /> |
|
32 |
</xsl:when> |
|
33 |
<xsl:otherwise> |
|
34 |
|
|
35 |
<xsl:variable name="resultId" |
|
36 |
select="dnet:oafSimpleId('result', //dri:objIdentifier)" /> |
|
37 |
|
|
38 |
<xsl:if test="string-length($resultId) > 0"> |
|
39 |
<xsl:variable name="originalid" |
|
40 |
select="concat('', //*[local-name() = 'resource']/*[local-name()='identifier'])" /> |
|
41 |
<xsl:variable name="creators" select="//*[local-name() = 'creator']" /> |
|
42 |
<xsl:variable name="titles" select="//*[local-name() = 'title']" /> |
|
43 |
<xsl:variable name="subjects" select="//*[local-name() = 'subject']" /> |
|
44 |
<xsl:variable name="publisher" select="//*[local-name() = 'publisher']" /> |
|
45 |
<xsl:variable name="descriptions" select="//*[local-name() = 'description']" /> |
|
46 |
<xsl:variable name="dates" select="//*[local-name() = 'date']" /> |
|
47 |
<xsl:variable name="dateaccepted" select="//oaf:dateAccepted" /> |
|
48 |
<xsl:variable name="resourceType" |
|
49 |
select="//*[local-name() = 'resourceType']" /> |
|
50 |
<xsl:variable name="formats" select="//*[local-name() = 'format']" /> |
|
51 |
<xsl:variable name="sizes" select="//*[local-name() = 'size']" /> |
|
52 |
<xsl:variable name="rights" select="//oaf:accessrights" /> |
|
53 |
<xsl:variable name="version" select="//*[local-name() = 'version']" /> |
|
54 |
<xsl:variable name="language" select="//oaf:language" /> |
|
55 |
<xsl:variable name="cobjcategory" select="//dr:CobjCategory" /> |
|
56 |
|
|
57 |
<xsl:variable name="instanceURI"> |
|
58 |
<xsl:choose> |
|
59 |
<xsl:when |
|
60 |
test="string-length( //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']) > 0"> |
|
61 |
<xsl:value-of |
|
62 |
select="concat('http://dx.doi.org','/', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='DOI']/text())" /> |
|
63 |
</xsl:when> |
|
64 |
<xsl:otherwise> |
|
65 |
<xsl:value-of |
|
66 |
select="concat('', //*[local-name() = 'resource']/*[local-name()='identifier' and ./@identifierType='URL'])" /> |
|
67 |
</xsl:otherwise> |
|
68 |
</xsl:choose> |
|
69 |
</xsl:variable> |
|
70 |
|
|
71 |
<xsl:variable name="hostedbyid" |
|
72 |
select="dnet:oafSplitId('datasource', //oaf:hostedBy/@id)" /> |
|
73 |
<xsl:variable name="hostedbyname" select="concat('', //oaf:hostedBy/@name)" /> |
|
74 |
<xsl:variable name="collectedfromid" |
|
75 |
select="dnet:oafSplitId('datasource', //oaf:collectedFrom/@id)" /> |
|
76 |
<xsl:variable name="collectedfromname" |
|
77 |
select="concat('', //oaf:collectedFrom/@name)" /> |
|
78 |
<xsl:variable name="dateOfCollection" |
|
79 |
select="concat('', //dri:dateOfCollection)" /> |
|
80 |
|
|
81 |
<xsl:variable name="result" |
|
82 |
select="dnet:oafResult_FromDatacite($resultId, $dataInfo, $metadata, $titles, |
|
83 |
$subjects, $publisher, $descriptions, $dates, $dateaccepted, $resourceType, |
|
84 |
$formats, $sizes, $language, $cobjcategory, $rights, $version, $hostedbyid, $hostedbyname, |
|
85 |
$collectedfromid, $collectedfromname, $originalid, $instanceURI, $dateOfCollection)" /> |
|
86 |
|
|
87 |
<ROWS> |
|
88 |
<ROW key="{$resultId}" columnFamily="result"> |
|
89 |
<QUALIFIER name="body" type="base64"> |
|
90 |
<xsl:value-of select="$result" /> |
|
91 |
</QUALIFIER> |
|
92 |
</ROW> |
|
93 |
<xsl:for-each select="//*[local-name() = 'creator']"> |
|
94 |
<xsl:variable name="personIdTemp"> |
|
95 |
<xsl:choose> |
|
96 |
<xsl:when |
|
97 |
test="string-length(./*[local-name() = 'nameIdentifier']) > 0"> |
|
98 |
<xsl:value-of |
|
99 |
select="translate(normalize-space(./*[local-name() = 'nameIdentifier']),' .,','___')" /> |
|
100 |
</xsl:when> |
|
101 |
<xsl:otherwise> |
|
102 |
<xsl:value-of |
|
103 |
select="translate(normalize-space(./*[local-name() = 'creatorName']),' .,','___')" /> |
|
104 |
</xsl:otherwise> |
|
105 |
</xsl:choose> |
|
106 |
</xsl:variable> |
|
107 |
<xsl:variable name="personId" |
|
108 |
select="dnet:oafId('person', $namespaceprefix, normalize-space($personIdTemp))" /> |
|
109 |
|
|
110 |
<xsl:variable name="originalPersonId" |
|
111 |
select="./*[local-name() = 'nameIdentifier']" /> |
|
112 |
<xsl:variable name="position" select="position()" /> |
|
113 |
<xsl:if test="string-length($personId) > 0"> |
|
114 |
<xsl:variable name="person" |
|
115 |
select="dnet:oafPerson_FromDatacite($personId, $dataInfo, $collectedfromid, $collectedfromname,$originalPersonId, $dateOfCollection ,normalize-space(./*[local-name() = 'creatorName']))" /> |
|
116 |
|
|
117 |
<xsl:variable name="personresult" |
|
118 |
select="dnet:oafPersonResult_Authorship_FromDatacite($personId, $resultId, $position, 'isAuthorOf', $dataInfo)" /> |
|
119 |
<xsl:variable name="resultperson" |
|
120 |
select="dnet:oafPersonResult_Authorship_FromDatacite($resultId, $personId, $position, 'hasAuthor', $dataInfo)" /> |
|
121 |
<ROW key="{$personId}" columnFamily="person"> |
|
122 |
<QUALIFIER name="body" type="base64"> |
|
123 |
<xsl:value-of select="$person" /> |
|
124 |
</QUALIFIER> |
|
125 |
</ROW> |
|
126 |
<ROW key="{$personId}" columnFamily="personResult_authorship_isAuthorOf"> |
|
127 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
128 |
<xsl:value-of select="$personresult" /> |
|
129 |
</QUALIFIER> |
|
130 |
</ROW> |
|
131 |
<ROW key="{$resultId}" columnFamily="personResult_authorship_hasAuthor"> |
|
132 |
<QUALIFIER name="{$personId}" type="base64"> |
|
133 |
<xsl:value-of select="$resultperson" /> |
|
134 |
</QUALIFIER> |
|
135 |
</ROW> |
|
136 |
</xsl:if> |
|
137 |
</xsl:for-each> |
|
138 |
|
|
139 |
<xsl:for-each select="//*[local-name()='projectid']"> |
|
140 |
|
|
141 |
<xsl:variable name="projectId" |
|
142 |
select="dnet:oafSplitId('project', normalize-space(.))" /> |
|
143 |
|
|
144 |
<xsl:variable name="resultproject" |
|
145 |
select="dnet:oafResultProject_Outcome_FromDatacite($resultId, $projectId, 'isProducedBy', $dataInfo)" /> |
|
146 |
<xsl:variable name="projectresult" |
|
147 |
select="dnet:oafResultProject_Outcome_FromDatacite($projectId, $resultId, 'produces', $dataInfo)" /> |
|
148 |
|
|
149 |
<xsl:if test="string-length($projectId) > 0"> |
|
150 |
<ROW key="{$resultId}" columnFamily="resultProject_outcome_isProducedBy"> |
|
151 |
<QUALIFIER name="{$projectId}" type="base64"> |
|
152 |
<xsl:value-of select="$resultproject" /> |
|
153 |
</QUALIFIER> |
|
154 |
</ROW> |
|
155 |
<ROW key="{$projectId}" columnFamily="resultProject_outcome_produces"> |
|
156 |
<QUALIFIER name="{$resultId}" type="base64"> |
|
157 |
<xsl:value-of select="$projectresult" /> |
|
158 |
</QUALIFIER> |
|
159 |
</ROW> |
|
160 |
</xsl:if> |
|
161 |
</xsl:for-each> |
|
162 |
|
|
163 |
<xsl:for-each select="//*[local-name()='relatedPublication']"> |
|
164 |
|
|
165 |
<!-- relatedPublication ids must be in the openaire format --> |
|
166 |
<xsl:variable name="publicationId" select="dnet:oafSimpleId('result', normalize-space(./@id))"/> |
|
167 |
|
|
168 |
<xsl:if test="string-length($publicationId) > 0"> |
|
169 |
|
|
170 |
<xsl:variable name="resultDataset" select="dnet:oafResultResult_PublicationDataset_FromDatacite($resultId, $publicationId, 'isRelatedTo', $dataInfo)"/> |
|
171 |
<xsl:variable name="datasetResult" select="dnet:oafResultResult_PublicationDataset_FromDatacite($publicationId, $resultId, 'isRelatedTo', $dataInfo)"/> |
|
172 |
|
|
173 |
<ROW key="{$resultId}" columnFamily="resultResult_publicationDataset_isRelatedTo"> |
|
174 |
<QUALIFIER name="{$publicationId}" type="base64"><xsl:value-of select="$resultDataset"/></QUALIFIER> |
|
175 |
</ROW> |
|
176 |
<ROW key="{$publicationId}" columnFamily="resultResult_publicationDataset_isRelatedTo"> |
|
177 |
<QUALIFIER name="{$resultId}" type="base64"><xsl:value-of select="$datasetResult"/></QUALIFIER> |
|
178 |
</ROW> |
|
179 |
</xsl:if> |
|
180 |
</xsl:for-each> |
|
181 |
</ROWS> |
|
182 |
</xsl:if> |
|
183 |
</xsl:otherwise> |
|
184 |
</xsl:choose> |
|
185 |
</xsl:template> |
|
186 |
</xsl:stylesheet> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/test/java/eu/dnetlib/pace/clustering/ClusteringCombinerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.clustering; |
|
2 |
|
|
3 |
import org.junit.Before; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
7 |
import eu.dnetlib.pace.config.Config; |
|
8 |
import eu.dnetlib.pace.config.Type; |
|
9 |
import eu.dnetlib.pace.model.FieldListImpl; |
|
10 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
11 |
import eu.dnetlib.pace.model.MapDocument; |
|
12 |
|
|
13 |
public class ClusteringCombinerTest extends AbstractProtoPaceTest { |

14 |
// Smoke test for ClusteringCombiner.combine: it prints the outcome and makes no assertions. |

15 |
private Config config; |

16 |
// Loads the configuration returned by getResultFullConf() (inherited helper) before each test. |

17 |
@Before |

18 |
public void setUp() { |

19 |
config = getResultFullConf(); |

20 |
} |

21 |
// Builds a result document, attaches an extra "desc" field and prints what combine() returns. |

22 |
@Test |

23 |
public void testCombine() { |

24 |
String title = "Dipping in Cygnus X-2 in a multi-wavelength campaign due to absorption of extended ADC emission"; |

25 |
MapDocument result = result(config, "A", title, "2013"); |

26 |
// The "desc" field is added by hand to the document's field map. |

27 |
FieldListImpl fl = new FieldListImpl(); |

28 |
fl.add(new FieldValueImpl(Type.String, "desc", "lorem ipsum cabalie qwerty")); |

29 |
// NOTE(review): output is only printed to stdout; nothing is verified, so this test cannot fail on a wrong result. |

30 |
result.getFieldMap().put("desc", fl); |

31 |
System.out.println(title); |

32 |
System.out.println(ClusteringCombiner.combine(result, config)); |

33 |
} |

34 |


35 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/test/resources/eu/dnetlib/pace/organization.pace.conf | ||
---|---|---|
1 |
pace.conf { |
|
2 |
conditions { }, |
|
3 |
model { |
|
4 |
legalname { algo = JaroWinkler, type = String, weight = 0.6, ignoreMissing = false, path = organization/metadata/legalname/value }, |
|
5 |
legalshortname { algo = JaroWinkler, type = String, weight = 0.4, ignoreMissing = true, path = organization/metadata/legalshortname/value } |
|
6 |
} |
|
7 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/test/resources/eu/dnetlib/pace/result.authors.pace.conf | ||
---|---|---|
1 |
pace.conf { |
|
2 |
clustering { |
|
3 |
acronyms { fields = [title, desc], params = { max = 1, minLen = 2, maxLen = 4} }, |
|
4 |
ngrampairs { fields = [title], params = { max = 1, ngramLen = 3} }, |
|
5 |
suffixprefix { fields = [title], params = { max = 1, len = 3 } } |
|
6 |
}, |
|
7 |
conditions { |
|
8 |
titleVersionMatch { fields = [title] }, |
|
9 |
sizeMatch { fields = [authors] } |
|
10 |
}, |
|
11 |
model { |
|
12 |
title { algo = JaroWinkler, type = String, weight = 0.5, ignoreMissing = false, path = result/metadata/title/value }, |
|
13 |
authors { algo = SortedLevel2JaroWinkler, type = List, weight = 0.5, ignoreMissing = true, path = result/author/metadata/fullname/value } |
|
14 |
}, |
|
15 |
blacklists = { |
|
16 |
title = [ |
|
17 |
"^(Corpus Oral Dialectal \\(COD\\)\\.).*$", |
|
18 |
"^(Kiri Karl Morgensternile).*$", |
|
19 |
"^(\\[Eksliibris Aleksandr).*\\]$", |
|
20 |
"^(\\[Eksliibris Aleksandr).*$", |
|
21 |
"^(Eksliibris Aleksandr).*$", |
|
22 |
"^(Kiri A\\. de Vignolles).*$", |
|
23 |
"^(2 kirja Karl Morgensternile).*$", |
|
24 |
"^(Pirita kloostri idaosa arheoloogilised).*$", |
|
25 |
"^(Kiri tundmatule).*$", |
|
26 |
"^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$", |
|
27 |
"^(Eksliibris Nikolai Birukovile).*$", |
|
28 |
"^(Eksliibris Nikolai Issakovile).*$", |
|
29 |
"^(WHP Cruise Summary Information of section).*$", |
|
30 |
"^(Measurement of the top quark\\-pair production cross section with ATLAS in pp collisions at).*$", |
|
31 |
"^(Measurement of the spin\\-dependent structure function).*" |
|
32 |
] } |
|
33 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/test/java/eu/dnetlib/pace/distance/DetectorTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.distance; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertTrue; |
|
4 |
|
|
5 |
import java.util.List; |
|
6 |
|
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
import com.google.common.collect.Lists; |
|
10 |
|
|
11 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
|
12 |
import eu.dnetlib.pace.config.Config; |
|
13 |
import eu.dnetlib.pace.model.MapDocument; |
|
14 |
|
|
15 |
public class DetectorTest extends AbstractProtoPaceTest { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testDistanceResultSimple() { |
|
19 |
Config config = getResultSimpleConf(); |
|
20 |
|
|
21 |
MapDocument resA = result(config, "A", "Recent results from CDF"); |
|
22 |
MapDocument resB = result(config, "B", "Recent results from CDF"); |
|
23 |
|
|
24 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
25 |
System.out.println(String.format(" d ---> %s", d)); |
|
26 |
|
|
27 |
assertTrue(d == 1.0); |
|
28 |
} |
|
29 |
|
|
30 |
@Test |
|
31 |
public void testDistanceResultSimpleMissingDates() { |
|
32 |
Config config = getResultSimpleConf(); |
|
33 |
|
|
34 |
MapDocument resA = result(config, "A", "Recent results from BES"); |
|
35 |
MapDocument resB = result(config, "A", "Recent results from CES"); |
|
36 |
|
|
37 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
38 |
System.out.println(String.format(" d ---> %s", d)); |
|
39 |
|
|
40 |
assertTrue(d > 0.97); |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void testDistanceResultInvalidDate() { |
|
45 |
Config config = getResultConf(); |
|
46 |
|
|
47 |
MapDocument resA = result(config, "A", "title title title 6BESR", "2013-01-05"); |
|
48 |
MapDocument resB = result(config, "B", "title title title 6BESR", "qwerty"); |
|
49 |
|
|
50 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
51 |
System.out.println(String.format(" d ---> %s", d)); |
|
52 |
|
|
53 |
assertTrue(d == 1.0); |
|
54 |
} |
|
55 |
|
|
56 |
@Test |
|
57 |
public void testDistanceResultMissingOneDate() { |
|
58 |
Config config = getResultConf(); |
|
59 |
|
|
60 |
MapDocument resA = result(config, "A", "title title title 6BESR", null); |
|
61 |
MapDocument resB = result(config, "B", "title title title 6CLER", "2012-02"); |
|
62 |
|
|
63 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
64 |
System.out.println(String.format(" d ---> %s", d)); |
|
65 |
|
|
66 |
assertTrue(d > 0.9 && d < 1.0); |
|
67 |
} |
|
68 |
|
|
69 |
@Test |
|
70 |
public void testDistanceResult() { |
|
71 |
Config config = getResultConf(); |
|
72 |
|
|
73 |
MapDocument resA = result(config, "A", "title title title BES", ""); |
|
74 |
MapDocument resB = result(config, "B", "title title title CLEO"); |
|
75 |
|
|
76 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
77 |
System.out.println(String.format(" d ---> %s", d)); |
|
78 |
|
|
79 |
// assertTrue(d > 0.9 && d < 1.0); |
|
80 |
} |
|
81 |
|
|
82 |
@Test |
|
83 |
public void testDistanceResultMissingTwoDate() { |
|
84 |
Config config = getResultConf(); |
|
85 |
|
|
86 |
MapDocument resA = result(config, "A", "title title title 6BESR"); |
|
87 |
MapDocument resB = result(config, "B", "title title title 6CLER"); |
|
88 |
|
|
89 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
90 |
|
|
91 |
System.out.println(String.format(" d ---> %s", d)); |
|
92 |
|
|
93 |
assertTrue(d > 0.9 && d < 1.0); |
|
94 |
} |
|
95 |
|
|
96 |
@Test |
|
97 |
public void testDistanceOrganizationIgnoreMissing() { |
|
98 |
|
|
99 |
Config config = getOrganizationSimpleConf(); |
|
100 |
|
|
101 |
MapDocument orgA = organization(config, "A", "CONSIGLIO NAZIONALE DELLE RICERCHE"); |
|
102 |
MapDocument orgB = organization(config, "B", "CONSIGLIO NAZIONALE DELLE RICERCHE", "CNR"); |
|
103 |
|
|
104 |
double d = new PaceDocumentDistance().between(orgA, orgB, config); |
|
105 |
System.out.println(String.format(" d ---> %s", d)); |
|
106 |
|
|
107 |
assertTrue(d == 1.0); |
|
108 |
} |
|
109 |
|
|
110 |
@Test |
|
111 |
public void testDistanceResultCase1() { |
|
112 |
|
|
113 |
Config config = getResultConf(); |
|
114 |
|
|
115 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003"); |
|
116 |
MapDocument resB = result(config, "B", "Search for the Standard Model Higgs Boson", "2003"); |
|
117 |
|
|
118 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
119 |
System.out.println(String.format(" d ---> %s", d)); |
|
120 |
|
|
121 |
assertTrue(d > 0.9 && d < 1.0); |
|
122 |
} |
|
123 |
|
|
124 |
@Test |
|
125 |
public void testDistanceResultCaseDoiMatch1() { |
|
126 |
Config config = getResultConf(); |
|
127 |
|
|
128 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs boson", "2003", "http://dx.doi.org/10.1594/PANGAEA.726855"); |
|
129 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", "10.1594/PANGAEA.726855"); |
|
130 |
|
|
131 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
132 |
System.out.println(String.format(" d ---> %s", d)); |
|
133 |
|
|
134 |
assertTrue("exact DOIs will produce an exact match", d == 1.0); |
|
135 |
} |
|
136 |
|
|
137 |
@Test |
|
138 |
public void testDistanceResultCaseDoiMatch2() { |
|
139 |
Config config = getResultConf(); |
|
140 |
|
|
141 |
MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "http://dx.doi.org/10.1594/PANGAEA.726855"); |
|
142 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2005", "doi:10.1594/PANGAEA.726855"); |
|
143 |
|
|
144 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
145 |
System.out.println(String.format(" d ---> %s", d)); |
|
146 |
|
|
147 |
assertTrue("exact DOIs will produce an exact match, regardless of different titles or publication years", d == 1.0); |
|
148 |
} |
|
149 |
|
|
150 |
@Test |

151 |
public void testDistanceResultCaseDoiMatch3() { |

152 |
Config config = getResultConf(); |

153 |
// Same title and year on both sides; only resA is given a fifth argument (presumably its DOI - confirm against AbstractProtoPaceTest.result). |

154 |
MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024"); |

155 |
MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003"); |

156 |


157 |
double d = new PaceDocumentDistance().between(resA, resB, config); |

158 |
System.out.println(String.format(" d ---> %s", d)); |

159 |


160 |
assertTrue("a missing DOI will cause the comparison to continue with the following conditions", d == 1.0); |

161 |
} |
|
162 |
|
|
163 |
@Test |

164 |
public void testDistanceResultCaseDoiMatch4() { |

165 |
Config config = getResultConf(); |

166 |
// Same title, different years (2003 vs 2005); only resA is given a fifth argument (presumably its DOI - confirm against AbstractProtoPaceTest.result). |

167 |
MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024"); |

168 |
MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2005"); |

169 |


170 |
double d = new PaceDocumentDistance().between(resA, resB, config); |

171 |
System.out.println(String.format(" d ---> %s", d)); |

172 |


173 |
assertTrue("a missing DOI, comparison continues with the following conditions, different publication years will drop the score to 0", d == 0.0); |

174 |
} |
|
175 |
|
|
176 |
@Test |

177 |
public void testDistanceResultCaseDoiMatch5() { |

178 |
// Titles differ slightly ("Search for the ..." vs "Search the ..."), same year; only resA is given a fifth argument (presumably its DOI - confirm against AbstractProtoPaceTest.result). |

179 |
Config config = getResultConf(); |

180 |


181 |
MapDocument resA = result(config, "A", "Search for the Standard Model Higgs Boson", "2003", "10.1016/j.jmb.2010.12.020"); |

182 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003"); |

183 |


184 |
double d = new PaceDocumentDistance().between(resA, resB, config); |

185 |
System.out.println(String.format(" d ---> %s", d)); |

186 |


187 |
assertTrue("a missing DOI, comparison continues with the following conditions", d > 0.9 && d < 1.0); |

188 |
} |
|
189 |
|
|
190 |
@Test |
|
191 |
public void testDistanceResultCaseDoiMatch6() { |
|
192 |
Config config = getResultConf(); |
|
193 |
|
|
194 |
MapDocument resA = result(config, "A", "Conference proceedings on X. Appendix", "2003", "10.1016/j.jmb.2010.12.024"); |
|
195 |
MapDocument resB = result(config, "B", "Conference proceedings on X. Appendix", "2003", "anotherDifferentDOI"); |
|
196 |
|
|
197 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
198 |
System.out.println(String.format(" d ---> %s", d)); |
|
199 |
|
|
200 |
assertTrue("different DOIs will drop the score to 0, regardless of the other fields", d == 0.0); |
|
201 |
} |
|
202 |
|
|
203 |
// http://dx.doi.org/10.1594/PANGAEA.726855 doi:10.1594/PANGAEA.726855 |
|
204 |
|
|
205 |
@Test |
|
206 |
public void testDistanceResultCaseAuthor1() { |
|
207 |
|
|
208 |
Config config = getResultAuthorsConf(); |
|
209 |
|
|
210 |
List<String> authorsA = Lists.newArrayList("a", "b", "c", "d"); |
|
211 |
List<String> authorsB = Lists.newArrayList("a", "b", "c"); |
|
212 |
|
|
213 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA); |
|
214 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB); |
|
215 |
|
|
216 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
217 |
System.out.println(String.format(" d ---> %s", d)); |
|
218 |
|
|
219 |
assertTrue(d == 0.0); |
|
220 |
} |
|
221 |
|
|
222 |
@Test |
|
223 |
public void testDistanceResultCaseAuthor2() { |
|
224 |
|
|
225 |
Config config = getResultAuthorsConf(); |
|
226 |
|
|
227 |
List<String> authorsA = Lists.newArrayList("a", "b", "c"); |
|
228 |
List<String> authorsB = Lists.newArrayList("a", "b", "c"); |
|
229 |
|
|
230 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA); |
|
231 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB); |
|
232 |
|
|
233 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
234 |
System.out.println(String.format(" d ---> %s", d)); |
|
235 |
|
|
236 |
assertTrue(d == 1.0); |
|
237 |
} |
|
238 |
|
|
239 |
@Test |
|
240 |
public void testDistanceResultCaseAuthor3() { |
|
241 |
|
|
242 |
Config config = getResultAuthorsConf(); |
|
243 |
|
|
244 |
List<String> authorsA = Lists.newArrayList("Bardi, A.", "Manghi, P.", "Artini, M."); |
|
245 |
List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele"); |
|
246 |
|
|
247 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA); |
|
248 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB); |
|
249 |
|
|
250 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
251 |
System.out.println(String.format(" d ---> %s", d)); |
|
252 |
|
|
253 |
assertTrue(d > 0.9 && d < 1.0); |
|
254 |
} |
|
255 |
|
|
256 |
@Test |
|
257 |
public void testDistanceResultCaseAuthor4() { |
|
258 |
|
|
259 |
Config config = getResultAuthorsConf(); |
|
260 |
|
|
261 |
List<String> authorsA = Lists.newArrayList("Bardi, Alessia", "Manghi, Paolo", "Artini, Michele", "a"); |
|
262 |
List<String> authorsB = Lists.newArrayList("Bardi Alessia", "Manghi Paolo", "Artini Michele"); |
|
263 |
|
|
264 |
MapDocument resA = result(config, "A", "Search the Standard Model Higgs Boson", "2003", null, authorsA); |
|
265 |
MapDocument resB = result(config, "B", "Search the Standard Model Higgs Boson", "2003", null, authorsB); |
|
266 |
|
|
267 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
268 |
System.out.println(String.format(" d ---> %s", d)); |
|
269 |
|
|
270 |
// assertTrue(d == 0.0); |
|
271 |
} |
|
272 |
|
|
273 |
@Test |
|
274 |
public void testDistanceResultFullConf() { |
|
275 |
|
|
276 |
Config config = getResultFullConf(); |
|
277 |
|
|
278 |
List<String> authorsA = Lists.newArrayList("Nagarajan Pranesh", "Guy Vautier", "Punyanganie de Silva"); |
|
279 |
List<String> authorsB = Lists.newArrayList("Pranesh Nagarajan", "Vautier Guy", "de Silva Punyanganie"); |
|
280 |
|
|
281 |
MapDocument resA = |
|
282 |
result(config, "A", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010", |
|
283 |
"10.1186/1752-1947-4-299", authorsA); |
|
284 |
MapDocument resB = |
|
285 |
result(config, "B", "Presentations of perforated colonic pathology in patients with polymyalgia rheumatica: two case reports", "2010", null, |
|
286 |
authorsB); |
|
287 |
|
|
288 |
double d = new PaceDocumentDistance().between(resA, resB, config); |
|
289 |
System.out.println(String.format(" d ---> %s", d)); |
|
290 |
|
|
291 |
// assertTrue(d == 0.0); |
|
292 |
} |
|
293 |
|
|
294 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/test/resources/eu/dnetlib/data/transform/record.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<record xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:prov="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:dr="http://www.driver-repository.eu/namespace/dr" xmlns:oaf="http://namespace.openaire.eu/oaf"> |
|
3 |
<header xmlns="http://namespace.openaire.eu/"> |
|
4 |
<dri:objIdentifier>od______1064::fe947e59cf7db2f039b4c8cc25693fb0</dri:objIdentifier> |
|
5 |
<dri:recordIdentifier>95168db1-d57e-4b99-855b-993cf91d1283_TURTdG9yZURTUmVzb3VyY2VzL01EU3RvcmVEU1Jlc291cmNlVHlwZQ==::oai:ora.ouls.ox.ac.uk:uuid:5d8f6cbb-1283-4957-8c55-48a4024bed76</dri:recordIdentifier> |
|
6 |
<dri:dateOfCollection/> |
|
7 |
<dri:mdFormat/> |
|
8 |
<dri:mdFormatInterpretation/> |
|
9 |
<dri:repositoryId>2a02b271-0756-453c-b2f0-8c472a8806a5_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId> |
|
10 |
<dr:objectIdentifier/> |
|
11 |
<dr:dateOfCollection>2013-05-10T16:04:02Z</dr:dateOfCollection> |
|
12 |
<oaf:datasourceprefix>od______1064</oaf:datasourceprefix> |
|
13 |
</header> |
|
14 |
<metadata xmlns="http://namespace.openaire.eu/"> |
|
15 |
<dc:creator>Uphoff, S</dc:creator> |
|
16 |
<dc:creator>Holden, SJ</dc:creator> |
|
17 |
<dc:dateAccepted>2011-01-01</dc:dateAccepted> |
|
18 |
<dc:description>The analysis of structure and dynamics of biomolecules is important for understanding their function. Toward this aim, we introduce a method called 'switchable FRET', which combines single-molecule fluorescence resonance energy transfer (FRET) with reversible photoswitching of fluorophores. Typically, single-molecule FRET is measured within a single donor-acceptor pair and reports on only one distance. Although multipair FRET approaches that monitor multiple distances have been developed, they are technically challenging and difficult to extend, mainly because of their reliance on spectrally distinct acceptors. In contrast, switchable FRET sequentially probes FRET between a single donor and spectrally identical photoswitchable acceptors, dramatically reducing the experimental and analytical complexity and enabling direct monitoring of multiple distances. Our experiments on DNA molecules, a protein-DNA complex and dynamic Holliday junctions demonstrate the potential of switchable FRET for studying dynamic, multicomponent biomolecules. </dc:description> |
|
19 |
<dc:identifier>http://pub.uni-bielefeld.de/publication/2303387</dc:identifier> |
|
20 |
<dc:language>eng</dc:language> |
|
21 |
<dc:title>Monitoring multiple distances within a single molecule using switchable FRET.</dc:title> |
|
22 |
<dc:source>Symplectic Elements at Oxford</dc:source> |
|
23 |
<dc:source>PubMed (http://www.ncbi.nlm.nih.gov/pubmed/)</dc:source> |
|
24 |
<dc:source>Web of Science (Lite) (http://apps.webofknowledge.com/summary.do)</dc:source> |
|
25 |
<dc:subject>Biotinylation</dc:subject> |
|
26 |
<dc:subject>Computer Simulation</dc:subject> |
|
27 |
<dr:CobjCategory>0001</dr:CobjCategory> |
|
28 |
<dr:CobjIdentifier>urn:uuid:5d8f6cbb-1283-4957-8c55-48a4024bed76</dr:CobjIdentifier> |
|
29 |
<dr:CobjIdentifier>pii:nmeth.1502</dr:CobjIdentifier> |
|
30 |
<dr:CobjIdentifier>local:71163</dr:CobjIdentifier> |
|
31 |
<dr:CobjIdentifier>eissn:1548-7105</dr:CobjIdentifier> |
|
32 |
<dr:CobjIdentifier>doi:10.1038/nmeth.1502</dr:CobjIdentifier> |
|
33 |
<dr:CobjIdentifier>issn:1548-7091</dr:CobjIdentifier> |
|
34 |
<oaf:accessrights>EMBARGO</oaf:accessrights> |
|
35 |
<oaf:collectedDatasourceid>issn____::12345678</oaf:collectedDatasourceid> |
|
36 |
<oaf:hostedBy name="DOAJ" id="doaj____::1234"/> |
|
37 |
<oaf:collectedFrom name="My favourite journal" id="issn____::12345678"/> |
|
38 |
<oaf:fulltext>http://xyz</oaf:fulltext> |
|
39 |
<oaf:journal issn="12345678" eissn="e1234567" lissn="l1234567">My favourite journal</oaf:journal> |
|
40 |
<oaf:journal issn="12345678" eissn="e1234567">My second favourite journal</oaf:journal> |
|
41 |
<oaf:identifier identifierType="doi">10.1038/nmeth.1502</oaf:identifier> |
|
42 |
<oaf:reference identifier="IPR004915" query="http://www.ebi.ac.uk/europepmc/webservices/rest/PMC/PMC155133/databaseLinks/INTERPRO/1/json" source="INTERPRO" title="Nonstructural protein NS-S, bunyaviral" type="dataset">http://www.ebi.ac.uk/interpro/entry/IPR004915</oaf:reference> |
|
43 |
<oaf:reference identifier="IPRXXXXXX" query="http://www.ebi.ac.uk/europepmc/webservices/rest/PMC/PMCYYYYYY/databaseLinks/INTERPRO/2/json" source="INTERPRO" title="Nonstructural protein XY-S, bunyaviral" type="dataset">http://www.ebi.ac.uk/interpro/entry/TTTTTTTTT</oaf:reference> |
|
44 |
</metadata> |
|
45 |
</record> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-3.0.1/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetOafXsltFunctions.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.nio.charset.Charset; |
|
4 |
import java.security.MessageDigest; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
7 |
|
|
8 |
import org.apache.commons.codec.binary.Base64; |
|
9 |
import org.apache.commons.codec.binary.Hex; |
|
10 |
import org.apache.commons.lang.StringEscapeUtils; |
|
11 |
import org.apache.commons.lang.StringUtils; |
|
12 |
import org.apache.commons.lang.math.NumberUtils; |
|
13 |
import org.w3c.dom.NamedNodeMap; |
|
14 |
import org.w3c.dom.Node; |
|
15 |
import org.w3c.dom.NodeList; |
|
16 |
|
|
17 |
import com.google.common.base.Predicate; |
|
18 |
import com.google.common.base.Predicates; |
|
19 |
import com.google.common.base.Splitter; |
|
20 |
import com.google.common.collect.Iterables; |
|
21 |
import com.google.common.collect.Lists; |
|
22 |
import com.google.common.collect.Maps; |
|
23 |
import com.google.protobuf.Descriptors.Descriptor; |
|
24 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
25 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
26 |
import com.google.protobuf.Message; |
|
27 |
import com.google.protobuf.Message.Builder; |
|
28 |
import com.google.protobuf.ProtocolMessageEnum; |
|
29 |
|
|
30 |
import eu.dnetlib.data.proto.FieldTypeProtos.BoolField; |
|
31 |
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo; |
|
32 |
import eu.dnetlib.data.proto.FieldTypeProtos.IntField; |
|
33 |
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; |
|
34 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
35 |
import eu.dnetlib.data.proto.FieldTypeProtos.StringField; |
|
36 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
37 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
38 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
39 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
40 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
41 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
42 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
43 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
44 |
import eu.dnetlib.miscutils.collections.Pair; |
|
45 |
import eu.dnetlib.miscutils.iterators.IterablePair; |
|
46 |
|
|
47 |
public abstract class AbstractDNetOafXsltFunctions { |
|
48 |
|
|
49 |
private static final int MAX_NSPREFIX_LEN = 12; |
|
50 |
public static final String URL_REGEX = "^(http|https|ftp)\\://.*"; |
|
51 |
protected static Map<String, String> code2name = Maps.newHashMap(); |
|
52 |
|
|
53 |
public static Predicate<String> urlFilter = new Predicate<String>() { |
|
54 |
|
|
55 |
@Override |
|
56 |
public boolean apply(final String s) { |
|
57 |
return s.trim().matches(URL_REGEX); |
|
58 |
} |
|
59 |
}; |
|
60 |
|
|
61 |
/* |
|
62 |
* Obtained via COPY (select code, name from class) TO '/tmp/class_scheme.csv' (FORMAT csv, delimiter ',', FORCE_QUOTE *); on the |
|
63 |
* relational db |
|
64 |
*/ |
|
65 |
static { |
|
66 |
code2name.put("MO", "Macao"); |
|
67 |
code2name.put("UK", "United Kingdom"); |
|
68 |
code2name.put("WF", "WALLIS AND FUTUNA"); |
|
69 |
code2name.put("srp", "Serbian"); |
|
70 |
code2name.put("FM", "MICRONESIA, FEDERATED STATES OF"); |
|
71 |
code2name.put("PendingRepositoryResources", "Pending datasource"); |
|
72 |
code2name.put("12MONTHS", "12 Months Embargo"); |
|
73 |
code2name.put("chy", "Cheyenne"); |
|
74 |
code2name.put("yid", "Yiddish"); |
|
75 |
code2name.put("sword", "sword"); |
|
76 |
code2name.put("aggregator::pubsrepository::institutional", "Aggregator of Institutional Publication Repositories"); |
|
77 |
code2name.put("LSID", "LSID"); |
|
78 |
code2name.put("BO", "Bolivia"); |
|
79 |
code2name.put("0008", "Bachelor thesis"); |
|
80 |
code2name.put("CW", "CURAÃAO"); |
|
81 |
code2name.put("PURL", "PURL"); |
|
82 |
code2name.put("HR", "Croatia"); |
|
83 |
code2name.put("KM", "COMOROS"); |
|
84 |
code2name.put("VU", "VANUATU"); |
|
85 |
code2name.put("CU", "Cuba"); |
|
86 |
code2name.put("MN", "MONGOLIA"); |
|
87 |
code2name.put("RepositoryServiceResources", "Valid datasource"); |
|
88 |
code2name.put("cop", "Coptic"); |
|
89 |
code2name.put("cpe", "English-based Creoles and Pidgins"); |
|
90 |
code2name.put("ine", "Indo-European"); |
|
91 |
code2name.put("ipk", "Inupiaq"); |
|
92 |
code2name.put("ira", "Iranian"); |
|
93 |
code2name.put("max", "Manx"); |
|
94 |
code2name.put("men", "Mende"); |
|
95 |
code2name.put("mga", "Middle Irish"); |
|
96 |
code2name.put("sco", "Scots"); |
|
97 |
code2name.put("scr", "Serbo-Croatian"); |
|
98 |
code2name.put("yao", "Yao"); |
|
99 |
code2name.put("yap", "Yap"); |
|
100 |
code2name.put("yor", "Yoruba"); |
|
101 |
code2name.put("api", "api"); |
|
102 |
code2name.put("file", "file"); |
|
103 |
code2name.put("files_from_metadata", "files_from_metadata"); |
|
104 |
code2name.put("ftp", "ftp"); |
|
105 |
code2name.put("0009", "External research report"); |
|
106 |
code2name.put("UPC", "UPC"); |
|
107 |
code2name.put("CK", "COOK ISLANDS"); |
|
108 |
code2name.put("CZ", "Czech Republic"); |
|
109 |
code2name.put("MS", "MONTSERRAT"); |
|
110 |
code2name.put("MW", "Malawi"); |
|
111 |
code2name.put("MV", "Maldives"); |
|
112 |
code2name.put("MH", "Marshall Islands"); |
|
113 |
code2name.put("URL", "URL"); |
|
114 |
code2name.put("CG", "Congo"); |
|
115 |
code2name.put("DK", "Denmark"); |
|
116 |
code2name.put("VI", "VIRGIN ISLANDS, U.S."); |
|
117 |
code2name.put("cpf", "French-based Creoles and Pidgins"); |
|
118 |
code2name.put("mic", "Micmac"); |
|
119 |
code2name.put("min", "Minangkabau"); |
|
120 |
code2name.put("sel", "Selkup"); |
|
121 |
code2name.put("sem", "Semitic"); |
|
122 |
code2name.put("sga", "old Irish"); |
|
123 |
code2name.put("zap", "Zapotec"); |
|
124 |
code2name.put("zen", "Zenaga"); |
|
125 |
code2name.put("http", "http"); |
|
126 |
code2name.put("jdbc", "jdbc"); |
|
127 |
code2name.put("cpp", "Portuguese-based Creoles and Pidgins"); |
|
128 |
code2name.put("crp", "Creoles and Pidgins"); |
|
129 |
code2name.put("oai", "oai"); |
|
130 |
code2name.put("mis", "Miscellaneous"); |
|
131 |
code2name.put("mkh", "Mon-Kmer"); |
|
132 |
code2name.put("mni", "Manipuri"); |
|
133 |
code2name.put("mno", "Manobo"); |
|
134 |
code2name.put("moh", "Mohawk"); |
|
135 |
code2name.put("shn", "Shan"); |
|
136 |
code2name.put("sid", "Sidamo"); |
|
137 |
code2name.put("sio", "Siouan"); |
|
138 |
code2name.put("sit", "Sino-Tibetan"); |
|
139 |
code2name.put("zha", "Zhuang; Chuang"); |
|
140 |
code2name.put("other", "other"); |
|
141 |
code2name.put("rest", "rest"); |
|
142 |
code2name.put("soap", "soap"); |
|
143 |
code2name.put("sparql", "sparql"); |
|
144 |
code2name.put("zul", "Zulu"); |
|
145 |
code2name.put("zun", "Zuni"); |
|
146 |
code2name.put("file::EuropePMC", "file::EuropePMC"); |
|
147 |
code2name.put("mos", "Mossi"); |
|
148 |
code2name.put("sla", "Slavic"); |
|
149 |
code2name.put("iro", "Iroquoian"); |
|
150 |
code2name.put("slk/slo", "Slovak"); |
|
151 |
code2name.put("httpCSV", "httpCSV"); |
|
152 |
code2name.put("IS", "Iceland"); |
|
153 |
code2name.put("NA", "Namibia"); |
|
154 |
code2name.put("QA", "Qatar"); |
|
155 |
code2name.put("endDate", "endDate"); |
|
156 |
code2name.put("issued", "issued"); |
|
157 |
code2name.put("CIP-EIP-TN", "CIP-Eco-Innovation - CIP-Thematic Network"); |
|
158 |
code2name.put("ace", "Achinese"); |
|
159 |
code2name.put("akk", "Akkadian"); |
|
160 |
code2name.put("alb/sqi", "Albanian"); |
|
161 |
code2name.put("bra", "Braj"); |
|
162 |
code2name.put("bua", "Buriat"); |
|
163 |
code2name.put("0019", "Patent"); |
|
164 |
code2name.put("NL", "Netherlands"); |
|
165 |
code2name.put("RO", "Romania"); |
|
166 |
code2name.put("CP", "Collaborative project"); |
|
167 |
code2name.put("ach", "Acoli"); |
|
168 |
code2name.put("bug", "Buginese"); |
|
169 |
code2name.put("dra", "Dravidian"); |
|
170 |
code2name.put("dua", "Duala"); |
|
171 |
code2name.put("dum", "Middle Dutch"); |
|
172 |
code2name.put("epo", "Esperanto"); |
|
173 |
code2name.put("esk", "Eskimo"); |
|
174 |
code2name.put("jav/jaw", "Javanese"); |
|
175 |
code2name.put("jpr", "Judeo-Persian"); |
|
176 |
code2name.put("0018", "Annotation"); |
|
177 |
code2name.put("mun", "Munda"); |
|
178 |
code2name.put("mus", "Creek"); |
|
179 |
code2name.put("mwr", "Marwari"); |
|
180 |
code2name.put("myn", "Mayan"); |
|
181 |
code2name.put("smi", "Sami"); |
|
182 |
code2name.put("sog", "Sogdian"); |
|
183 |
code2name.put("son", "Songhai"); |
|
184 |
code2name.put("sot", "Sotho, Southern"); |
|
185 |
code2name.put("tgk", "Tajik"); |
|
186 |
code2name.put("arxiv", "arXiv"); |
|
187 |
code2name.put("datasetsbyproject", "datasetsbyproject"); |
|
188 |
code2name.put("oai_sets", "oai_sets"); |
|
189 |
code2name.put("0021", "Dataset"); |
|
190 |
code2name.put("VA", "HOLY SEE (VATICAN CITY STATE)"); |
|
191 |
code2name.put("HT", "Haiti"); |
|
192 |
code2name.put("IN", "India"); |
|
193 |
code2name.put("ID", "Indonesia"); |
|
194 |
code2name.put("RU", "Russian Federation"); |
|
195 |
code2name.put("CP-CSA", "Combination of CP & CSA"); |
|
196 |
code2name.put("CSA", "Coordination and support action"); |
|
197 |
code2name.put("ada", "Adangme"); |
|
198 |
code2name.put("HN", "Honduras"); |
|
199 |
code2name.put("HK", "Hong Kong"); |
|
200 |
code2name.put("IR", "Iran (Islamic Republic of)"); |
|
201 |
code2name.put("IL", "Israel"); |
|
202 |
code2name.put("RW", "Rwanda"); |
|
203 |
code2name.put("RE", "RÃUNION"); |
|
204 |
code2name.put("BL", "SAINT BARTHÃLEMY"); |
|
205 |
code2name.put("afa", "Afro-Asiatic"); |
|
206 |
code2name.put("afh", "Afrihili"); |
|
207 |
code2name.put("dyu", "Dyula"); |
|
208 |
code2name.put("efi", "Efik"); |
|
209 |
code2name.put("egy", "Ancient Egyptian"); |
|
210 |
code2name.put("jrb", "Judeo-Arabic"); |
|
211 |
code2name.put("kaa", "Kara-Kalpak"); |
|
212 |
code2name.put("kab", "Kabyle"); |
|
213 |
code2name.put("nah", "Aztec"); |
|
214 |
code2name.put("nai", "North American Indian"); |
|
215 |
code2name.put("spa", "Spanish; Castilian"); |
|
216 |
code2name.put("srd", "Sardinian"); |
|
217 |
code2name.put("srr", "Serer"); |
|
218 |
code2name.put("HU", "Hungary"); |
|
219 |
code2name.put("IQ", "IRAQ"); |
|
220 |
code2name.put("SH", "SAINT HELENA, ASCENSION AND TRISTAN DA CUNHA"); |
|
221 |
code2name.put("afr", "Afrikaans"); |
|
222 |
code2name.put("kac", "Kachin"); |
|
223 |
code2name.put("nde", "Ndebele, North"); |
|
224 |
code2name.put("ssa", "Nilo-Saharan"); |
|
225 |
code2name.put("ssw", "Swati"); |
|
226 |
code2name.put("suk", "Sukuma"); |
|
227 |
code2name.put("kam", "Kamba"); |
|
228 |
code2name.put("kan", "Kannada"); |
|
229 |
code2name.put("kar", "Karen"); |
|
230 |
code2name.put("ndo", "Ndonga"); |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-mapping-utils-3.0.1