Revision 48842
Added by Alessia Bardi over 7 years ago
modules/dnet-msro-service/branches/saxonHE/src/test/java/eu/dnetlib/x3m/ApplyX3MappingTest.java | ||
---|---|---|
1 |
package eu.dnetlib.x3m; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
|
|
5 |
import eu.dnetlib.msro.workflows.nodes.transform.ApplyX3Mapping; |
|
6 |
import org.apache.commons.io.IOUtils; |
|
7 |
import org.junit.Ignore; |
|
8 |
import org.junit.Test; |
|
9 |
import org.springframework.core.io.ClassPathResource; |
|
10 |
|
|
11 |
/** |
|
12 |
* Created by alessia on 13/03/17. |
|
13 |
*/ |
|
14 |
public class ApplyX3MappingTest { |
|
15 |
|
|
16 |
//generic mappings |
|
17 |
final String mappingPath = "/eu/dnetlib/x3m/mappings.x3ml"; |
|
18 |
final String policyPath = "/eu/dnetlib/x3m/maria-policy.xml"; |
|
19 |
final String forthMappingPath = "/eu/dnetlib/x3m/mappingsWithoutGenerator.x3ml"; |
|
20 |
//generic files |
|
21 |
final String forthInputPath ="/eu/dnetlib/x3m/input.xml"; |
|
22 |
|
|
23 |
//Parthenos mappings |
|
24 |
final String parthenosPolicyPath = "/eu/dnetlib/x3m/parthenos_policy.xml"; |
|
25 |
final String mappingAriadnePath = "/eu/dnetlib/x3m/ariadne_dataset_mapping377.x3ml"; |
|
26 |
final String mappingEhriPath = "/eu/dnetlib/x3m/ehri_mapping.x3ml"; |
|
27 |
final String mappingCulturaItaliaPath = "/eu/dnetlib/x3m/cultura_italia_musei.x3ml"; |
|
28 |
|
|
29 |
//Parthenos records |
|
30 |
final String ariadnePath = "/eu/dnetlib/x3m/new-10304741.xml"; |
|
31 |
final String ehriRecord = "/eu/dnetlib/x3m/ehri_test.xml"; |
|
32 |
final String ehriRecord2 = "/eu/dnetlib/x3m/EHRI_sample_record_328.xml"; |
|
33 |
final String[] ariadneFiles = |
|
34 |
new String[] { ariadnePath, "/eu/dnetlib/x3m/new-10304737.xml", "/eu/dnetlib/x3m/new-10304738.xml", "/eu/dnetlib/x3m/new-10304739.xml", |
|
35 |
"/eu/dnetlib/x3m/new-10304740.xml", "/eu/dnetlib/x3m/new-10304741.xml", "/eu/dnetlib/x3m/new-10304742.xml" }; |
|
36 |
final String[] clarinFiles = |
|
37 |
new String[] { "/eu/dnetlib/x3m/clarin_dataset.xml", "/eu/dnetlib/x3m/clarin_dataset2.xml", |
|
38 |
"/eu/dnetlib/x3m/clarin_service.xml", "/eu/dnetlib/x3m/clarin_service2.xml" }; |
|
39 |
|
|
40 |
final String culturaItaliaPath = "/eu/dnetlib/x3m/culturaitalia_record.xml"; |
|
41 |
|
|
42 |
@Test |
|
43 |
public void testForth() throws Exception{ |
|
44 |
String m = getString(forthMappingPath); |
|
45 |
String r = getString(forthInputPath); |
|
46 |
|
|
47 |
ApplyX3Mapping x3m = new ApplyX3Mapping(new String[]{m}, null, true); |
|
48 |
String res = x3m.apply(r); |
|
49 |
System.out.println(res); |
|
50 |
} |
|
51 |
|
|
52 |
@Test |
|
53 |
public void test() throws IOException { |
|
54 |
doBasicTest(this.mappingPath, this.ariadnePath); |
|
55 |
} |
|
56 |
|
|
57 |
@Test |
|
58 |
public void testAriadne() throws IOException { |
|
59 |
doBasicTest(this.mappingAriadnePath, this.ariadnePath); |
|
60 |
} |
|
61 |
|
|
62 |
@Test |
|
63 |
public void testAriadneAll() throws IOException { |
|
64 |
String m = getString(mappingAriadnePath); |
|
65 |
String g = getString(parthenosPolicyPath); |
|
66 |
|
|
67 |
ApplyX3Mapping x3m = new ApplyX3Mapping(new String[]{m}, g, false); |
|
68 |
for(String path : ariadneFiles){ |
|
69 |
String res = x3m.apply(getString(path)); |
|
70 |
System.out.println(res); |
|
71 |
} |
|
72 |
} |
|
73 |
|
|
74 |
@Test |
|
75 |
public void testEhri() throws IOException { |
|
76 |
doBasicTest(this.mappingEhriPath, this.ehriRecord); |
|
77 |
} |
|
78 |
@Test |
|
79 |
public void testEhri2(){ |
|
80 |
doBasicTest(this.mappingEhriPath, this.ehriRecord2); |
|
81 |
} |
|
82 |
|
|
83 |
@Ignore |
|
84 |
@Test |
|
85 |
public void testCulturaItalia() throws IOException { |
|
86 |
doBasicTest(this.mappingCulturaItaliaPath, this.culturaItaliaPath); |
|
87 |
} |
|
88 |
|
|
89 |
|
|
90 |
@Test |
|
91 |
public void testClarinGysseling365() throws IOException { |
|
92 |
doBasicTest("/eu/dnetlib/x3m/clarin_mapping_365.x3ml", "/eu/dnetlib/x3m/clarin_gysseling_corpus.xml"); |
|
93 |
} |
|
94 |
|
|
95 |
@Ignore |
|
96 |
@Test |
|
97 |
public void testAllClarin(){ |
|
98 |
String mapService = getString("/eu/dnetlib/x3m/clarin_service_mapping.x3ml"); |
|
99 |
String mapData = getString("/eu/dnetlib/x3m/clarin_dataset_mapping.x3ml"); |
|
100 |
String policy = getString(parthenosPolicyPath); |
|
101 |
ApplyX3Mapping x3m = new ApplyX3Mapping(new String[]{mapData,mapService}, policy, true); |
|
102 |
for(String path : clarinFiles){ |
|
103 |
String res = x3m.apply(getString(path)); |
|
104 |
System.out.println(res); |
|
105 |
} |
|
106 |
} |
|
107 |
|
|
108 |
|
|
109 |
public void doBasicTest(String mappingPath, String recordPath){ |
|
110 |
String m = getString(mappingPath); |
|
111 |
String g = getString(parthenosPolicyPath); |
|
112 |
String r = getString(recordPath); |
|
113 |
ApplyX3Mapping x3m = new ApplyX3Mapping(new String[]{m}, g, true); |
|
114 |
String res = x3m.apply(r); |
|
115 |
System.out.println(res); |
|
116 |
|
|
117 |
} |
|
118 |
|
|
119 |
private String getString(final String classpath) { |
|
120 |
try { |
|
121 |
final ClassPathResource resource = new ClassPathResource(classpath); |
|
122 |
return IOUtils.toString(resource.getInputStream(), "UTF-8"); |
|
123 |
}catch(IOException e){ |
|
124 |
return null; |
|
125 |
} |
|
126 |
} |
|
127 |
} |
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/culturaItalia_record.xml | ||
---|---|---|
1 |
<pico:record xmlns:pico="http://purl.org/pico/1.0/" |
|
2 |
xmlns="" |
|
3 |
xmlns:premis="info:lc/xmlns/premis-v2" |
|
4 |
xmlns:mets="http://www.loc.gov/METS/" |
|
5 |
xmlns:vra="http://www.vraweb.org/vracore4.htm" |
|
6 |
xmlns:f="http://purl.org/pico/iccd/2.00/f/" |
|
7 |
xmlns:nu="http://purl.org/pico/iccd/3.00/nu/" |
|
8 |
xmlns:bdm="http://purl.org/pico/iccd/2.00/bdm/" |
|
9 |
xmlns:iccd="http://purl.org/pico/iccd/2.00/" |
|
10 |
xmlns:iccd3="http://purl.org/pico/iccd/3.00/" |
|
11 |
xmlns:smi="http://purl.org/pico/iccd/2.00/s-mi/" |
|
12 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
13 |
xmlns:oad="http://purl.org/pico/iccd/2.00/oa-d-n/" |
|
14 |
xmlns:dcterms="http://purl.org/dc/terms/" |
|
15 |
xmlns:mix="http://www.loc.gov/mix/v20" |
|
16 |
xmlns:dc="http://purl.org/dc/elements/1.1/" |
|
17 |
xmlns:xlink="http://www.w3.org/1999/xlink" |
|
18 |
xsi:schemaLocation="http://purl.org/pico/1.0/ http://www.culturaitalia.it/pico/schemas/1.0/pico.xsd http://purl.org/pico/iccd/2.00/ http://www.culturaitalia.it/pico/schemas/iccd/2.00/iccd.xsd http://purl.org/pico/iccd/2.00/oa-d-n/ http://www.culturaitalia.it/pico/schemas/iccd/2.00/oa-d-n.xsd http://purl.org/pico/iccd/2.00/s-mi/ http://www.culturaitalia.it/pico/schemas/iccd/2.00/s-mi.xsd http://purl.org/pico/iccd/2.00/bdm/ http://www.culturaitalia.it/pico/schemas/iccd/2.00/bdm.xsd http://purl.org/pico/iccd/2.00/f/ http://www.culturaitalia.it/pico/schemas/iccd/2.00/f.xsd http://purl.org/pico/iccd/3.00/ http://www.culturaitalia.it/pico/schemas/iccd/3.00/iccd.xsd http://purl.org/pico/iccd/3.00/nu/ http://www.culturaitalia.it/pico/schemas/iccd/3.00/nu.xsd"> |
|
19 |
<dc:identifier>coll_91</dc:identifier> |
|
20 |
<dc:title>Collezione di telecomunicazioni del Museo Nazionale della Scienza e della Tecnologia "Leonardo da Vinci"</dc:title> |
|
21 |
<dc:description xml:lang="it">La raccolta si compone di oltre 1300 beni, dal 1850 ad oggi, ed è costituita da strumenti ed apparati di tipo storico e didattico per la comunicazione a distanza, provenienti da enti pubblici, università, aziende e privati cittadini. Fanno parte di questa collezione cimeli rari come quelli marconiani e importanti riproduzioni storiche (come i telefoni di Meucci, il pantelegrafo di Caselli), impianti o parti di apparati pubblici e privati, dispositivi utilizzati in laboratori specialistici e prodotti industriali di largo consumo.</dc:description> |
|
22 |
<dc:description xml:lang="it">Nell'ambito del Sistema Informativo Regionale Beni Culturali sono state realizzate 148 schede di catalogo.</dc:description> |
|
23 |
<dc:description xml:lang="it">La raccolta di telecomunicazioni nei primi anni di vita del Museo, 1953-1958, contava già più di cento importanti beni provenienti da istituti pubblici e privati e da privati cittadini (CNR, l'allora Ministero delle Poste e Telecomunicazioni, la Società Radiomattima, la Compagnia Marconi, la Magneti Marelli, o il cavalier Donner Flori). Nel 1971, in occasione dell'inaugurazione di una nuova sala dedicata alle telecomunicazioni, la raccolta si arricchì di beni aventi una più ampia apertura verso le tecnologie della rete, grazie anche alle collaborazioni con società come la SIT-Siemens, la SIP, la Sirti, la Telettra. La raccolta è costantemente incrementata e aggiornata sulle nuove tecnologie anche in relazione alle nuove sezione espositive di Telegrafo e telefono, Radio (2005) e Televisione (2008).</dc:description> |
|
24 |
<dcterms:spatial>Museo Nazionale della Scienza e della Tecnologia "Leonardo da Vinci", Via San Vittore, 21 - Milano (MI), Italia - proprietà privata</dcterms:spatial> |
|
25 |
<dcterms:spatial xsi:type="pico:ISTAT">name=Milano; year=2001; code=015146</dcterms:spatial> |
|
26 |
<dcterms:created>1953 post; XX/ XXI</dcterms:created> |
|
27 |
<dcterms:created xsi:type="dcterms:Period">start=1953; end=1953</dcterms:created> |
|
28 |
<dc:type xsi:type="mdi:Type">Collezioni</dc:type> |
|
29 |
<dc:type xsi:type="dcterms:DCMIType">Collection</dc:type> |
|
30 |
<dcterms:isPartOf xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-mus_4467</dcterms:isPartOf> |
|
31 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7244</dcterms:hasPart> |
|
32 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7245</dcterms:hasPart> |
|
33 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7246</dcterms:hasPart> |
|
34 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7247</dcterms:hasPart> |
|
35 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7248</dcterms:hasPart> |
|
36 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7249</dcterms:hasPart> |
|
37 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7250</dcterms:hasPart> |
|
38 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7251</dcterms:hasPart> |
|
39 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7252</dcterms:hasPart> |
|
40 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7253</dcterms:hasPart> |
|
41 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7254</dcterms:hasPart> |
|
42 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7255</dcterms:hasPart> |
|
43 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7256</dcterms:hasPart> |
|
44 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7257</dcterms:hasPart> |
|
45 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7258</dcterms:hasPart> |
|
46 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7259</dcterms:hasPart> |
|
47 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7260</dcterms:hasPart> |
|
48 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7261</dcterms:hasPart> |
|
49 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7262</dcterms:hasPart> |
|
50 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7263</dcterms:hasPart> |
|
51 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7264</dcterms:hasPart> |
|
52 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7265</dcterms:hasPart> |
|
53 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7266</dcterms:hasPart> |
|
54 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7267</dcterms:hasPart> |
|
55 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7268</dcterms:hasPart> |
|
56 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7269</dcterms:hasPart> |
|
57 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7270</dcterms:hasPart> |
|
58 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7271</dcterms:hasPart> |
|
59 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7272</dcterms:hasPart> |
|
60 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7273</dcterms:hasPart> |
|
61 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7274</dcterms:hasPart> |
|
62 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7275</dcterms:hasPart> |
|
63 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7276</dcterms:hasPart> |
|
64 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7277</dcterms:hasPart> |
|
65 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7278</dcterms:hasPart> |
|
66 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7279</dcterms:hasPart> |
|
67 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7280</dcterms:hasPart> |
|
68 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7281</dcterms:hasPart> |
|
69 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7282</dcterms:hasPart> |
|
70 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7283</dcterms:hasPart> |
|
71 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7284</dcterms:hasPart> |
|
72 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7285</dcterms:hasPart> |
|
73 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7286</dcterms:hasPart> |
|
74 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7287</dcterms:hasPart> |
|
75 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7288</dcterms:hasPart> |
|
76 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7289</dcterms:hasPart> |
|
77 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7290</dcterms:hasPart> |
|
78 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7291</dcterms:hasPart> |
|
79 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7292</dcterms:hasPart> |
|
80 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7293</dcterms:hasPart> |
|
81 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7294</dcterms:hasPart> |
|
82 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7295</dcterms:hasPart> |
|
83 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7296</dcterms:hasPart> |
|
84 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7297</dcterms:hasPart> |
|
85 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7298</dcterms:hasPart> |
|
86 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7299</dcterms:hasPart> |
|
87 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7300</dcterms:hasPart> |
|
88 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7301</dcterms:hasPart> |
|
89 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7302</dcterms:hasPart> |
|
90 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7303</dcterms:hasPart> |
|
91 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7304</dcterms:hasPart> |
|
92 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7305</dcterms:hasPart> |
|
93 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7306</dcterms:hasPart> |
|
94 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7307</dcterms:hasPart> |
|
95 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7308</dcterms:hasPart> |
|
96 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7309</dcterms:hasPart> |
|
97 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7310</dcterms:hasPart> |
|
98 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7311</dcterms:hasPart> |
|
99 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7312</dcterms:hasPart> |
|
100 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7313</dcterms:hasPart> |
|
101 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7314</dcterms:hasPart> |
|
102 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7315</dcterms:hasPart> |
|
103 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7316</dcterms:hasPart> |
|
104 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7317</dcterms:hasPart> |
|
105 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7318</dcterms:hasPart> |
|
106 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7319</dcterms:hasPart> |
|
107 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7320</dcterms:hasPart> |
|
108 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7321</dcterms:hasPart> |
|
109 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7322</dcterms:hasPart> |
|
110 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7323</dcterms:hasPart> |
|
111 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7324</dcterms:hasPart> |
|
112 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7325</dcterms:hasPart> |
|
113 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7326</dcterms:hasPart> |
|
114 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7327</dcterms:hasPart> |
|
115 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7328</dcterms:hasPart> |
|
116 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7329</dcterms:hasPart> |
|
117 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7330</dcterms:hasPart> |
|
118 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7331</dcterms:hasPart> |
|
119 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7332</dcterms:hasPart> |
|
120 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7333</dcterms:hasPart> |
|
121 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7334</dcterms:hasPart> |
|
122 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7335</dcterms:hasPart> |
|
123 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7336</dcterms:hasPart> |
|
124 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7337</dcterms:hasPart> |
|
125 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7338</dcterms:hasPart> |
|
126 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7339</dcterms:hasPart> |
|
127 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7340</dcterms:hasPart> |
|
128 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7341</dcterms:hasPart> |
|
129 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7342</dcterms:hasPart> |
|
130 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7343</dcterms:hasPart> |
|
131 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7344</dcterms:hasPart> |
|
132 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7345</dcterms:hasPart> |
|
133 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7346</dcterms:hasPart> |
|
134 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7347</dcterms:hasPart> |
|
135 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7348</dcterms:hasPart> |
|
136 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7349</dcterms:hasPart> |
|
137 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7350</dcterms:hasPart> |
|
138 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7351</dcterms:hasPart> |
|
139 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7352</dcterms:hasPart> |
|
140 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7353</dcterms:hasPart> |
|
141 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7354</dcterms:hasPart> |
|
142 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7355</dcterms:hasPart> |
|
143 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7356</dcterms:hasPart> |
|
144 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7357</dcterms:hasPart> |
|
145 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7358</dcterms:hasPart> |
|
146 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7359</dcterms:hasPart> |
|
147 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7360</dcterms:hasPart> |
|
148 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7361</dcterms:hasPart> |
|
149 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7362</dcterms:hasPart> |
|
150 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7363</dcterms:hasPart> |
|
151 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7364</dcterms:hasPart> |
|
152 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7365</dcterms:hasPart> |
|
153 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7366</dcterms:hasPart> |
|
154 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7367</dcterms:hasPart> |
|
155 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7368</dcterms:hasPart> |
|
156 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7369</dcterms:hasPart> |
|
157 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7370</dcterms:hasPart> |
|
158 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7371</dcterms:hasPart> |
|
159 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7372</dcterms:hasPart> |
|
160 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7373</dcterms:hasPart> |
|
161 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7374</dcterms:hasPart> |
|
162 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7375</dcterms:hasPart> |
|
163 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7376</dcterms:hasPart> |
|
164 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7377</dcterms:hasPart> |
|
165 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7378</dcterms:hasPart> |
|
166 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7379</dcterms:hasPart> |
|
167 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7380</dcterms:hasPart> |
|
168 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7381</dcterms:hasPart> |
|
169 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7382</dcterms:hasPart> |
|
170 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7383</dcterms:hasPart> |
|
171 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7384</dcterms:hasPart> |
|
172 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7385</dcterms:hasPart> |
|
173 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7386</dcterms:hasPart> |
|
174 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7387</dcterms:hasPart> |
|
175 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7388</dcterms:hasPart> |
|
176 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7389</dcterms:hasPart> |
|
177 |
<dcterms:hasPart xsi:type="dcterms:URI">oai:culturaitalia.it:museiditalia-work_7390</dcterms:hasPart> |
|
178 |
<dcterms:isReferencedBy xml:lang="it">scheda SIRBeC COL: COL-ST010-0000002</dcterms:isReferencedBy> |
|
179 |
<pico:preview xsi:type="dcterms:URI">http://194.242.241.163/fedora/objects/coll:91/datastreams/MM258501/content</pico:preview> |
|
180 |
<dcterms:isReferencedBy xsi:type="pico:Anchor">title=visualizza il file Mets; URL=fedora/objects/coll:91/datastreams/export/content</dcterms:isReferencedBy> |
|
181 |
</pico:record> |
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/EHRI_sample_record_328.xml | ||
---|---|---|
1 |
<?xml version="1.0" ?> |
|
2 |
<ead xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd"> |
|
3 |
<eadheader countryencoding="iso3166-1" dateencoding="iso8601" scriptencoding="iso15924" repositoryencoding="iso15511" relatedencoding="DC"> |
|
4 |
<eadid>us-005578-irn516886</eadid> |
|
5 |
<filedesc> |
|
6 |
<titlestmt> |
|
7 |
<titleproper>Romana Primus photograph collection</titleproper> |
|
8 |
</titlestmt> |
|
9 |
<publicationstmt> |
|
10 |
<publisher>United States Holocaust Memorial Museum</publisher> |
|
11 |
<address> |
|
12 |
<addressline>100 Raoul Wallenberg Place, S.W.</addressline> |
|
13 |
<addressline>DC 20024-2126</addressline> |
|
14 |
<addressline>Washington</addressline> |
|
15 |
<addressline>District of Columbia</addressline> |
|
16 |
<addressline>US</addressline> |
|
17 |
<addressline>202 488 0400</addressline> |
|
18 |
<addressline>202-479-9726</addressline> |
|
19 |
<addressline>http://www.ushmm.org/</addressline> |
|
20 |
<addressline>archives@ushmm.org</addressline> |
|
21 |
<addressline>United States</addressline> |
|
22 |
</address> |
|
23 |
</publicationstmt> |
|
24 |
<notestmt> |
|
25 |
<note> |
|
26 |
<p>This encoded description is derived from structured data provided to EHRI by a partner institution but may differ in structure and/or content from its source. The collection holding institution considers this description as an accurate reflection of the archival holdings to which it refers at the moment of data transfer.</p> |
|
27 |
</note> |
|
28 |
</notestmt> |
|
29 |
</filedesc> |
|
30 |
<profiledesc> |
|
31 |
<creation>This file was exported automatically from the EHRI database administration tool and represents a work-in-progress. |
|
32 |
<date normal="20170601">2017-06-01T16:15:08.688+01:00</date> |
|
33 |
</creation> |
|
34 |
<langusage> |
|
35 |
<language langcode="eng">English</language> |
|
36 |
</langusage> |
|
37 |
</profiledesc> |
|
38 |
<revisiondesc> |
|
39 |
<change> |
|
40 |
<date>2014-12-19T16:12:54.402Z</date> |
|
41 |
<item>These files were provided by the United States Holocaust Memorial Museum to EHRI on 2014-11-21. |
|
42 |
|
|
43 |
[ingest]</item> |
|
44 |
</change> |
|
45 |
</revisiondesc> |
|
46 |
</eadheader> |
|
47 |
<archdesc level="collection"> |
|
48 |
<did> |
|
49 |
<unitid>irn516886</unitid> |
|
50 |
<unittitle encodinganalog="3.1.2">Romana Primus photograph collection</unittitle> |
|
51 |
<unitdate encodinganalog="3.1.3">1946-1947</unitdate> |
|
52 |
<repository> |
|
53 |
<corpname>United States Holocaust Memorial Museum</corpname> |
|
54 |
</repository> |
|
55 |
</did> |
|
56 |
<scopecontent encodinganalog="3.3.1"> |
|
57 |
<p><![CDATA[The collection consists of four photographs of Romana Strochlitz Primus as a baby, her parents, Sigmund and Ruzka (Rose) Grinburg Strochlitz, and other refugees at the Bergen-Belsen displaced persons camp in Germany after World War II.]]></p> |
|
58 |
</scopecontent> |
|
59 |
<accessrestrict encodinganalog="3.4.1"> |
|
60 |
<p><![CDATA[No restrictions on access]]></p> |
|
61 |
</accessrestrict> |
|
62 |
<userestrict encodinganalog="3.4.2"> |
|
63 |
<p><![CDATA[No restrictions on use]]></p> |
|
64 |
</userestrict> |
|
65 |
<acqinfo encodinganalog="3.2.4"> |
|
66 |
<p><![CDATA[Accession number: 1999.18]]></p> |
|
67 |
</acqinfo> |
|
68 |
<custodhist encodinganalog="3.2.3"> |
|
69 |
<p><![CDATA[The collection was donated to the United States Holocaust Memorial Museum by Romana Strochlitz Primus in 1999.]]></p> |
|
70 |
</custodhist> |
|
71 |
<odd encodinganalog="3.6.1"> |
|
72 |
<p><![CDATA[Record type: Document]]></p> |
|
73 |
</odd> |
|
74 |
<controlaccess> |
|
75 |
<subject>Refugees--Germany--1940-1950.</subject> |
|
76 |
<subject>Refugee camps--Germany--1940-1950.</subject> |
|
77 |
</controlaccess> |
|
78 |
<controlaccess> |
|
79 |
<genreform>Photographs.</genreform> |
|
80 |
</controlaccess> |
|
81 |
<controlaccess> |
|
82 |
<persname>Kirszenbaum, Halina Grauman.</persname> |
|
83 |
<persname>Brechner, Dosia Grinburg.</persname> |
|
84 |
<persname>Weinreich, Hela.</persname> |
|
85 |
<persname>Strochlitz, Rose Grinburg.</persname> |
|
86 |
<persname>Strochlitz, Sigmund.</persname> |
|
87 |
<persname>Primus, Romana Strochlitz.</persname> |
|
88 |
</controlaccess> |
|
89 |
</archdesc> |
|
90 |
</ead> |
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/ehri_test.xml | ||
---|---|---|
1 |
<ead xmlns="urn:isbn:1-931666-22-9" |
|
2 |
xmlns:xlink="http://www.w3.org/1999/xlink" |
|
3 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
4 |
xsi:schemaLocation="urn:isbn:1-931666-22-9 http://www.loc.gov/ead/ead.xsd"> |
|
5 |
<eadheader xmlns="" |
|
6 |
countryencoding="iso3166-1" |
|
7 |
dateencoding="iso8601" |
|
8 |
relatedencoding="DC" |
|
9 |
repositoryencoding="iso15511" |
|
10 |
scriptencoding="iso15924"> |
|
11 |
<eadid>us-005578-irn516886</eadid> |
|
12 |
<filedesc> |
|
13 |
<titlestmt> |
|
14 |
<titleproper>Romana Primus photograph collection</titleproper> |
|
15 |
</titlestmt> |
|
16 |
<publicationstmt> |
|
17 |
<publisher>United States Holocaust Memorial Museum</publisher> |
|
18 |
<address> |
|
19 |
<addressline>100 Raoul Wallenberg Place, S.W.</addressline> |
|
20 |
<addressline>DC 20024-2126</addressline> |
|
21 |
<addressline>Washington</addressline> |
|
22 |
<addressline>District of Columbia</addressline> |
|
23 |
<addressline>US</addressline> |
|
24 |
<addressline>202 488 0400</addressline> |
|
25 |
<addressline>202-479-9726</addressline> |
|
26 |
<addressline>http://www.ushmm.org/</addressline> |
|
27 |
<addressline>archives@ushmm.org</addressline> |
|
28 |
<addressline>United States</addressline> |
|
29 |
</address> |
|
30 |
</publicationstmt> |
|
31 |
<notestmt> |
|
32 |
<note> |
|
33 |
<p>This encoded description is derived from structured data provided to EHRI by a partner institution but may differ in structure and/or content from its source. The collection holding institution considers this description as an accurate reflection of the archival holdings to which it refers at the moment of data transfer.</p> |
|
34 |
</note> |
|
35 |
</notestmt> |
|
36 |
</filedesc> |
|
37 |
<profiledesc> |
|
38 |
<creation>This file was exported automatically from the EHRI database administration tool and represents a work-in-progress. |
|
39 |
<date normal="20170513">2017-05-13T09:25:31.407+01:00</date> |
|
40 |
</creation> |
|
41 |
<langusage> |
|
42 |
<language langcode="eng">English</language> |
|
43 |
</langusage> |
|
44 |
</profiledesc> |
|
45 |
<revisiondesc> |
|
46 |
<change> |
|
47 |
<date>2014-12-19T16:12:54.402Z</date> |
|
48 |
<item>These files were provided by the United States Holocaust Memorial Museum to EHRI on 2014-11-21. |
|
49 |
|
|
50 |
[ingest] |
|
51 |
</item> |
|
52 |
</change> |
|
53 |
</revisiondesc> |
|
54 |
</eadheader> |
|
55 |
<archdesc xmlns="" level="collection"> |
|
56 |
<did> |
|
57 |
<unitid>irn516886</unitid> |
|
58 |
<unittitle encodinganalog="3.1.2">Romana Primus photograph collection</unittitle> |
|
59 |
<unitdate encodinganalog="3.1.3">1946-1947</unitdate> |
|
60 |
<repository> |
|
61 |
<corpname>United States Holocaust Memorial Museum</corpname> |
|
62 |
</repository> |
|
63 |
</did> |
|
64 |
<scopecontent encodinganalog="3.3.1"> |
|
65 |
<p>The collection consists of four photographs of Romana Strochlitz Primus as a baby, her parents, Sigmund and Ruzka (Rose) Grinburg Strochlitz, and other refugees at the Bergen-Belsen displaced persons camp in Germany after World War II.</p> |
|
66 |
</scopecontent> |
|
67 |
<accessrestrict encodinganalog="3.4.1"> |
|
68 |
<p>No restrictions on access</p> |
|
69 |
</accessrestrict> |
|
70 |
<userestrict encodinganalog="3.4.2"> |
|
71 |
<p>No restrictions on use</p> |
|
72 |
</userestrict> |
|
73 |
<acqinfo encodinganalog="3.2.4"> |
|
74 |
<p>Accession number: 1999.18</p> |
|
75 |
</acqinfo> |
|
76 |
<custodhist encodinganalog="3.2.3"> |
|
77 |
<p>The collection was donated to the United States Holocaust Memorial Museum by Romana Strochlitz Primus in 1999.</p> |
|
78 |
</custodhist> |
|
79 |
<odd encodinganalog="3.6.1"> |
|
80 |
<p>Record type: Document</p> |
|
81 |
</odd> |
|
82 |
<controlaccess> |
|
83 |
<subject>Refugees--Germany--1940-1950.</subject> |
|
84 |
<subject>Refugee camps--Germany--1940-1950.</subject> |
|
85 |
</controlaccess> |
|
86 |
<controlaccess> |
|
87 |
<persname>Kirszenbaum, Halina Grauman.</persname> |
|
88 |
<persname>Brechner, Dosia Grinburg.</persname> |
|
89 |
<persname>Weinreich, Hela.</persname> |
|
90 |
<persname>Strochlitz, Rose Grinburg.</persname> |
|
91 |
<persname>Strochlitz, Sigmund.</persname> |
|
92 |
<persname>Primus, Romana Strochlitz.</persname> |
|
93 |
</controlaccess> |
|
94 |
<controlaccess> |
|
95 |
<genreform>Photographs.</genreform> |
|
96 |
</controlaccess> |
|
97 |
</archdesc> |
|
98 |
</ead> |
|
99 |
|
|
100 |
|
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/clarin_dataset2.xml | ||
---|---|---|
1 |
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925" CMDVersion="1.2" xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/clarin.eu:cr1:p_1407745711925/xsd"> |
|
2 |
<cmd:Header> |
|
3 |
<cmd:MdCreator>Gunn Inger Lyse Samdal</cmd:MdCreator> |
|
4 |
<cmd:MdCreationDate>2015-10-12</cmd:MdCreationDate> |
|
5 |
<cmd:MdSelfLink>http://hdl.handle.net/11509/80</cmd:MdSelfLink> |
|
6 |
<cmd:MdProfile>clarin.eu:cr1:p_1407745711925</cmd:MdProfile> |
|
7 |
<cmd:MdCollectionDisplayName>Clarino UiB</cmd:MdCollectionDisplayName> |
|
8 |
</cmd:Header> |
|
9 |
<cmd:Resources> |
|
10 |
<cmd:ResourceProxyList> |
|
11 |
<cmd:ResourceProxy id="landing-page-ubb"> |
|
12 |
<cmd:ResourceType mimetype="">LandingPage</cmd:ResourceType> |
|
13 |
<cmd:ResourceRef>http://hdl.handle.net/11509/80</cmd:ResourceRef> |
|
14 |
</cmd:ResourceProxy> |
|
15 |
<cmd:ResourceProxy id="search-page-corpuscle"> |
|
16 |
<cmd:ResourceType mimetype="">SearchPage</cmd:ResourceType> |
|
17 |
<cmd:ResourceRef>http://clarino.uib.no/korpuskel/landing-page?identifier=forskning-no&amp;view=short</cmd:ResourceRef> |
|
18 |
</cmd:ResourceProxy> |
|
19 |
<cmd:ResourceProxy id="resource-fn"> |
|
20 |
<cmd:ResourceType mimetype="">Resource</cmd:ResourceType> |
|
21 |
<cmd:ResourceRef>http://hdl.handle.net/11509/80</cmd:ResourceRef> |
|
22 |
</cmd:ResourceProxy> |
|
23 |
</cmd:ResourceProxyList> |
|
24 |
<cmd:JournalFileProxyList/> |
|
25 |
<cmd:ResourceRelationList/> |
|
26 |
</cmd:Resources> |
|
27 |
<cmd:IsPartOfList/> |
|
28 |
<cmd:Components> |
|
29 |
<cmdp:corpusProfile> |
|
30 |
<cmdp:resourceCommonInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485126"> |
|
31 |
<cmdp:resourceType>corpus</cmdp:resourceType> |
|
32 |
<cmdp:identificationInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485125"> |
|
33 |
<cmdp:resourceName xml:lang="en">Text material from Forskning.no (1998 - 2012)</cmdp:resourceName> |
|
34 |
<cmdp:description xml:lang="en">Data set containing texts from the popular science website forskning.no. The text material is constituted by articles published by Forskning.no belonging to the following three categories: |
|
35 |
1) Articles written by journalists employed at forskning.no |
|
36 |
2) Articles written by member institutions of forskning.no (76 universities, colleges, research centers, research departments in government agencies and more). These articles are written by staff journalists, information officers and other non-academic staff. Each article has been edited by forskning.no. |
|
37 |
3) Articles from the newsdesk NRK Viten, with whom forskning.no cooperates. These articles are written by NRK journalists. |
|
38 |
Forskning.no kindly makes this material available in CLARINO as downloadable XML to promote language research. CLARINO's agreement also includes the permission to use future articles to be published by forskning.no; as of October 2015, however, the newest downloadable text is from October 2012. |
|
39 |
|
|
40 |
ACCESS: the material is available in downloadable form at the CLARINO Bergen Centre and in searchable form at Corpuscle (see links in metadata). Corpuscle allows you to pass queries to the corpus, and you may ask for concordances, collocations and distribution.</cmdp:description> |
|
41 |
<cmdp:resourceShortName xml:lang="no">Forskning.no</cmdp:resourceShortName> |
|
42 |
<cmdp:url description="downloadable - UBB" ref="landing-page-ubb">http://hdl.handle.net/11509/80</cmdp:url> |
|
43 |
<cmdp:url description="searchable - Corpuscle" ref="search-page-corpuscle">http://clarino.uib.no/korpuskel/landing-page?identifier=forskning-no&amp;view=short</cmdp:url> |
|
44 |
<cmdp:PID description="handle @ UBB" ref="landing-page-ubb">http://hdl.handle.net/11509/80</cmdp:PID> |
|
45 |
</cmdp:identificationInfo> |
|
46 |
<cmdp:distributionInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485124"> |
|
47 |
<cmdp:licenceInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485158"> |
|
48 |
<cmdp:userCategory>Restricted</cmdp:userCategory> |
|
49 |
<cmdp:distributionAccessMedium ref="landing-page-ubb">downloadable</cmdp:distributionAccessMedium> |
|
50 |
<cmdp:distributionAccessMedium ref="search-page-corpuscle">accessibleThroughInterface</cmdp:distributionAccessMedium> |
|
51 |
<cmdp:downloadLocation description="landing page @ UBB portal" ref="landing-page-ubb">http://hdl.handle.net/11509/80</cmdp:downloadLocation> |
|
52 |
<cmdp:downloadLocation description="landing page @ Corpuscle" ref="search-page-corpuscle">http://clarino.uib.no/korpuskel/landing-page?identifier=forskning-no</cmdp:downloadLocation> |
|
53 |
<cmdp:licence cmd:ComponentId="clarin.eu:cr1:c_1447674760330"> |
|
54 |
<cmdp:licenceFamily>CLARIN</cmdp:licenceFamily> |
|
55 |
<cmdp:licenceName>CLARIN_RES-DEP</cmdp:licenceName> |
|
56 |
<cmdp:licenceURL>https://kitwiki.csc.fi/twiki/bin/view/FinCLARIN/ClarinEula?RES=1&amp;ID=1&amp;PERM=1&amp;PLAN=1&amp;BY=1&amp;NORED=1&amp;DEP=1</cmdp:licenceURL> |
|
57 |
<cmdp:conditionsOfUse>BY</cmdp:conditionsOfUse> |
|
58 |
<cmdp:conditionsOfUse>DEP</cmdp:conditionsOfUse> |
|
59 |
<cmdp:conditionsOfUse>ID</cmdp:conditionsOfUse> |
|
60 |
<cmdp:conditionsOfUse>NORED</cmdp:conditionsOfUse> |
|
61 |
<cmdp:conditionsOfUse>PLAN</cmdp:conditionsOfUse> |
|
62 |
<cmdp:nonStandardConditionsOfUse>It is not allowed to distribute/publish complete articles, presented as a coherent text, from the Resource.</cmdp:nonStandardConditionsOfUse> |
|
63 |
</cmdp:licence> |
|
64 |
<cmdp:licensor> |
|
65 |
<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194"> |
|
66 |
<cmdp:actorType>organization</cmdp:actorType> |
|
67 |
<cmdp:personInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485192"> |
|
68 |
<cmdp:surname>Kristiansen</cmdp:surname> |
|
69 |
<cmdp:givenName>Nina</cmdp:givenName> |
|
70 |
<cmdp:position>Editor in chief</cmdp:position> |
|
71 |
<cmdp:affiliation> |
|
72 |
<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883"> |
|
73 |
<cmdp:organizationName>forskning.no</cmdp:organizationName> |
|
74 |
</cmdp:organizationInfo> |
|
75 |
</cmdp:affiliation> |
|
76 |
</cmdp:personInfo> |
|
77 |
<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883"> |
|
78 |
<cmdp:organizationName>forskning.no</cmdp:organizationName> |
|
79 |
</cmdp:organizationInfo> |
|
80 |
<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460"> |
|
81 |
<cmdp:email>Nina@forskning.no</cmdp:email> |
|
82 |
<cmdp:city>Oslo</cmdp:city> |
|
83 |
<cmdp:country>Norway</cmdp:country> |
|
84 |
</cmdp:communicationInfo> |
|
85 |
</cmdp:actorInfo> |
|
86 |
</cmdp:licensor> |
|
87 |
</cmdp:licenceInfo> |
|
88 |
</cmdp:distributionInfo> |
|
89 |
<cmdp:contact> |
|
90 |
<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194"> |
|
91 |
<cmdp:actorType>organization</cmdp:actorType> |
|
92 |
<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883"> |
|
93 |
<cmdp:organizationName xml:lang="en">CLARINO Bergen Centre</cmdp:organizationName> |
|
94 |
</cmdp:organizationInfo> |
|
95 |
<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460"> |
|
96 |
<cmdp:email>clarin@uib.no</cmdp:email> |
|
97 |
<cmdp:url>https://repo.clarino.uib.no/xmlui/</cmdp:url> |
|
98 |
</cmdp:communicationInfo> |
|
99 |
</cmdp:actorInfo> |
|
100 |
</cmdp:contact> |
|
101 |
<cmdp:metadataInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711922"> |
|
102 |
<cmdp:metadataCreationDate>2015-07-29</cmdp:metadataCreationDate> |
|
103 |
<cmdp:metadataLanguageName>English</cmdp:metadataLanguageName> |
|
104 |
<cmdp:metadataLanguageId>en</cmdp:metadataLanguageId> |
|
105 |
<cmdp:metadataLastDateUpdated>2016-02-12</cmdp:metadataLastDateUpdated> |
|
106 |
<cmdp:metadataCreator> |
|
107 |
<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194"> |
|
108 |
<cmdp:actorType>person</cmdp:actorType> |
|
109 |
<cmdp:personInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485192"> |
|
110 |
<cmdp:surname xml:lang="no">Lyse</cmdp:surname> |
|
111 |
<cmdp:givenName xml:lang="no">Gunn Inger</cmdp:givenName> |
|
112 |
<cmdp:sex>female</cmdp:sex> |
|
113 |
<cmdp:position>Researcher (Ph.D)</cmdp:position> |
|
114 |
<cmdp:affiliation> |
|
115 |
<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883"> |
|
116 |
<cmdp:organizationName xml:lang="en">University of Bergen</cmdp:organizationName> |
|
117 |
<cmdp:organizationName xml:lang="no">Universitetet i Bergen</cmdp:organizationName> |
|
118 |
<cmdp:organizationShortName xml:lang="no">UiB</cmdp:organizationShortName> |
|
119 |
<cmdp:organizationShortName xml:lang="en">UoB</cmdp:organizationShortName> |
|
120 |
<cmdp:departmentName xml:lang="en">Department of Linguistic, Literary and Aesthetic Studies</cmdp:departmentName> |
|
121 |
</cmdp:organizationInfo> |
|
122 |
</cmdp:affiliation> |
|
123 |
</cmdp:personInfo> |
|
124 |
<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460"> |
|
125 |
<cmdp:email>clarin@uib.no</cmdp:email> |
|
126 |
</cmdp:communicationInfo> |
|
127 |
</cmdp:actorInfo> |
|
128 |
</cmdp:metadataCreator> |
|
129 |
</cmdp:metadataInfo> |
|
130 |
<cmdp:resourceCreationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711921"> |
|
131 |
<cmdp:resourceCreator> |
|
132 |
<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194" cmd:ref="resource-fn"> |
|
133 |
<cmdp:actorType>person</cmdp:actorType> |
|
134 |
<cmdp:personInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485192"> |
|
135 |
<cmdp:surname xml:lang="en">Hofland</cmdp:surname> |
|
136 |
<cmdp:givenName xml:lang="en">Knut</cmdp:givenName> |
|
137 |
<cmdp:sex>male</cmdp:sex> |
|
138 |
<cmdp:position>Fagkonsulent / Specialist Consultant</cmdp:position> |
|
139 |
<cmdp:affiliation> |
|
140 |
<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883"> |
|
141 |
<cmdp:organizationName xml:lang="en">Uni Research AS</cmdp:organizationName> |
|
142 |
<cmdp:departmentName xml:lang="en">Uni Research Computing</cmdp:departmentName> |
|
143 |
</cmdp:organizationInfo> |
|
144 |
</cmdp:affiliation> |
|
145 |
</cmdp:personInfo> |
|
146 |
<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460"> |
|
147 |
<cmdp:email>knut.hofland@uni.no</cmdp:email> |
|
148 |
<cmdp:url>http://uni.no/nb/staff/directory/knut-hofland/</cmdp:url> |
|
149 |
<cmdp:city>Bergen</cmdp:city> |
|
150 |
<cmdp:country>Norway</cmdp:country> |
|
151 |
<cmdp:telephoneNumber>+47 5558 9463</cmdp:telephoneNumber> |
|
152 |
</cmdp:communicationInfo> |
|
153 |
</cmdp:actorInfo> |
|
154 |
<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194" cmd:ref="search-page-corpuscle"> |
|
155 |
<cmdp:actorType>person</cmdp:actorType> |
|
156 |
<cmdp:personInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485192"> |
|
157 |
<cmdp:surname xml:lang="en">Meurer</cmdp:surname> |
|
158 |
<cmdp:givenName xml:lang="en">Paul</cmdp:givenName> |
|
159 |
<cmdp:sex>male</cmdp:sex> |
|
160 |
<cmdp:position>Senior researcher</cmdp:position> |
|
161 |
<cmdp:affiliation> |
|
162 |
<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883"> |
|
163 |
<cmdp:organizationName xml:lang="en">Uni Research AS</cmdp:organizationName> |
|
164 |
<cmdp:departmentName xml:lang="en">Uni Research Computing</cmdp:departmentName> |
|
165 |
</cmdp:organizationInfo> |
|
166 |
</cmdp:affiliation> |
|
167 |
</cmdp:personInfo> |
|
168 |
<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460"> |
|
169 |
<cmdp:email>paul.meurer@uni.no</cmdp:email> |
|
170 |
</cmdp:communicationInfo> |
|
171 |
</cmdp:actorInfo> |
|
172 |
</cmdp:resourceCreator> |
|
173 |
</cmdp:resourceCreationInfo> |
|
174 |
</cmdp:resourceCommonInfo> |
|
175 |
<cmdp:corpusInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711878"> |
|
176 |
<cmdp:corpusType>Written Corpus</cmdp:corpusType> |
|
177 |
<cmdp:corpusPartInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711885"> |
|
178 |
<cmdp:mediaType>text</cmdp:mediaType> |
|
179 |
<cmdp:corpusTextInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485188"/> |
|
180 |
</cmdp:corpusPartInfo> |
|
181 |
<cmdp:corpusPartGeneralInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711882"> |
|
182 |
<cmdp:sourceWorkInfo cmd:ComponentId="clarin.eu:cr1:c_1407745712071"> |
|
183 |
<cmdp:workDescription>The text material is constituted by articles published by Forskning.no (CLARINO's agreement also includes the permission to use future articles to be published by Forskning.no) belonging to the following three categories: |
|
184 |
1) Articles written by journalists employed at Forskning.no |
|
185 |
2) Articles written by member institutions of Forskning.no (76 universities, colleges, research |
|
186 |
centers, research departments in government agencies and more). These articles are written by staff journalists, information officers and other non-academic staff. Each article has been edited by Forskning.no. |
|
187 |
3) Articles from the newsdesk NRK Viten, with whom Forskning.no cooperates. These articles are written by NRK journalists. A full list of partner/cooperation institutions may be presented on demand.</cmdp:workDescription> |
|
188 |
<cmdp:publisher> |
|
189 |
<cmdp:actorInfo cmd:ComponentId="clarin.eu:cr1:c_1396012485194"> |
|
190 |
<cmdp:actorType>organization</cmdp:actorType> |
|
191 |
<cmdp:organizationInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711883"> |
|
192 |
<cmdp:organizationName>forskning.no</cmdp:organizationName> |
|
193 |
</cmdp:organizationInfo> |
|
194 |
<cmdp:communicationInfo cmd:ComponentId="clarin.eu:cr1:c_1352813745460"> |
|
195 |
<cmdp:email>Nina@forskning.no</cmdp:email> |
|
196 |
<cmdp:city>Oslo</cmdp:city> |
|
197 |
<cmdp:country>Norway</cmdp:country> |
|
198 |
</cmdp:communicationInfo> |
|
199 |
</cmdp:actorInfo> |
|
200 |
</cmdp:publisher> |
|
201 |
</cmdp:sourceWorkInfo> |
|
202 |
<cmdp:lingualityInfo cmd:ComponentId="clarin.eu:cr1:c_1355150532313"> |
|
203 |
<cmdp:lingualityType>monolingual</cmdp:lingualityType> |
|
204 |
</cmdp:lingualityInfo> |
|
205 |
<cmdp:languageInfo cmd:ComponentId="clarin.eu:cr1:c_1428388179423"> |
|
206 |
<cmdp:languageId>no</cmdp:languageId> |
|
207 |
<cmdp:languageName>Norwegian</cmdp:languageName> |
|
208 |
</cmdp:languageInfo> |
|
209 |
<cmdp:languageInfo cmd:ComponentId="clarin.eu:cr1:c_1428388179423"> |
|
210 |
<cmdp:languageId>nb</cmdp:languageId> |
|
211 |
<cmdp:languageName>Norwegian Bokmål</cmdp:languageName> |
|
212 |
</cmdp:languageInfo> |
|
213 |
<cmdp:modalityInfo cmd:ComponentId="clarin.eu:cr1:c_1447674760356"> |
|
214 |
<cmdp:modalityType>writtenLanguage</cmdp:modalityType> |
|
215 |
</cmdp:modalityInfo> |
|
216 |
<cmdp:sizeInfo cmd:ComponentId="clarin.eu:cr1:c_1353678848785"> |
|
217 |
<cmdp:size>ca. 489 000</cmdp:size> |
|
218 |
<cmdp:sizeUnit>sentences</cmdp:sizeUnit> |
|
219 |
</cmdp:sizeInfo> |
|
220 |
<cmdp:sizeInfo cmd:ComponentId="clarin.eu:cr1:c_1353678848785"> |
|
221 |
<cmdp:size>ca. 8 300 000</cmdp:size> |
|
222 |
<cmdp:sizeUnit>words</cmdp:sizeUnit> |
|
223 |
</cmdp:sizeInfo> |
|
224 |
<cmdp:sizeInfo cmd:ComponentId="clarin.eu:cr1:c_1353678848785"> |
|
225 |
<cmdp:size>ca. 13 200</cmdp:size> |
|
226 |
<cmdp:sizeUnit>articles</cmdp:sizeUnit> |
|
227 |
</cmdp:sizeInfo> |
|
228 |
<cmdp:classificationInfo cmd:ComponentId="clarin.eu:cr1:c_1403588862809"> |
|
229 |
<cmdp:genreInfo cmd:ComponentId="clarin.eu:cr1:c_1407745711877"> |
|
230 |
<cmdp:genreType>textGenre</cmdp:genreType> |
|
231 |
<cmdp:genre>newspaper and magazines</cmdp:genre> |
|
232 |
</cmdp:genreInfo> |
|
233 |
</cmdp:classificationInfo> |
|
234 |
<cmdp:timeCoverageInfo cmd:ComponentId="clarin.eu:cr1:c_1447674760358"> |
|
235 |
<cmdp:timeCoverage>1998-05-01 - 2012-10-20</cmdp:timeCoverage> |
|
236 |
</cmdp:timeCoverageInfo> |
|
237 |
</cmdp:corpusPartGeneralInfo> |
|
238 |
</cmdp:corpusInfo> |
|
239 |
</cmdp:corpusProfile> |
|
240 |
</cmd:Components> |
|
241 |
</cmd:CMD> |
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/clarin_gysseling_corpus.xml | ||
---|---|---|
1 |
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1" |
|
2 |
xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1271859438164" |
|
3 |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
4 |
CMDVersion="1.2" |
|
5 |
xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1271859438164 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/clarin.eu:cr1:p_1271859438164/xsd"> |
|
6 |
<cmd:Header> |
|
7 |
<cmd:MdCreator>servicedesk@inl.nl</cmd:MdCreator> |
|
8 |
<cmd:MdCreationDate>2012-06-11</cmd:MdCreationDate> |
|
9 |
<cmd:MdSelfLink>hdl:10032/cd747deda6f459853a24906eaa20b3e1</cmd:MdSelfLink> |
|
10 |
<cmd:MdProfile>clarin.eu:cr1:p_1271859438164</cmd:MdProfile> |
|
11 |
<cmd:MdCollectionDisplayName>INL Taalbank Nederlands</cmd:MdCollectionDisplayName> |
|
12 |
</cmd:Header> |
|
13 |
<cmd:Resources> |
|
14 |
<cmd:ResourceProxyList> |
|
15 |
<cmd:ResourceProxy id="resource"> |
|
16 |
<cmd:ResourceType>Resource</cmd:ResourceType> |
|
17 |
<cmd:ResourceRef>hdl:10032/9dd8605956a31dbd8c2b63ffef998bd2</cmd:ResourceRef> |
|
18 |
</cmd:ResourceProxy> |
|
19 |
<cmd:ResourceProxy id="SearchPage"> |
|
20 |
<cmd:ResourceType>SearchPage</cmd:ResourceType> |
|
21 |
<cmd:ResourceRef>hdl:10032/9dd8605956a31dbd8c2b63ffef998bd2</cmd:ResourceRef> |
|
22 |
</cmd:ResourceProxy> |
|
23 |
<cmd:ResourceProxy id="sruCQL"> |
|
24 |
<cmd:ResourceType mimetype="application/sru+xml">SearchService</cmd:ResourceType> |
|
25 |
<!-- http://gysseling.corpus.taalbanknederlands.inl.nl/cqlwebapp/cql --> |
|
26 |
<cmd:ResourceRef>hdl:10032/56a70d6a67e396bdd13b0a67e735d791</cmd:ResourceRef> |
|
27 |
</cmd:ResourceProxy> |
|
28 |
<cmd:ResourceProxy id="LandingPage"> |
|
29 |
<cmd:ResourceType>LandingPage</cmd:ResourceType> |
|
30 |
<cmd:ResourceRef>hdl:10032/99fb7c459b7848118ec1a7cbb14c47ea</cmd:ResourceRef> |
|
31 |
</cmd:ResourceProxy> |
|
32 |
</cmd:ResourceProxyList> |
|
33 |
<cmd:JournalFileProxyList/> |
|
34 |
<cmd:ResourceRelationList/> |
|
35 |
</cmd:Resources> |
|
36 |
<cmd:Components> |
|
37 |
<cmdp:TextCorpusProfile> |
|
38 |
<cmdp:Collection> |
|
39 |
<cmdp:GeneralInfo> |
|
40 |
<cmdp:Name>Corpus Gysseling</cmdp:Name> |
|
41 |
<cmdp:TimeCoverage> |
|
42 |
<cmdp:minDate>1200-01-01</cmdp:minDate> |
|
43 |
<cmdp:maxDate>1300-01-01</cmdp:maxDate> |
|
44 |
</cmdp:TimeCoverage> |
|
45 |
<cmdp:Description> |
|
46 |
<cmdp:Description> |
|
47 |
Corpus Gysseling van 13de eeuwse tekstem |
|
48 |
</cmdp:Description> |
|
49 |
</cmdp:Description> |
|
50 |
</cmdp:GeneralInfo> |
|
51 |
<cmdp:OriginLocation> |
|
52 |
<cmdp:Location> |
|
53 |
<cmdp:Country> |
|
54 |
<cmdp:Code>NL</cmdp:Code> |
|
55 |
</cmdp:Country> |
|
56 |
</cmdp:Location> |
|
57 |
</cmdp:OriginLocation> |
|
58 |
<cmdp:Creators> |
|
59 |
<cmdp:Creator> |
|
60 |
<cmdp:Contact> |
|
61 |
<cmdp:Email>servicedesk@inl.nl</cmdp:Email> |
|
62 |
<cmdp:Organisation>INL</cmdp:Organisation> |
|
63 |
</cmdp:Contact> |
|
64 |
</cmdp:Creator> |
|
65 |
</cmdp:Creators> |
|
66 |
|
|
67 |
<cmdp:DocumentationLanguages> |
|
68 |
<cmdp:Language> |
|
69 |
<cmdp:LanguageName>Dutch</cmdp:LanguageName> |
|
70 |
<cmdp:ISO639> |
|
71 |
<cmdp:iso-639-3-code>nld</cmdp:iso-639-3-code> |
|
72 |
</cmdp:ISO639> |
|
73 |
</cmdp:Language> |
|
74 |
</cmdp:DocumentationLanguages> |
|
75 |
|
|
76 |
<cmdp:Access cmd:ref="resource"> |
|
77 |
<cmdp:Availability>free for academic use; non appliccable for commercial parties</cmdp:Availability> |
|
78 |
<cmdp:DistributionMedium>online application</cmdp:DistributionMedium> |
|
79 |
<cmdp:CatalogueLink>http://gysseling.corpus.taalbanknederlands.inl.nl/gysseling/page/search</cmdp:CatalogueLink> |
|
80 |
<cmdp:Contact> |
|
81 |
<cmdp:Email>servidesk@inl.nl</cmdp:Email> |
|
82 |
<cmdp:Organisation>INL</cmdp:Organisation> |
|
83 |
<cmdp:Website>www.inl.nl</cmdp:Website> |
|
84 |
</cmdp:Contact> |
|
85 |
<cmdp:Price> |
|
86 |
<cmdp:Price>free</cmdp:Price> |
|
87 |
</cmdp:Price> |
|
88 |
</cmdp:Access> |
|
89 |
|
|
90 |
</cmdp:Collection> |
|
91 |
<cmdp:Corpus> |
|
92 |
<cmdp:SubjectLanguages> |
|
93 |
<!-- Middle Dutch (ca. 1050-1350) --> |
|
94 |
<cmdp:SubjectLanguage> |
|
95 |
<cmdp:Language> |
|
96 |
<cmdp:LanguageName>Middle Dutch</cmdp:LanguageName> |
|
97 |
<cmdp:ISO639> |
|
98 |
<cmdp:iso-639-3-code>dum</cmdp:iso-639-3-code> |
|
99 |
</cmdp:ISO639> |
|
100 |
</cmdp:Language> |
|
101 |
</cmdp:SubjectLanguage> |
|
102 |
</cmdp:SubjectLanguages> |
|
103 |
</cmdp:Corpus> |
|
104 |
<cmdp:TextCorpus> </cmdp:TextCorpus> |
|
105 |
</cmdp:TextCorpusProfile> |
|
106 |
</cmd:Components> |
|
107 |
</cmd:CMD> |
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/clarin_dataset.xml | ||
---|---|---|
1 |
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1403526079380" CMDVersion="1.2" xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1403526079380 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/clarin.eu:cr1:p_1403526079380/xsd"> |
|
2 |
<cmd:Header> |
|
3 |
<cmd:MdCreationDate>2017-01-20</cmd:MdCreationDate> |
|
4 |
<cmd:MdSelfLink>http://hdl.handle.net/11356/1052@format=cmdi</cmd:MdSelfLink> |
|
5 |
<cmd:MdProfile>clarin.eu:cr1:p_1403526079380</cmd:MdProfile> |
|
6 |
<cmd:MdCollectionDisplayName>CLARIN.SI data &amp; tools</cmd:MdCollectionDisplayName> |
|
7 |
</cmd:Header> |
|
8 |
<cmd:Resources> |
|
9 |
<cmd:ResourceProxyList> |
|
10 |
<cmd:ResourceProxy id="lp_1590"> |
|
11 |
<cmd:ResourceType>LandingPage</cmd:ResourceType> |
|
12 |
<cmd:ResourceRef>http://hdl.handle.net/11356/1052</cmd:ResourceRef> |
|
13 |
</cmd:ResourceProxy> |
|
14 |
<cmd:ResourceProxy id="uri_1"> |
|
15 |
<cmd:ResourceType mimetype="text/html">Resource</cmd:ResourceType> |
|
16 |
<cmd:ResourceRef>http://eng.slovenscina.eu/tehnologije/ucni-korpus</cmd:ResourceRef> |
|
17 |
</cmd:ResourceProxy> |
|
18 |
</cmd:ResourceProxyList> |
|
19 |
<cmd:JournalFileProxyList/> |
|
20 |
<cmd:ResourceRelationList/> |
|
21 |
</cmd:Resources> |
|
22 |
<cmd:Components> |
|
23 |
<cmdp:LINDAT_CLARIN> |
|
24 |
<cmdp:bibliographicInfo> |
|
25 |
<cmdp:projectUrl>http://eng.slovenscina.eu/tehnologije/ucni-korpus</cmdp:projectUrl> |
|
26 |
<cmdp:titles> |
|
27 |
<cmdp:title xml:lang="en">Training corpus ssj500k 1.4</cmdp:title> |
|
28 |
</cmdp:titles> |
|
29 |
<cmdp:authors> |
|
30 |
<cmdp:author> |
|
31 |
<cmdp:lastName>Krek</cmdp:lastName> |
|
32 |
<cmdp:firstName> Simon</cmdp:firstName> |
|
33 |
</cmdp:author> |
|
34 |
<cmdp:author> |
|
35 |
<cmdp:lastName>Dobrovoljc</cmdp:lastName> |
|
36 |
<cmdp:firstName> Kaja</cmdp:firstName> |
|
37 |
</cmdp:author> |
|
38 |
<cmdp:author> |
|
39 |
<cmdp:lastName>Erjavec</cmdp:lastName> |
|
40 |
<cmdp:firstName> Tomaž</cmdp:firstName> |
|
41 |
</cmdp:author> |
|
42 |
<cmdp:author> |
|
43 |
<cmdp:lastName>Može</cmdp:lastName> |
|
44 |
<cmdp:firstName> Sara</cmdp:firstName> |
|
45 |
</cmdp:author> |
|
46 |
<cmdp:author> |
|
47 |
<cmdp:lastName>Ledinek</cmdp:lastName> |
|
48 |
<cmdp:firstName> Nina</cmdp:firstName> |
|
49 |
</cmdp:author> |
|
50 |
<cmdp:author> |
|
51 |
<cmdp:lastName>Holz</cmdp:lastName> |
|
52 |
<cmdp:firstName> Nanika</cmdp:firstName> |
|
53 |
</cmdp:author> |
|
54 |
</cmdp:authors> |
|
55 |
<cmdp:dates> |
|
56 |
<cmdp:dateIssued>2015-10-26</cmdp:dateIssued> |
|
57 |
</cmdp:dates> |
|
58 |
<cmdp:identifiers> |
|
59 |
<cmdp:identifier type="Handle">http://hdl.handle.net/11356/1052</cmdp:identifier> |
|
60 |
</cmdp:identifiers> |
|
61 |
<cmdp:funds> |
|
62 |
<cmdp:funding> |
|
63 |
<cmdp:organization>Ministry of Education, Science and Sport</cmdp:organization> |
|
64 |
<cmdp:code>3311-08-986003</cmdp:code> |
|
65 |
<cmdp:projectName>Communication in Slovene</cmdp:projectName> |
|
66 |
<cmdp:fundsType>euFunds</cmdp:fundsType> |
|
67 |
</cmdp:funding> |
|
68 |
</cmdp:funds> |
|
69 |
<cmdp:contactPerson> |
|
70 |
<cmdp:firstName>Simon</cmdp:firstName> |
|
71 |
<cmdp:lastName>Krek</cmdp:lastName> |
|
72 |
<cmdp:email>simon.krek@guest.arnes.si</cmdp:email> |
|
73 |
<cmdp:affiliation>Jožef Stefan Institute</cmdp:affiliation> |
|
74 |
</cmdp:contactPerson> |
|
75 |
<cmdp:publishers> |
|
76 |
<cmdp:publisher>Centre for Language Resources and Technologies, University of Ljubljana</cmdp:publisher> |
|
77 |
</cmdp:publishers> |
|
78 |
</cmdp:bibliographicInfo> |
|
79 |
<cmdp:dataInfo> |
|
80 |
<cmdp:type>corpus</cmdp:type> |
|
81 |
<cmdp:description>The ssj500k training corpus contains 500,000 words, manually annotated on the levels of tokenization, sentence segmentation, morphosyntactic tagging, lemmatisation, named entities, and, partially, syntactic dependencies. The ssj500k corpus uses the MULTEXT-East / JOS morphosyntactic tagset and the JOS dependency schema and is based on the jos100k and jos1M corpora. Note that this entry updates ssj500k 1.3 by fixing many annotation errors.</cmdp:description> |
|
82 |
<cmdp:languages> |
|
83 |
<cmdp:language> |
|
84 |
<cmdp:code>slv</cmdp:code> |
|
85 |
<cmdp:name>Slovenian</cmdp:name> |
|
86 |
</cmdp:language> |
|
87 |
</cmdp:languages> |
|
88 |
<cmdp:keywords> |
|
89 |
<cmdp:keyword>tagging</cmdp:keyword> |
|
90 |
<cmdp:keyword>dependency treebank</cmdp:keyword> |
|
91 |
<cmdp:keyword>parsing</cmdp:keyword> |
|
92 |
<cmdp:keyword>named entities</cmdp:keyword> |
|
93 |
<cmdp:keyword>tokenisation</cmdp:keyword> |
|
94 |
<cmdp:keyword>manual annotation</cmdp:keyword> |
|
95 |
<cmdp:keyword>TEI</cmdp:keyword> |
|
96 |
</cmdp:keywords> |
|
97 |
<cmdp:links> |
|
98 |
<cmdp:link>http://nl.ijs.si/noske/sl-ref.cgi/corp_info?corpname=ssj500k</cmdp:link> |
|
99 |
</cmdp:links> |
|
100 |
<cmdp:sizeInfo> |
|
101 |
<cmdp:size> |
|
102 |
<cmdp:size>500295</cmdp:size> |
|
103 |
<cmdp:unit>words</cmdp:unit> |
|
104 |
</cmdp:size> |
|
105 |
<cmdp:size> |
|
106 |
<cmdp:size>586248</cmdp:size> |
|
107 |
<cmdp:unit>tokens</cmdp:unit> |
|
108 |
</cmdp:size> |
|
109 |
<cmdp:size> |
|
110 |
<cmdp:size>27829</cmdp:size> |
|
111 |
<cmdp:unit>sentences</cmdp:unit> |
|
112 |
</cmdp:size> |
|
113 |
</cmdp:sizeInfo> |
|
114 |
</cmdp:dataInfo> |
|
115 |
<cmdp:licenseInfo> |
|
116 |
<cmdp:license> |
|
117 |
<cmdp:uri>https://creativecommons.org/licenses/by-nc-sa/4.0/</cmdp:uri> |
|
118 |
</cmdp:license> |
|
119 |
</cmdp:licenseInfo> |
|
120 |
</cmdp:LINDAT_CLARIN> |
|
121 |
</cmd:Components> |
|
122 |
</cmd:CMD> |
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/parthenos_policy.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<generator_policy> |
|
3 |
|
|
4 |
<!-- The prefix attribute refers to a namespace prefix that must be declared in the X3ML definition. |
|
5 |
In PARTHENOS, the namespace URI is composed of the PARTHENOS base namespace followed by the provider |
|
6 |
and the database that is being mapped |
|
7 |
(for example: use prefix="parthenos" here, and in the X3ML file declare <namespace prefix="parthenos" uri="http://parthenos-project.eu/FORTH/myDB/"/>). -->
|
8 |
|
|
9 |
<generator name="ServiceURI" prefix="parthenos"> |
|
10 |
<pattern>Service/{term}</pattern> |
|
11 |
</generator> |
|
12 |
<generator name="DatasetURI" prefix="parthenos"> |
|
13 |
<pattern>Dataset/{term}</pattern> |
|
14 |
</generator> |
|
15 |
<generator name="SoftwareURI" prefix="parthenos"> |
|
16 |
<pattern>Software/{term}</pattern> |
|
17 |
</generator> |
|
18 |
<generator name="ActorURI" prefix="parthenos"> |
|
19 |
<pattern>Actor/{term}</pattern> |
|
20 |
</generator> |
|
21 |
<generator name="ProjectURI" prefix="parthenos"> |
|
22 |
<pattern>Project/{term}</pattern> |
|
23 |
</generator> |
|
24 |
<generator name="ServiceAppellationURI" prefix="parthenos"> |
|
25 |
<pattern>Service/Appellation/{term}</pattern> |
|
26 |
</generator> |
|
27 |
<generator name="DatasetAppellationURI" prefix="parthenos"> |
|
28 |
<pattern>Dataset/Appellation/{term}</pattern> |
|
29 |
</generator> |
|
30 |
<generator name="SoftwareAppellationURI" prefix="parthenos"> |
|
31 |
<pattern>Software/Appellation/{term}</pattern> |
|
32 |
</generator> |
|
33 |
<generator name="ActorAppellationURI" prefix="parthenos"> |
|
34 |
<pattern>Actor/Appellation/{term}</pattern> |
|
35 |
</generator> |
|
36 |
<generator name="ProjectAppellationURI" prefix="parthenos"> |
|
37 |
<pattern>Project/Appellation/{term}</pattern> |
|
38 |
</generator> |
|
39 |
<generator name="ThingAppellationURI" prefix="parthenos"> |
|
40 |
<pattern>Thing/Appellation/{term}</pattern> |
|
41 |
</generator> |
|
42 |
<generator name="EventAppellationURI" prefix="parthenos"> |
|
43 |
<pattern>Event/Appellation/{term}</pattern> |
|
44 |
</generator> |
|
45 |
<generator name="PlaceAppellationURI" prefix="parthenos"> |
|
46 |
<pattern>Place/Appellation/{term}</pattern> |
|
47 |
</generator> |
|
48 |
<generator name="ThingURI" prefix="parthenos"> |
|
49 |
<pattern>Thing/{term}</pattern> |
|
50 |
</generator> |
|
51 |
<generator name="EventURI" prefix="parthenos"> |
|
52 |
<pattern>Event/{term}</pattern> |
|
53 |
</generator> |
|
54 |
<generator name="Time-SpanURI" prefix="parthenos"> |
|
55 |
<pattern>Time-Span/{term}</pattern> |
|
56 |
</generator> |
|
57 |
<generator name="PlaceURI" prefix="parthenos"> |
|
58 |
<pattern>Place/{term}</pattern> |
|
59 |
</generator> |
|
60 |
<generator name="DimensionURI" prefix="parthenos"> |
|
61 |
<pattern>Dimension/{term}</pattern> |
|
62 |
</generator> |
|
63 |
<generator name="ConceptURI" prefix="parthenos"> |
|
64 |
<pattern>Concept/{term}</pattern> |
|
65 |
</generator> |
|
66 |
<generator name="OneLevelCustomURI" prefix="parthenos"> |
|
67 |
<pattern>{level1}/{term}</pattern> |
|
68 |
</generator> |
|
69 |
<generator name="TwoLevelCustomURI" prefix="parthenos"> |
|
70 |
<pattern>{level1}/{level2}/{term}</pattern> |
|
71 |
</generator> |
|
72 |
<generator name="OneLevelCustomURIwCount" prefix="parthenos"> |
|
73 |
<pattern>{level1}/{count}/{term}</pattern> |
|
74 |
</generator> |
|
75 |
<generator name="TwoLevelCustomURIwCount" prefix="parthenos"> |
|
76 |
<pattern>{level1}/{level2}/{count}/{term}</pattern> |
|
77 |
</generator> |
|
78 |
<generator name="SimpleLabel"> |
|
79 |
<pattern>{label}</pattern> |
|
80 |
</generator> |
|
81 |
<generator name="CompositeLabel"> |
|
82 |
<pattern>{label} {text}</pattern> |
|
83 |
</generator> |
|
84 |
<generator name="GermanDateTime"> |
|
85 |
<custom generatorClass="gr.forth.GermanDate"> |
|
86 |
<set-arg name="bound" type="constant"/> |
|
87 |
<set-arg name="text"/> |
|
88 |
</custom> |
|
89 |
</generator> |
|
90 |
<generator name="URIorUUID"> |
|
91 |
<custom generatorClass="gr.forth.URIorUUID"> |
|
92 |
<set-arg name="text"/> |
|
93 |
</custom> |
|
94 |
</generator> |
|
95 |
</generator_policy> |
modules/dnet-msro-service/branches/saxonHE/src/test/resources/eu/dnetlib/x3m/clarin_service2.xml | ||
---|---|---|
1 |
<cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1423750293168" CMDVersion="1.2" xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1423750293168 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.x/profiles/clarin.eu:cr1:p_1423750293168/xsd"> |
|
2 |
<cmd:Header> |
|
3 |
<cmd:MdCreator>Thomas Kisler</cmd:MdCreator> |
|
4 |
<cmd:MdCreationDate>2013-12-05</cmd:MdCreationDate> |
|
5 |
<cmd:MdSelfLink>https://clarin.phonetik.uni-muenchen.de/BASRepository/WebServices/BAS_Webservices.cmdi.xml</cmd:MdSelfLink> |
|
6 |
<cmd:MdProfile>clarin.eu:cr1:p_1423750293168</cmd:MdProfile> |
|
7 |
<cmd:MdCollectionDisplayName>Bavarian Archive for Speech Signals (BAS)</cmd:MdCollectionDisplayName> |
|
8 |
</cmd:Header> |
|
9 |
<cmd:Resources> |
|
10 |
<cmd:ResourceProxyList> |
|
11 |
<cmd:ResourceProxy id="locid1"> |
|
12 |
<cmd:ResourceType mimetype="application/vnd.sun.wadl+xml">Resource</cmd:ResourceType> |
|
13 |
<cmd:ResourceRef>https://clarin.phonetik.uni-muenchen.de/BASWebServices/application-hand.wadl</cmd:ResourceRef> |
|
14 |
</cmd:ResourceProxy> |
|
15 |
<cmd:ResourceProxy id="lp_0000000001"> |
|
16 |
<cmd:ResourceType mimetype="text/html">LandingPage</cmd:ResourceType> |
|
17 |
<cmd:ResourceRef>http://clarin.phonetik.uni-muenchen.de/BASWebServices/</cmd:ResourceRef> |
|
18 |
</cmd:ResourceProxy> |
|
19 |
</cmd:ResourceProxyList> |
|
20 |
<cmd:JournalFileProxyList/> |
|
21 |
<cmd:ResourceRelationList/> |
|
22 |
</cmd:Resources> |
|
23 |
<cmd:Components> |
|
24 |
<cmdp:BASWebService> |
|
25 |
<cmdp:Description> |
|
26 |
<cmdp:Description>This is the description of the BAS Web Services being hosted in the Bavarian Archive for Speech |
|
27 |
Signals (BAS) in Munich. Parameters possessing a "mimetype" tag are being processed as files and need to be |
|
28 |
provided to ensure the services to run (other options have a default option, so it is optional if those are |
|
29 |
passed).</cmdp:Description> |
|
30 |
</cmdp:Description> |
|
31 |
<cmdp:Service> |
|
32 |
<cmdp:Name>BAS Webservices</cmdp:Name> |
|
33 |
<cmdp:Description>Several services processing phonetic data (signals and annotations/segmentations) provided by |
|
34 |
BAS</cmdp:Description> |
|
35 |
<cmdp:ServiceDescriptionLocation cmd:ref="locid1"/> |
|
36 |
<cmdp:CollectionType> |
|
37 |
<cmdp:CollectionType>tool</cmdp:CollectionType> |
|
38 |
</cmdp:CollectionType> |
|
39 |
<cmdp:Operations> |
|
40 |
<cmdp:Operation> |
|
41 |
<cmdp:Name>runMAUSBasic</cmdp:Name> |
|
42 |
<cmdp:Description>segments an audio file into SAM-PA phonetic segments given an orthographic transcription; |
|
43 |
result is stored in a three-layer (word segmentation with orthographic labels, word segmentation with |
|
44 |
canonical pronunciation labels in SAM-PA, phonemic segmentation with SAM-PA labels) praat textgrid file; |
|
45 |
this is a basic MAUS service which uses only default options, for a more controllable service see operation |
|
46 |
'runMAUS'.</cmdp:Description> |
|
47 |
<cmdp:Input> |
|
48 |
<cmdp:Parameter> |
|
49 |
<cmdp:Name>SIGNAL</cmdp:Name> |
|
50 |
<cmdp:Description>mono sound file containing the speech signal to be segmented; PCM 16 bit resolution; any |
|
51 |
sampling rate; optimal results if leading and trailing silence intervals are truncated before |
|
52 |
processing; max. file size is 20MBytes. Although the mimetype of this input file is restricted to |
|
53 |
audio/x-wav (wav|WAV), the service will also process NIST/SPHERE (nis|NIS) and ALAW |
|
54 |
(al|AL|dea|DEA).</cmdp:Description> |
|
55 |
<cmdp:MIMEType>audio/x-wav</cmdp:MIMEType> |
|
56 |
<cmdp:isConfigurationParameter>false</cmdp:isConfigurationParameter> |
|
57 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2653</cmdp:DataCategory> |
|
58 |
<cmdp:displayName>Signal file</cmdp:displayName> |
|
59 |
<cmdp:MaxFileSize> |
|
60 |
<cmdp:TotalSize> |
|
61 |
<cmdp:Number>200</cmdp:Number> |
|
62 |
<cmdp:SizeUnit>MB</cmdp:SizeUnit> |
|
63 |
</cmdp:TotalSize> |
|
64 |
</cmdp:MaxFileSize> |
|
65 |
</cmdp:Parameter> |
|
66 |
<cmdp:Parameter> |
|
67 |
<cmdp:Name>TEXT</cmdp:Name> |
|
68 |
<cmdp:Description>orthographic text of the utterance to be segmented; words are white space separated; |
|
69 |
encoding is utf-8; punctuations are ignored</cmdp:Description> |
|
70 |
<cmdp:MIMEType>text/plain; charset=UTF-8</cmdp:MIMEType> |
|
71 |
<cmdp:isConfigurationParameter>false</cmdp:isConfigurationParameter> |
|
72 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2462</cmdp:DataCategory> |
|
73 |
<cmdp:displayName>Text file</cmdp:displayName> |
|
74 |
<cmdp:MaxFileSize> |
|
75 |
<cmdp:TotalSize> |
|
76 |
<cmdp:Number>20</cmdp:Number> |
|
77 |
<cmdp:SizeUnit>MB</cmdp:SizeUnit> |
|
78 |
</cmdp:TotalSize> |
|
79 |
</cmdp:MaxFileSize> |
|
80 |
</cmdp:Parameter> |
|
81 |
<cmdp:Parameter> |
|
82 |
<cmdp:Name>LANGUAGE</cmdp:Name> |
|
83 |
<cmdp:Description>Language of the speech to be processed; we use the RFC5646 sub-structure 'iso639-3 - |
|
84 |
iso3166-1 [ - iso3166-2], e.g. 'eng-US' for American English, 'deu-AT-1' for Austrian German spoken in |
|
85 |
'Oberoesterreich'; defines the possible orthographic text language in the input, the text-to-phoneme |
|
86 |
transformation and some language specific transformations within the MAUS process. The code 'gsw-CH' (= |
|
87 |
Swiss German) denotes orthographic text input in Swiss German 'Dieth' encoding.</cmdp:Description> |
|
88 |
<cmdp:DataType>xsd:string</cmdp:DataType> |
|
89 |
<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter> |
|
90 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2482</cmdp:DataCategory> |
|
91 |
<cmdp:Default>deu-DE</cmdp:Default> |
|
92 |
<cmdp:displayName>Language</cmdp:displayName> |
|
93 |
<cmdp:Values> |
|
94 |
<cmdp:ParameterValue> |
|
95 |
<cmdp:Value>cat-ES</cmdp:Value> |
|
96 |
<cmdp:Description>Catalan (ES)</cmdp:Description> |
|
97 |
</cmdp:ParameterValue> |
|
98 |
<cmdp:ParameterValue> |
|
99 |
<cmdp:Value>nld-NL</cmdp:Value> |
|
100 |
<cmdp:Description>Dutch (NL)</cmdp:Description> |
|
101 |
</cmdp:ParameterValue> |
|
102 |
<cmdp:ParameterValue> |
|
103 |
<cmdp:Value>eng-AU</cmdp:Value> |
|
104 |
<cmdp:Description>English (AU)</cmdp:Description> |
|
105 |
</cmdp:ParameterValue> |
|
106 |
<cmdp:ParameterValue> |
|
107 |
<cmdp:Value>eng-US</cmdp:Value> |
|
108 |
<cmdp:Description>English (US) </cmdp:Description> |
|
109 |
</cmdp:ParameterValue> |
|
110 |
<cmdp:ParameterValue> |
|
111 |
<cmdp:Value>eng-GB</cmdp:Value> |
|
112 |
<cmdp:Description>English (GB) </cmdp:Description> |
|
113 |
</cmdp:ParameterValue> |
|
114 |
<cmdp:ParameterValue> |
|
115 |
<cmdp:Value>eng-NZ</cmdp:Value> |
|
116 |
<cmdp:Description>English (NZ) </cmdp:Description> |
|
117 |
</cmdp:ParameterValue> |
|
118 |
<cmdp:ParameterValue> |
|
119 |
<cmdp:Value>ekk-EE</cmdp:Value> |
|
120 |
<cmdp:Description>Estonian (EE)</cmdp:Description> |
|
121 |
</cmdp:ParameterValue> |
|
122 |
<cmdp:ParameterValue> |
|
123 |
<cmdp:Value>fin-FI</cmdp:Value> |
|
124 |
<cmdp:Description>Finnish (FI)</cmdp:Description> |
|
125 |
</cmdp:ParameterValue> |
|
126 |
<cmdp:ParameterValue> |
|
127 |
<cmdp:Value>fra-FR</cmdp:Value> |
|
128 |
<cmdp:Description>French (FR)</cmdp:Description> |
|
129 |
</cmdp:ParameterValue> |
|
130 |
<cmdp:ParameterValue> |
|
131 |
<cmdp:Value>kat-GE</cmdp:Value> |
|
132 |
<cmdp:Description>Georgian (GE)</cmdp:Description> |
|
133 |
</cmdp:ParameterValue> |
|
134 |
<cmdp:ParameterValue> |
|
135 |
<cmdp:Value>deu-DE</cmdp:Value> |
|
136 |
<cmdp:Description>German (DE)</cmdp:Description> |
|
137 |
</cmdp:ParameterValue> |
|
138 |
<cmdp:ParameterValue> |
|
139 |
<cmdp:Value>gsw-CH</cmdp:Value> |
|
140 |
<cmdp:Description>German Dieth (CH)</cmdp:Description> |
|
141 |
</cmdp:ParameterValue> |
|
142 |
<cmdp:ParameterValue> |
|
143 |
<cmdp:Value>gsw-CH-BE</cmdp:Value> |
|
144 |
<cmdp:Description>German Dieth (CH), Bern dialect</cmdp:Description> |
|
145 |
</cmdp:ParameterValue> |
|
146 |
<cmdp:ParameterValue> |
|
147 |
<cmdp:Value>gsw-CH-BS</cmdp:Value> |
|
148 |
<cmdp:Description>German Dieth (CH), Basel dialect</cmdp:Description> |
|
149 |
</cmdp:ParameterValue> |
|
150 |
<cmdp:ParameterValue> |
|
151 |
<cmdp:Value>gsw-CH-GR</cmdp:Value> |
|
152 |
<cmdp:Description>German Dieth (CH), Graubunden dialect</cmdp:Description> |
|
153 |
</cmdp:ParameterValue> |
|
154 |
<cmdp:ParameterValue> |
|
155 |
<cmdp:Value>gsw-CH-SG</cmdp:Value> |
|
156 |
<cmdp:Description>German Dieth (CH), St. Gallen dialect</cmdp:Description> |
|
157 |
</cmdp:ParameterValue> |
|
158 |
<cmdp:ParameterValue> |
|
159 |
<cmdp:Value>gsw-CH-ZH</cmdp:Value> |
|
160 |
<cmdp:Description>German Dieth (CH), Zurich dialect</cmdp:Description> |
|
161 |
</cmdp:ParameterValue> |
|
162 |
<cmdp:ParameterValue> |
|
163 |
<cmdp:Value>hun-HU</cmdp:Value> |
|
164 |
<cmdp:Description>Hungarian (HU)</cmdp:Description> |
|
165 |
</cmdp:ParameterValue> |
|
166 |
<cmdp:ParameterValue> |
|
167 |
<cmdp:Value>ita-IT</cmdp:Value> |
|
168 |
<cmdp:Description>Italian (IT)</cmdp:Description> |
|
169 |
</cmdp:ParameterValue> |
|
170 |
<cmdp:ParameterValue> |
|
171 |
<cmdp:Value>mlt-MT</cmdp:Value> |
|
172 |
<cmdp:Description>Maltese (MT)</cmdp:Description> |
|
173 |
</cmdp:ParameterValue> |
|
174 |
<cmdp:ParameterValue> |
|
175 |
<cmdp:Value>pol-PL</cmdp:Value> |
|
176 |
<cmdp:Description>Polish (PL)</cmdp:Description> |
|
177 |
</cmdp:ParameterValue> |
|
178 |
<cmdp:ParameterValue> |
|
179 |
<cmdp:Value>rus-RU</cmdp:Value> |
|
180 |
<cmdp:Description>Russian (RU)</cmdp:Description> |
|
181 |
</cmdp:ParameterValue> |
|
182 |
<cmdp:ParameterValue> |
|
183 |
<cmdp:Value>spa-ES</cmdp:Value> |
|
184 |
<cmdp:Description>Spanish (ES)</cmdp:Description> |
|
185 |
</cmdp:ParameterValue> |
|
186 |
<cmdp:ParameterValue> |
|
187 |
<cmdp:Value>cat</cmdp:Value> |
|
188 |
<cmdp:Description/> |
|
189 |
</cmdp:ParameterValue> |
|
190 |
<cmdp:ParameterValue> |
|
191 |
<cmdp:Value>deu</cmdp:Value> |
|
192 |
<cmdp:Description/> |
|
193 |
</cmdp:ParameterValue> |
|
194 |
<cmdp:ParameterValue> |
|
195 |
<cmdp:Value>eng</cmdp:Value> |
|
196 |
<cmdp:Description/> |
|
197 |
</cmdp:ParameterValue> |
|
198 |
<cmdp:ParameterValue> |
|
199 |
<cmdp:Value>fra</cmdp:Value> |
|
200 |
<cmdp:Description/> |
|
201 |
</cmdp:ParameterValue> |
|
202 |
<cmdp:ParameterValue> |
|
203 |
<cmdp:Value>hun</cmdp:Value> |
|
204 |
<cmdp:Description/> |
|
205 |
</cmdp:ParameterValue> |
|
206 |
<cmdp:ParameterValue> |
|
207 |
<cmdp:Value>ita</cmdp:Value> |
|
208 |
<cmdp:Description/> |
|
209 |
</cmdp:ParameterValue> |
|
210 |
<cmdp:ParameterValue> |
|
211 |
<cmdp:Value>mlt</cmdp:Value> |
|
212 |
<cmdp:Description/> |
|
213 |
</cmdp:ParameterValue> |
|
214 |
<cmdp:ParameterValue> |
|
215 |
<cmdp:Value>nld</cmdp:Value> |
|
216 |
<cmdp:Description/> |
|
217 |
</cmdp:ParameterValue> |
|
218 |
<cmdp:ParameterValue> |
|
219 |
<cmdp:Value>aus</cmdp:Value> |
|
220 |
<cmdp:Description/> |
|
221 |
</cmdp:ParameterValue> |
|
222 |
<cmdp:ParameterValue> |
|
223 |
<cmdp:Value>pol</cmdp:Value> |
|
224 |
<cmdp:Description/> |
|
225 |
</cmdp:ParameterValue> |
|
226 |
<cmdp:ParameterValue> |
|
227 |
<cmdp:Value>nze</cmdp:Value> |
|
228 |
<cmdp:Description/> |
|
229 |
</cmdp:ParameterValue> |
|
230 |
<cmdp:ParameterValue> |
|
231 |
<cmdp:Value>fin</cmdp:Value> |
|
232 |
<cmdp:Description/> |
|
233 |
</cmdp:ParameterValue> |
|
234 |
<cmdp:ParameterValue> |
|
235 |
<cmdp:Value>spa</cmdp:Value> |
|
236 |
<cmdp:Description/> |
|
237 |
</cmdp:ParameterValue> |
|
238 |
</cmdp:Values> |
|
239 |
</cmdp:Parameter> |
|
240 |
<cmdp:Parameter> |
|
241 |
<cmdp:Name>INSKANTEXTGRID</cmdp:Name> |
|
242 |
<cmdp:Description>Switch to create an additional tier in the TextGrid output file with a word segmentation |
|
243 |
labelled with the canonic phonemic transcript (taken from the input KAN tier). This option can not be |
|
244 |
set in this service.</cmdp:Description> |
|
245 |
<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter> |
|
246 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory> |
|
247 |
<cmdp:Default>true</cmdp:Default> |
|
248 |
<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter> |
|
249 |
<cmdp:displayName>KAN tier in TextGrid</cmdp:displayName> |
|
250 |
</cmdp:Parameter> |
|
251 |
<cmdp:Parameter> |
|
252 |
<cmdp:Name>INSORTTEXTGRID</cmdp:Name> |
|
253 |
<cmdp:Description>Switch to create an additional tier ORT in the TextGrid output file with a word |
|
254 |
segmentation labelled with the orthographic transcript (taken from the input ORT tier); this option is |
|
255 |
only effective, if the input BPF contains an additional ORT tier. This option can not be set in this |
|
256 |
service.</cmdp:Description> |
|
257 |
<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter> |
|
258 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory> |
|
259 |
<cmdp:Default>true</cmdp:Default> |
|
260 |
<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter> |
|
261 |
<cmdp:displayName>ORT tier in TextGrid</cmdp:displayName> |
|
262 |
</cmdp:Parameter> |
|
263 |
<cmdp:Parameter> |
|
264 |
<cmdp:Name>OUTFORMAT</cmdp:Name> |
|
265 |
<cmdp:Description>Defines the possible output formats: TextGrid - a praat compatible TextGrid file with two |
|
266 |
tiers; par|mau-append - the input BPF file with a new (or replaced) tier MAU; csv|mau - only the BPF |
|
267 |
MAU tier (CSV table); legacyEMU - a file with extension *.EMU that contains in the first part the Emu |
|
268 |
hlb file (*.hlb) and in the second part the Emu phonetic segmentation (*.phonetic)(parts are separated |
|
269 |
by a line '--- cut here ---'); for a description of BPF see |
|
270 |
http://www.bas.uni-muenchen.de/forschung/Bas/BasFormatseng.html This option can not be set in this |
|
271 |
service.</cmdp:Description> |
|
272 |
<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter> |
|
273 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory> |
|
274 |
<cmdp:Default>TextGrid</cmdp:Default> |
|
275 |
<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter> |
|
276 |
<cmdp:displayName>Output format</cmdp:displayName> |
|
277 |
</cmdp:Parameter> |
|
278 |
<cmdp:Parameter> |
|
279 |
<cmdp:Name>USETRN</cmdp:Name> |
|
280 |
<cmdp:Description>If set to true, the service searches the input BPF for a TRN tier (turn/chunk |
|
281 |
segmentation, see http://www.bas.uni-muenchen.de/forschung/Bas/BasFormatsdeu.html#TRN). The synopsis |
|
282 |
for a TRN entry is: 'TRN: (start-sample) (duration-sample) (word-link-list) (label)', e.g. 'TRN: 23654 |
|
283 |
56432 0,1,2,3,4,5,6 sentence1' (the speech within the recording 'sentence1' starts with sample 23654, |
|
284 |
lasts for 56432 samples and covers the words 0-6). If only one TRN entry is found, the segmentation is |
|
285 |
restricted within a time range given by this TRN tier entry; this is useful, if there exists a reliable |
|
286 |
pre-segmentation of the recorded utterance, i.e. the start and end of speech within the recording is |
|
287 |
known. If more than one TRN entry is found, the webservice performs a segmentation for each 'chunk' |
|
288 |
defined by a TRN entry and aggregates all individual results into a single results file; this is useful |
|
289 |
if the input consists of long recordings, for which a manual chunk segmentation is available. If USETRN |
|
290 |
is set to 'force', a pre-segmentation using the wav2trn tool is done by the webservice on-the-fly; this |
|
291 |
is useful, if the input BPF does not contain a TRN entry and the input signal has leading and/or |
|
292 |
trailing silence. This option can not be set in this service.</cmdp:Description> |
|
293 |
<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter> |
|
294 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory> |
|
295 |
<cmdp:Default>force</cmdp:Default> |
|
296 |
<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter> |
|
297 |
<cmdp:displayName>Chunk Segmentation</cmdp:displayName> |
|
298 |
</cmdp:Parameter> |
|
299 |
<cmdp:Parameter> |
|
300 |
<cmdp:Name>NOINITIALFINALSILENCE</cmdp:Name> |
|
301 |
<cmdp:Description>Switch to suppress the automatic modeling on a leading/trailing silence interval. This is |
|
302 |
useful if the signal is for instance cut from a larger utterance and is known to have no |
|
303 |
leading/trailing silence. This option can not be set in this service.</cmdp:Description> |
|
304 |
<cmdp:DataType>xsd:boolean { pattern='true|false' }</cmdp:DataType> |
|
305 |
<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter> |
|
306 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-3825</cmdp:DataCategory> |
|
307 |
<cmdp:Default>false</cmdp:Default> |
|
308 |
<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter> |
|
309 |
<cmdp:displayName>No silence model</cmdp:displayName> |
|
310 |
</cmdp:Parameter> |
|
311 |
<cmdp:Parameter> |
|
312 |
<cmdp:Name>RELAXMINDUR</cmdp:Name> |
|
313 |
<cmdp:Description>Option RELAXMINDUR changes the default minimum duration of 30msec for consonants |
|
314 |
and short/lax vowels and of 40msec for tense/long vowels and diphthongs to 10 and 20msec respectively. |
|
315 |
This is not optimal for general segmentation because MAUS will start to insert many very short |
|
316 |
vowels/glottal stops where they are not appropriate. But for some special investigations |
|
317 |
(e.g. the duration of /t/) it alleviates the ceiling problem at 30msec duration. |
|
318 |
</cmdp:Description> |
|
319 |
<cmdp:DataType>xsd:boolean { pattern='true|false' }</cmdp:DataType> |
|
320 |
<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter> |
|
321 |
<cmdp:Default>false</cmdp:Default> |
|
322 |
<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter> |
|
323 |
<cmdp:displayName>Relax Min Duration</cmdp:displayName> |
|
324 |
</cmdp:Parameter> |
|
325 |
<cmdp:Parameter> |
|
326 |
<cmdp:Name>BPFTHRESHOLD</cmdp:Name> |
|
327 |
<cmdp:Description>Option BPFTHRESHOLD changes the threshold of the MAUS pre-validation: if the |
|
328 |
input BPF file contains more KAN tier lines than this value, maus exits with an ERROR |
|
329 |
message and exit code 2. The motivation is that the computational effort of MAUS increases |
|
330 |
quadratically with input length. Using this option you can set the threshold to another |
|
331 |
value. |
|
332 |
</cmdp:Description> |
|
333 |
<cmdp:DataType>xsd:string</cmdp:DataType> |
|
334 |
<cmdp:isConfigurationParameter>true</cmdp:isConfigurationParameter> |
|
335 |
<cmdp:Default>3000</cmdp:Default> |
|
336 |
<cmdp:isInternalDefaultConfigurationParameter>true</cmdp:isInternalDefaultConfigurationParameter> |
|
337 |
<cmdp:displayName>Max number of input words</cmdp:displayName> |
|
338 |
</cmdp:Parameter> |
|
339 |
</cmdp:Input> |
|
340 |
<cmdp:Output> |
|
341 |
<cmdp:Parameter> |
|
342 |
<cmdp:Name>processfiles-out</cmdp:Name> |
|
343 |
<cmdp:Description>A XML response containing the tags "success", "downloadLink", "output" and "warning". |
|
344 |
success states if the processing was successful or not, downloadLink specifies the location where the |
|
345 |
Praat TextGrid file can be found, output contains the output that is mostly useful during debugging |
|
346 |
errors and warnings if any warnings occurred during the processing. The Praat TextGrid file containing |
|
347 |
three tiers: orthographic transcription (segmented in words), canonical phonemic transcription in |
|
348 |
SAM-PA (segmented in words), phonemic segmentation by MAUS in SAM-PA</cmdp:Description> |
|
349 |
<cmdp:MIMEType>application/xml; charset=UTF-8</cmdp:MIMEType> |
|
350 |
<cmdp:isConfigurationParameter>false</cmdp:isConfigurationParameter> |
|
351 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2462</cmdp:DataCategory> |
|
352 |
</cmdp:Parameter> |
|
353 |
</cmdp:Output> |
|
354 |
<cmdp:Developer> |
|
355 |
<cmdp:developerName>Florian Schiel</cmdp:developerName> |
|
356 |
<cmdp:developerName>Andreas Kipp</cmdp:developerName> |
|
357 |
<cmdp:developerName>Thomas Kisler</cmdp:developerName> |
|
358 |
<cmdp:developerName>Ines Wendler</cmdp:developerName> |
|
359 |
<cmdp:developerFundingOrg>Bavarian Archive for Speech Signals, Munich, Germany</cmdp:developerFundingOrg> |
|
360 |
<cmdp:developerFundingOrg>Bundesminister für Bildung und Forschung, Germany</cmdp:developerFundingOrg> |
|
361 |
<cmdp:developerCitation>Schiel, F. (1999). Automatic Phonetic Transcription of Non-Prompted Speech. In Proc. |
|
362 |
of the ICPhS (pp. 607-610).</cmdp:developerCitation> |
|
363 |
<cmdp:developerCitation>Kisler, T. and Reichel U. D. and Schiel, F. and Draxler, Ch. and Jackl, B. and Pörner, N. (2016): BAS Speech Science Web Services - an Update of Current Developments, Proceedings of the 10th International Conference on Language Resources and Evaluation (LREC 2016), Portorož, Slovenia, paper id 668. </cmdp:developerCitation> |
|
364 |
</cmdp:Developer> |
|
365 |
</cmdp:Operation> |
|
366 |
<cmdp:Operation> |
|
367 |
<cmdp:Name>runMAUSBasicGerman</cmdp:Name> |
|
368 |
<cmdp:Description>segments a German audio file into SAM-PA phonetic segments given a German orthographic |
|
369 |
transcription; result is stored in a three-layer (word segmentation with orthographic labels, word |
|
370 |
segmentation with canonical pronunciation labels in SAM-PA, phonemic segmentation with SAM-PA labels) praat |
|
371 |
textgrid file; this is a basic MAUS service which uses only default options, for a more controllable |
|
372 |
service see operation 'runMAUS'.</cmdp:Description> |
|
373 |
<cmdp:Input> |
|
374 |
<cmdp:Parameter> |
|
375 |
<cmdp:Name>SIGNAL</cmdp:Name> |
|
376 |
<cmdp:Description>mono sound file containing the speech signal to be segmented; PCM 16 bit resolution; any |
|
377 |
sampling rate; optimal results if leading and trailing silence intervals are truncated before |
|
378 |
processing; max. file size is 20MBytes. Although the mimetype of this input file is restricted to |
|
379 |
audio/x-wav (wav|WAV), the service will also process NIST/SPHERE (nis|NIS) and ALAW |
|
380 |
(al|AL|dea|DEA).</cmdp:Description> |
|
381 |
<cmdp:MIMEType>audio/x-wav</cmdp:MIMEType> |
|
382 |
<cmdp:isConfigurationParameter>false</cmdp:isConfigurationParameter> |
|
383 |
<cmdp:DataCategory>http://www.isocat.org/datcat/DC-2653</cmdp:DataCategory> |
|
384 |
<cmdp:displayName>Signal file</cmdp:displayName> |
|
385 |
<cmdp:MaxFileSize> |
|
386 |
<cmdp:TotalSize> |
|
387 |
<cmdp:Number>200</cmdp:Number> |
|
388 |
<cmdp:SizeUnit>MB</cmdp:SizeUnit> |
Also available in: Unified diff
Tests moved from msro to dnet-parthenos, where they should have belonged in the first place