Project

General

Profile

« Previous | Next » 

Revision 37873

Added by Marek Horst over 9 years ago

#1381 Porting PMC citations ingestion from the Cascading framework to Pig. Moving code from icm-iis-ingest-pmc to icm-iis-transformers, including integration tests; removing obsolete Scala code along with unneeded dependencies. Switching the subworkflow in the primary workflow.

View differences:

modules/icm-iis-mainworkflows/trunk/src/main/resources/eu/dnetlib/iis/mainworkflows/primary/main/job.properties
22 22
#puma content
23 23
#import_content_objectstores_csv=794e8173-8be3-4f51-a12e-b43d12ab3b7d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl
24 24
#arxiv content
25
import_content_objectstores_csv=258755af-0b48-41ee-9652-939c5bd2fca3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl
25
#import_content_objectstores_csv=258755af-0b48-41ee-9652-939c5bd2fca3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl
26 26
#pmc content
27
#import_content_objectstores_csv=b2b6fca5-ce18-498c-a375-b02df97998f0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl
27
import_content_objectstores_csv=b2b6fca5-ce18-498c-a375-b02df97998f0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl
28 28
#wos content
29 29
#import_content_objectstores_csv=a1a35f9d-dc12-44e0-8781-d8273f5ef017_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl
30 30
#all contents (including wos)
31 31
#node6 and openaire-services.vls.icm.edu.pl have the same ids
32 32
#import_content_objectstores_csv=794e8173-8be3-4f51-a12e-b43d12ab3b7d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,258755af-0b48-41ee-9652-939c5bd2fca3_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b2b6fca5-ce18-498c-a375-b02df97998f0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,a1a35f9d-dc12-44e0-8781-d8273f5ef017_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl
33

  
34 33
#puma and pmc
35 34
#import_content_objectstores_csv=794e8173-8be3-4f51-a12e-b43d12ab3b7d_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl,b2b6fca5-ce18-498c-a375-b02df97998f0_T2JqZWN0U3RvcmVEU1Jlc291cmNlcy9PYmplY3RTdG9yZURTUmVzb3VyY2VUeXBl
36 35

  
......
40 39

  
41 40
#processing modes
42 41
active_metadataextraction_export=false
43
active_referenceextraction_project=true
42
active_referenceextraction_project=false
44 43
active_referenceextraction_dataset=false
45 44
active_referenceextraction_researchinitiative=false
46 45
active_documentsclassification=false
47 46
active_documentssimilarity=false
48
active_citationmatching=false
47
active_citationmatching=true
49 48
active_statistics=false
50 49
active_websiteusage_analysis=false
51 50
#disabling hbase export, enabling json export
52
active_export=false
51
active_export_hbase=false
52
active_export_json=true
53 53

  
54 54
#export
55 55
export_action_hbase_table_name=mainworkflows_primary_test
modules/icm-iis-mainworkflows/trunk/src/main/resources/eu/dnetlib/iis/mainworkflows/common/import/oozie_app/import.txt
8 8
ingest_html_plaintext classpath eu/dnetlib/iis/ingest/html/plaintext/oozie_app
9 9
ingest_pmc_plaintext classpath eu/dnetlib/iis/ingest/pmc/plaintext/oozie_app
10 10
ingest_pmc_metadata classpath eu/dnetlib/iis/ingest/pmc/metadata/oozie_app
11
ingest_pmc_citations classpath eu/dnetlib/iis/ingest/pmc/citations/oozie_app
11
ingest_pmc_citations classpath eu/dnetlib/iis/transformers/ingest/pmc/citations/oozie_app
12 12
basic_collapser classpath eu/dnetlib/iis/collapsers/basic_collapser/oozie_app
13 13
multiple_input_collapser classpath eu/dnetlib/iis/collapsers/multiple_input_collapser/oozie_app
14 14
metadataextraction classpath eu/dnetlib/iis/metadataextraction/oozie_app

Also available in: Unified diff