Project

General

Profile

« Previous | Next » 

Revision 36309

Added by Marek Horst over 9 years ago

#1257 updating core-examples: dropping schema generation related hacks in streaming modules, switching to literal schema parameters

View differences:

modules/icm-iis-core-examples/trunk/src/test/resources/eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_with_unicode_escape_codes/oozie_app/workflow.xml
2 2
<!-- Note that documentation placed in comments in this file uses the
3 3
"markdown" syntax (along with its way of dividing text into sections). -->
4 4
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-core_examples_hadoopstreaming_cloner_with_unicode_escape_codes">
5
	<start to="data_producer" />
5
	<start to="generate-schema" />
6 6

  
7
	<action name="generate-schema">
8
	    <java>
9
    		<job-tracker>${jobTracker}</job-tracker>
10
        	<name-node>${nameNode}</name-node>
11
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
12
	        <arg>eu.dnetlib.iis.core.examples.schemas.documenttext.DocumentText</arg>
13
	        <capture-output />
14
	    </java>
15
	    <ok to="data_producer" />
16
	    <error to="fail" />
17
	</action>
18

  
7 19
	<action name="data_producer">
8 20
		<java>
9 21
			<job-tracker>${jobTracker}</job-tracker>
......
82 94
                    <value>${workingDir}/data_producer/document_text</value>
83 95
                </property>
84 96

  
85
                <property>
86
                    <name>eu.dnetlib.iis.avro.input.class</name>
87
                    <value>eu.dnetlib.iis.core.examples.schemas.documenttext.DocumentText</value>
88
                </property>
97
				<property>
98
				    <name>input.schema.literal</name>
99
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documenttext.DocumentText']}</value>
100
				</property>
89 101

  
90 102
				<!-- OUTPUT -->
91 103
                <property>
......
93 105
                    <value>${workingDir}/python_cloner/document_text</value>
94 106
                </property>          
95 107

  
96
                <property>
97
                    <name>eu.dnetlib.iis.avro.output.class</name>
98
                    <value>eu.dnetlib.iis.core.examples.schemas.documenttext.DocumentText</value>
99
                </property>
108
				<property>
109
				    <name>output.schema.literal</name>
110
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documenttext.DocumentText']}</value>
111
				</property>
100 112

  
101 113
            </configuration>
102 114
        </map-reduce>
modules/icm-iis-core-examples/trunk/src/test/resources/eu/dnetlib/iis/core/examples/hadoopstreaming/cloner/oozie_app/workflow.xml
2 2
<!-- Note that documentation placed in comments in this file uses the 
3 3
"markdown" syntax (along with its way of dividing text into sections). -->
4 4
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-core_examples_hadoopstreaming_cloner">
5
	<start to="producer" />
5
	<start to="generate-schema" />
6

  
7
	<action name="generate-schema">
8
	    <java>
9
    		<job-tracker>${jobTracker}</job-tracker>
10
        	<name-node>${nameNode}</name-node>
11
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
12
	        <arg>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</arg>
13
	        <capture-output />
14
	    </java>
15
	    <ok to="producer" />
16
	    <error to="fail" />
17
	</action>
18
	
6 19
	<action name="producer">
7 20
		<java>
8 21
			<job-tracker>${jobTracker}</job-tracker>
......
64 77
                    <name>mapred.input.dir</name>
65 78
                    <value>${workingDir}/producer/person</value>
66 79
                </property>
67
                <!-- Name of the input schema. -->
68 80
                <property>
69
                    <name>eu.dnetlib.iis.avro.input.class</name>
70
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
71
                </property>
81
				    <name>input.schema.literal</name>
82
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person']}</value>
83
				</property>
72 84
                <property>
73 85
                    <name>mapred.output.dir</name>
74 86
                    <value>${workingDir}/cloner/person</value>
75 87
                </property>
76
                <!-- Name of the output schema. -->
77
                <property>
78
                    <name>eu.dnetlib.iis.avro.output.class</name>
79
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
80
                </property>
88
				<property>
89
				    <name>output.schema.literal</name>
90
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person']}</value>
91
				</property>
81 92
            </configuration>
82 93
        </map-reduce>
83 94
        <ok to="consumer"/>
modules/icm-iis-core-examples/trunk/src/test/resources/eu/dnetlib/iis/core/examples/hadoopstreaming/wordcount/oozie_app/workflow.xml
2 2
<!-- Note that documentation placed in comments in this file uses the 
3 3
"markdown" syntax (along with its way of dividing text into sections). -->
4 4
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-core_examples_hadoopstreaming_wordcount">
5
	<start to="producer" />
5
	
6
	<start to="generate-schema" />
7

  
8
	<action name="generate-schema">
9
	    <java>
10
    		<job-tracker>${jobTracker}</job-tracker>
11
        	<name-node>${nameNode}</name-node>
12
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
13
	        <arg>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Document</arg>
14
	        <arg>eu.dnetlib.iis.core.examples.schemas.WordCount</arg>
15
	        <capture-output />
16
	    </java>
17
	    <ok to="producer" />
18
	    <error to="fail" />
19
	</action>
20
	
6 21
	<action name="producer">
7 22
		<java>
8 23
			<job-tracker>${jobTracker}</job-tracker>
......
67 82
                    <name>mapred.input.dir</name>
68 83
                    <value>${workingDir}/producer/document</value>
69 84
                </property>
70
                <!-- Name of the input schema. -->
71 85
                <property>
72
                    <name>eu.dnetlib.iis.avro.input.class</name>
73
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Document</value>
74
                </property>
86
				    <name>input.schema.literal</name>
87
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Document']}</value>
88
				</property>
75 89
                <property>
76 90
                    <name>mapred.output.dir</name>
77 91
                    <value>${workingDir}/title_wordcount/wordcount</value>
78 92
                </property>
79
                <!-- Name of the output schema. -->
80
                <property>
81
                    <name>eu.dnetlib.iis.avro.output.class</name>
82
                    <value>eu.dnetlib.iis.core.examples.schemas.WordCount</value>
83
                </property>
93
				<property>
94
				    <name>output.schema.literal</name>
95
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.WordCount']}</value>
96
				</property>
84 97
            </configuration>
85 98
        </map-reduce>
86 99
        <ok to="consumer"/>
modules/icm-iis-core-examples/trunk/src/test/resources/eu/dnetlib/iis/core/examples/hadoopstreaming/wordcount_with_distributed_cache/oozie_app/workflow.xml
2 2
<!-- Note that documentation placed in comments in this file uses the 
3 3
"markdown" syntax (along with its way of dividing text into sections). -->
4 4
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-core_examples_hadoopstreaming_wordcount_with_distributed_cache">
5
	<start to="document_producer" />
5
	
6
	<start to="generate-schema" />
7

  
8
	<action name="generate-schema">
9
	    <java>
10
    		<job-tracker>${jobTracker}</job-tracker>
11
        	<name-node>${nameNode}</name-node>
12
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
13
	        <arg>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Document</arg>
14
	        <arg>eu.dnetlib.iis.core.examples.schemas.WordCount</arg>
15
	        <capture-output />
16
	    </java>
17
	    <ok to="document_producer" />
18
	    <error to="fail" />
19
	</action>
20
	
6 21
	<action name="document_producer">
7 22
		<java>
8 23
			<job-tracker>${jobTracker}</job-tracker>
......
99 114
                    <name>mapred.input.dir</name>
100 115
                    <value>${workingDir}/document_producer/document</value>
101 116
                </property>
102
                <!-- Name of the input schema. -->
117
				<property>
118
				    <name>input.schema.literal</name>
119
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Document']}</value>
120
				</property>
103 121
                <property>
104
                    <name>eu.dnetlib.iis.avro.input.class</name>
105
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Document</value>
106
                </property>
107
                <property>
108 122
                    <name>mapred.output.dir</name>
109 123
                    <value>${workingDir}/title_wordcount/wordcount</value>
110 124
                </property>
111
                <!-- Name of the output schema. -->
112 125
                <property>
113
                    <name>eu.dnetlib.iis.avro.output.class</name>
114
                    <value>eu.dnetlib.iis.core.examples.schemas.WordCount</value>
115
                </property>
126
				    <name>output.schema.literal</name>
127
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.WordCount']}</value>
128
				</property>
116 129
            </configuration>
117 130
            <!--
118 131
            Files listed in <file> tags are copied from HDFS to DistributedCache
modules/icm-iis-core-examples/trunk/src/test/resources/eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer_with_subworkflow/oozie_app/my_subworkflow/workflow.xml
1 1
<workflow-app xmlns="uri:oozie:workflow:0.3" name="my_subworkflow">
2
    <start to="python_cloner2"/>
2
    
3
    <start to="generate-schema" />
4

  
5
	<action name="generate-schema">
6
	    <java>
7
    		<job-tracker>${jobTracker}</job-tracker>
8
        	<name-node>${nameNode}</name-node>
9
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
10
	        <arg>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</arg>
11
	        <capture-output />
12
	    </java>
13
	    <ok to="python_cloner2" />
14
	    <error to="fail" />
15
	</action>
16
    
3 17
    <action name="python_cloner2">
4 18
        <map-reduce>
5 19
            <job-tracker>${jobTracker}</job-tracker>
......
39 53
                    <name>mapred.input.dir</name>
40 54
                    <value>${input_person}</value>
41 55
                </property>
42
                 <!-- Name of the input schema. -->
56
				<property>
57
				    <name>input.schema.literal</name>
58
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person']}</value>
59
				</property>
43 60
                <property>
44
                    <name>eu.dnetlib.iis.avro.input.class</name>
45
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
46
                </property>
47
                <property>
48 61
                    <name>mapred.output.dir</name>
49 62
                    <value>${output_person}</value>
50 63
                </property>
51
                <!-- Name of the output schema. -->
52
                <property>
53
                    <name>eu.dnetlib.iis.avro.output.class</name>
54
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
55
                </property>
64
				<property>
65
				    <name>output.schema.literal</name>
66
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person']}</value>
67
				</property>
56 68
            </configuration>
57 69
        </map-reduce>
58 70
        <ok to="end"/>
modules/icm-iis-core-examples/trunk/src/test/resources/eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer_with_subworkflow/oozie_app/workflow.xml
2 2
<!-- Note that documentation placed in comments in this file uses the 
3 3
"markdown" syntax (along with its way of dividing text into sections). -->
4 4
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-core_examples_hadoopstreaming_cloner_without_reducer_with_subworkflow">
5
	<start to="data_producer" />
5
	<start to="generate-schema" />
6

  
7
	<action name="generate-schema">
8
	    <java>
9
    		<job-tracker>${jobTracker}</job-tracker>
10
        	<name-node>${nameNode}</name-node>
11
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
12
	        <arg>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</arg>
13
	        <capture-output />
14
	    </java>
15
	    <ok to="data_producer" />
16
	    <error to="fail" />
17
	</action>
18
	
6 19
	<action name="data_producer">
7 20
		<java>
8 21
			<job-tracker>${jobTracker}</job-tracker>
......
68 81
                    <name>mapred.input.dir</name>
69 82
                    <value>${workingDir}/data_producer/person</value>
70 83
                </property>
71
                <!-- Name of the input schema. -->
84
				<property>
85
				    <name>input.schema.literal</name>
86
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person']}</value>
87
				</property>
72 88
                <property>
73
                    <name>eu.dnetlib.iis.avro.input.class</name>
74
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
75
                </property>
76
                <property>
77 89
                    <name>mapred.output.dir</name>
78 90
                    <value>${workingDir}/python_cloner/output</value>
79 91
                </property>
80
                <!-- Name of the output schema. -->
81
                <property>
82
                    <name>eu.dnetlib.iis.avro.output.class</name>
83
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
84
                </property>
92
				<property>
93
				    <name>output.schema.literal</name>
94
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person']}</value>
95
				</property>
85 96
            </configuration>
86 97
        </map-reduce>
87 98
        <ok to="my_subworkflow"/>
modules/icm-iis-core-examples/trunk/src/test/resources/eu/dnetlib/iis/core/examples/hadoopstreaming/cloner_without_reducer/oozie_app/workflow.xml
2 2
<!-- Note that documentation placed in comments in this file uses the 
3 3
"markdown" syntax (along with its way of dividing text into sections). -->
4 4
<workflow-app xmlns="uri:oozie:workflow:0.3" name="test-core_examples_hadoopstreaming_cloner_without_reducer">
5
	<start to="data_producer" />
5
	<start to="generate-schema" />
6

  
7
	<action name="generate-schema">
8
	    <java>
9
    		<job-tracker>${jobTracker}</job-tracker>
10
        	<name-node>${nameNode}</name-node>
11
	        <main-class>eu.dnetlib.iis.core.javamapreduce.hack.AvroSchemaGenerator</main-class>
12
	        <arg>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</arg>
13
	        <capture-output />
14
	    </java>
15
	    <ok to="data_producer" />
16
	    <error to="fail" />
17
	</action>
18
	
6 19
	<action name="data_producer">
7 20
		<java>
8 21
			<job-tracker>${jobTracker}</job-tracker>
......
68 81
                    <name>mapred.input.dir</name>
69 82
                    <value>${workingDir}/data_producer/person</value>
70 83
                </property>
71
                <!-- Name of the input schema. -->
84
				<property>
85
				    <name>input.schema.literal</name>
86
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person']}</value>
87
				</property>
72 88
                <property>
73
                    <name>eu.dnetlib.iis.avro.input.class</name>
74
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
75
                </property>
76
                <property>
77 89
                    <name>mapred.output.dir</name>
78 90
                    <value>${workingDir}/python_cloner/output</value>
79 91
                </property>
80
                <!-- Name of the output schema. -->
81
                <property>
82
                    <name>eu.dnetlib.iis.avro.output.class</name>
83
                    <value>eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person</value>
84
                </property>
92
				<property>
93
				    <name>output.schema.literal</name>
94
				    <value>${wf:actionData('generate-schema')['eu.dnetlib.iis.core.examples.schemas.documentandauthor.Person']}</value>
95
				</property>
85 96
            </configuration>
86 97
        </map-reduce>
87 98
        <ok to="consumer"/>

Also available in: Unified diff