Hadoop MultipleOutputCollector MRUnit Test Case

Code snippet for a test case that uses MRUnit and Mockito to test a Hadoop Reducer with MultipleOutputs. Unfortunately, the MRUnit ReduceDriver does not return any result data when the reducer writes through multiple outputs.

// Mock collectors, keyed by the directory name they are registered under in setup().
// The value type is fully parameterized so that reading a collector back out of the
// map does not rely on a raw type.
private HashMap<String, MockOutputCollector<NullWritable, Object>> mockOutputCollectors =
		new HashMap<String, MockOutputCollector<NullWritable, Object>>();

@Test
public void test() {
	driver.withInput(new Text(INPUT_KEY, INPUT).run();
	List<Type> resultList = new ArrayList<Type>();

	// This has to be done that complicated way as the above driver.run() only returns an empty list
	// if run with ReduceDriver. For some reason it works with MapDriver, though...
	for (String collectorDirectory : mockOutputCollectors.keySet()) {
		MockOutputCollector<NullWritable, Object> mockOutputCollector = mockOutputCollectors
				.get(collectorDirectory);
		for (Pair<NullWritable, Object> pair : mockOutputCollector.getOutputs()) {
			if (pair.getSecond() instanceof MultipleOutputType1) {
				MultipleOutputType1 multipleOutputType = (MultipleOutputType1) pair.getSecond();
				logger.debug("MultipleOutputType1: "
						+ multipleOutputType.toString());
			}
			if  (pair.getSecond() instanceof MultipleOutputType2) {
				[...]
			}

		}
	}
}

/**
 * Registers one mock output collector for every value of Directories plus one
 * for OUTPUT_DIRECTORY_NAME, and stubs multipleOutputs.getCollector(...) so that
 * each name yields its own collector.
 */
@Before
public void setup() {
	for (Directories directory : Directories.values()) {
		registerMockCollector(directory.getDirectoryName());
	}
	registerMockCollector(OUTPUT_DIRECTORY_NAME);
}

/**
 * Creates a mock collector, stores it in mockOutputCollectors under the given name
 * and stubs multipleOutputs.getCollector(name, reporter) to return it.
 *
 * The same local instance is used for both the map entry and the stub, so the two
 * cannot drift apart. The original looked the instance up again under
 * Constants.OUTPUT_DIRECTORY_NAME although it had been stored under
 * OUTPUT_DIRECTORY_NAME.
 *
 * @param directoryName name the collector is registered and stubbed under
 */
private void registerMockCollector(String directoryName) {
	MockOutputCollector<NullWritable, Object> collector =
			new MockOutputCollector<NullWritable, Object>();
	mockOutputCollectors.put(directoryName, collector);
	when(multipleOutputs.getCollector(eq(directoryName), argThat(new IsMockReporter())))
			.thenReturn(collector);
}

Leave a Reply

Your email address will not be published. Required fields are marked *