| DEFLATE | org.apache.hadoop.io.compress.DefaultCodec |
| gzip | org.apache.hadoop.io.compress.GzipCodec |
| bzip2 | org.apache.hadoop.io.compress.BZip2Codec |
| LZO | org.apache.hadoop.io.compress.lzo.LzopCodec |
압축기본코드
import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionOutputStream; import org.apache.hadoop.util.ReflectionUtils; public class StreamCompressor { /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub String codecClassName = args[0]; try { Class<?> codecClass = Class.forName(codecClassName); Configuration conf = new Configuration(); CompressionCodec codec = (CompressionCodec) >ReflectionUtils.newInstance(codecClass, conf); CompressionOutputStream out = codec.createOutputStream(System.out); IOUtils.copyBytes(System.in, out, 4096, false); out.finish(); } catch (IOException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
(1) Writable 인터페이스
(2) WritableComparable 인터페이스
(3) RawComparator 인터페이스
(4) WritableComparator
(1) SequenceFile
생성코드
import java.io.IOException; import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; public class SequenceFileWriteDemo { private static final String[] DATA ={ "One, two, buckle my shoe", "Three, four, shut the door", "five, six, pick up sticks", "Seven, eight, lay them straight", "Nine, ten, a big fat gen" }; /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub String uri = args[0]; Configuration conf = new Configuration(); SequenceFile.Writer writer = null; try { FileSystem fs = FileSystem.get(URI.create(uri), conf); Path path = new Path(uri); IntWritable key = new IntWritable(); Text value = new Text(); writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass()); for(int i=0; i<100; i++){ key.set(100 - i); value.set(DATA[i%DATA.length]); System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value); writer.append(key, value); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); }finally{ IOUtils.closeStream(writer); } } }


(2) MapFile