import java.io.IOException; // // Sample Hbase data importer // reads from a file called cdr.data and injects into the cdrs table reporting the date / time every 1000 commits // by Anders Brownworth // http://www.anders.com/ // Data is tab delimited and looks like this: // Acme 2008-10-01 00:00:00 18663001234 19183243501 886 \N SAPULPA \N OK // Acme 2008-10-01 00:00:00 17134871101 15088794403 1206 HOUSTON FOXBORO TX MA // Pathmark 2008-10-01 00:00:00 17132711111 12767392014 1169 HOUSTON GATE CITY TX VA // Archline 2008-10-01 00:00:00 18888001234 19187396502 886 \N SAPULPA \N OK // Pathmark 2008-10-01 00:00:00 18888111234 12622757766 864 \N KENOSHA \N WI // Acme 2008-10-01 00:00:00 17134341202 17578303931 622 HOUSTON NRFOLKZON1 TX VA import java.io.*; import java.util.*; import org.apache.hadoop.fs.*; import org.apache.hadoop.conf.*; import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.io.*; import org.apache.hadoop.hbase.client.*; public class ImportData { public static void main( String args[] ) throws IOException { System.out.println( "starting..." ); HBaseConfiguration hc = new HBaseConfiguration( new Configuration( ) ); HTable ht = new HTable( hc, "cdrs" ); try { int counter = 0; BufferedReader in = new BufferedReader( new FileReader( "cdrs.data" ) ); String line; while ( ( line = in.readLine( ) ) != null ) { StringTokenizer st = new StringTokenizer( line, "\t" ); if ( st.countTokens( ) == 9 ) { String company = st.nextToken( ); String timestamp = st.nextToken( ); String mutatedTimestamp = "2008-10-02 " + timestamp.substring( 11, timestamp.length( ) ); BatchUpdate bu = new BatchUpdate( mutatedTimestamp ); bu.put( "number:from", st.nextToken( ).getBytes( ) ); bu.put( "number:to", st.nextToken( ).getBytes( ) ); bu.put( "company:", company.getBytes( ) ); bu.put( "time:billingSeconds", st.nextToken( ).getBytes( ) ); bu.put( "location:rateCenterFromCity", st.nextToken( ).getBytes( ) ); bu.put( "location:rateCenterfromState", st.nextToken( ).getBytes( ) ); bu.put( "location:rateCenterToCity", st.nextToken( ).getBytes( ) ); bu.put( "location:rateCenterToState", st.nextToken( ).getBytes( ) ); //System.out.println( "committing..." ); ht.commit( bu ); } else System.err.println( "Line " + ( counter + 1 ) + " has wrong number of tokens: " + line ); counter ++; if ( counter % 1000 == 0 ) System.out.println( new Date( ) + " count=" + counter ); } } catch ( IOException ex ) { ex.printStackTrace( ); } System.out.println( "done" ); } }