特色栏目： python 批处理 net编程 Javascript Php Asp Css Html5 Android seo centos

HadoopMapRduce重写DBOutputFormat更新mysql数据库【MySQL】

来源：互联网收集：自由互联发布时间：2023-07-02

数据库|mysql教程数据库-mysql教程在http:blog.csdn.netsunflower_caoarticledetails28266939写过可以通过继承Writab 数据库|mysql教程数据库-mysql教程在http://blog.csdn.net/sunflower_cao/article/details/28266939 写过可

数据库|mysql教程数据库-mysql教程在http:blog.csdn.netsunflower_caoarticledetails28266939写过可以通过继承Writab 数据库|mysql教程 Hadoop MapRduce 重写DBOutputFormat更新mysql数据库【MySQL】

Hadoop MapRduce 重写DBOutputFormat更新mysql数据库【MySQL】

数据库-mysql教程在http://blog.csdn.net/sunflower_cao/article/details/28266939 写过可以通过继承 Writable, DBWritable实现在reduce过程中讲结果写入到mysql数据库里边，但是一直有一个问题就是只能实现insert 没法去更新已经存在的数据，这就导致不同的mapreduce程序获得的数据只能插入到不同的数据库中在使用的时候需要建立view或者使用复杂的sql语句去查询，今天调查了下，发现可以通过重写DBOutputFormat贷款软件源码,自动发邮件 ubuntu,隐藏tomcat程序名,网页爬虫符号,php数组用逗号分割,seo餐馆lzw上代码：安卓微信源码,ubuntu开启桌面显示,如何阿里云用tomcat,上海婚恋爬虫,php好看的控件,seo是seolzwTblsWritable.javaasp的网站源码,vscode bug,ubuntu 跑分,tomcat软件,sqlite实现聊天,云服务器样备份数据,ecshop小京东插件,前端框架模板网站,爬虫加热垫温度,php操作类,黑酷SEO培训,鼎峰互动网站源码源码,网页游戏一键安装端,色块网页模板,dede商品页面,内容管理系统开发,影视管理程序吧lzw

import java.io.DataInput;import java.io.DataOutput;import java.io.IOException;import java.sql.PreparedStatement;import java.sql.ResultSet;import java.sql.SQLException;import org.apache.hadoop.io.Writable;import org.apache.hadoop.mapreduce.lib.db.DBWritable;/** * 重写DBWritable * * @author caozw TblsWritable需要向mysql中写入数据 */public class TblsWritable implements Writable, DBWritable {String initTime;String new_android_user;String new_ios_user;String new_total_user;String iosUserTotal;String androidUserTotal;String userTotal;public TblsWritable() {}public TblsWritable(String initTime, String new_android_user,String new_ios_user, String new_total_user,String iosUserTotal, String androidUserTotal, String userTotal) {this.initTime = initTime;this.new_android_user = new_android_user;this.new_ios_user = new_ios_user;this.new_total_user = new_total_user;this.iosUserTotal = iosUserTotal;this.androidUserTotal = androidUserTotal;this.userTotal = userTotal;}@Overridepublic void write(PreparedStatement statement) throws SQLException {statement.setString(1, this.new_android_user);statement.setString(2, this.new_ios_user);statement.setString(3, this.new_total_user);statement.setString(4, this.androidUserTotal);statement.setString(5, this.iosUserTotal);statement.setString(6, this.userTotal);statement.setString(7, this.initTime);}@Overridepublic void readFields(ResultSet resultSet) throws SQLException {this.new_android_user = resultSet.getString(1);this.new_ios_user = resultSet.getString(2);this.new_total_user = resultSet.getString(3);this.androidUserTotal = resultSet.getString(4);this.iosUserTotal = resultSet.getString(5);this.userTotal = resultSet.getString(6);this.initTime = resultSet.getString(7);}@Overridepublic void write(DataOutput out) throws IOException {out.writeUTF(this.new_android_user);out.writeUTF(this.new_ios_user);out.writeUTF(this.new_total_user);out.writeUTF(this.androidUserTotal);out.writeUTF(this.iosUserTotal);out.writeUTF(this.userTotal);out.writeUTF(this.initTime);}@Overridepublic void readFields(DataInput in) throws IOException {this.new_android_user = in.readUTF();this.new_ios_user = in.readUTF();this.new_total_user = in.readUTF();this.androidUserTotal = in.readUTF();this.iosUserTotal = in.readUTF();this.userTotal = in.readUTF();this.initTime = in.readUTF();}public String toString() {return new String(this.initTime + " " + this.new_android_user + " "+ this.new_ios_user + " " + this.new_total_user + " "+ this.androidUserTotal + " " + this.iosUserTotal + " "+ this.userTotal);}}

WriteDataToMysql.java

import java.io.IOException;import java.text.ParseException;import java.text.SimpleDateFormat;import java.util.Date;import java.util.Iterator;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.LongWritable;import org.apache.hadoop.io.Text;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.Mapper;import org.apache.hadoop.mapreduce.Reducer;import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;/** * 将mapreduce的结果数据写入mysql中 * * @author caozw */public class WriteDataToMysql {public static String[] fieldNames = { "INITTIME", "NEW_ANDROID_USER","NEW_IOS_USER", "NEW_USER_TOTAL", "TOTAL_ANDROID_USER","TOTAL_IOS_USER", "TOTAL_USER" };public static String table = "USER_INFO_STATIC";public static class ConnMysqlMapper extendsMapper {enum Counter {LINESKIP,}private final static IntWritable One= new IntWritable(1);private final static IntWritable zero = new IntWritable(0);public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {try {String line = value.toString();String[] strings = line.split("/t");String initTime = strings[1];String devType = strings[4];if (initTime.length() == 19) {SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");Date date = sdf.parse(initTime);if ("1".equals(devType)) {context.write(new Text(initTime.substring(0, 10)), one);context.write(new Text(initTime.substring(0, 10)), zero);}} else {// System.err.println(initTime);context.getCounter(Counter.LINESKIP).increment(1);}// } catch (ArrayIndexOutOfBoundsException e) {} catch (ArrayIndexOutOfBoundsException e) {context.getCounter(Counter.LINESKIP).increment(1);return;} catch (ParseException e) {context.getCounter(Counter.LINESKIP).increment(1);return;}}}public static class ConnMysqlReducer extendsReducer {static int iosUserTotal = 0;static int androidUserTotal = 0;public void reduce(Text key, Iterable values,Context context) throws IOException, InterruptedException {int android = 0;int ios = 0;int total = 0;for (Iterator itr = values.iterator(); itr.hasNext();) {total++;if (0 == itr.next().get()) {android++;} else {ios++;}}iosUserTotal += ios;androidUserTotal += android;/* * System.err.println(key.toString() + ":" + String.valueOf(android) * + ":" + String.valueOf(ios) + ":" + String.valueOf(total)); */context.write(new TblsWritable(key.toString(), String.valueOf(android),String.valueOf(ios), String.valueOf(total), String.valueOf(androidUserTotal), String.valueOf(iosUserTotal), String.valueOf(androidUserTotal + iosUserTotal)),null);}}public static void main(String args[]) throws IOException,InterruptedException, ClassNotFoundException {Configuration cOnf= new Configuration();DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver","jdbc:mysql://127.0.0.1:3306/XINGXUNTONG", "hadoop", "123456");Job job = new Job(conf, "test mysql connection");job.setJarByClass(WriteDataToMysql.class);job.setMapperClass(ConnMysqlMapper.class);job.setReducerClass(ConnMysqlReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);job.setSpeculativeExecution(false);job.setInputFormatClass(TextInputFormat.class);job.setOutputFormatClass(MysqlDBOutputFormat.class);//job.setOutputFormatClass(DBOutputFormat.class);FileInputFormat.addInputPath(job, new Path(args[0]));// DBOutputFormat.setOutput(job, "test", "initTime", "new_user_total");//DBOutputFormat.setOutput(job, table, fieldNames);MysqlDBOutputFormat.setOutput(job, table, fieldNames);System.exit(job.waitForCompletion(true) ? 0 : 1);}}

MysqlDBOutputFormat.java

/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.IOException;import java.sql.Connection;import java.sql.PreparedStatement;import java.sql.SQLException;import org.apache.commons.logging.Log;import org.apache.commons.logging.LogFactory;import org.apache.hadoop.classification.InterfaceAudience;import org.apache.hadoop.classification.InterfaceStability;import org.apache.hadoop.mapreduce.Job;import org.apache.hadoop.mapreduce.JobContext;import org.apache.hadoop.mapreduce.OutputCommitter;import org.apache.hadoop.mapreduce.OutputFormat;import org.apache.hadoop.mapreduce.RecordWriter;import org.apache.hadoop.mapreduce.TaskAttemptContext;import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;import org.apache.hadoop.mapreduce.lib.db.DBWritable;import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import org.apache.hadoop.util.StringUtils;/** * A OutputFormat that sends the reduce output to a SQL table. *

* {@link MysqlDBOutputFormat} accepts pairs, where * key has a type extending DBWritable. Returned {@link RecordWriter} * writes only the key to the database with a batch SQL query. * */@InterfaceAudience.Public@InterfaceStability.Stablepublic class MysqlDBOutputFormat extends OutputFormat { private static final Log LOG = LogFactory.getLog(MysqlDBOutputFormat.class); public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {} public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException { return new FileOutputCommitter(FileOutputFormat.getOutputPath(context), context); } /** * A RecordWriter that writes the reduce output to a SQL table */ @InterfaceStability.Evolving public class DBRecordWriter extends RecordWriter { private Connection connection; private PreparedStatement statement; public DBRecordWriter() throws SQLException { } public DBRecordWriter(Connection connection , PreparedStatement statement) throws SQLException { this.cOnnection= connection; this.statement = statement; this.connection.setAutoCommit(false); } public Connection getConnection() { return connection; } public PreparedStatement getStatement() { return statement; } /** {@inheritDoc} */ public void close(TaskAttemptContext context) throws IOException { try { statement.executeBatch(); connection.commit(); } catch (SQLException e) { try { connection.rollback(); } catch (SQLException ex) { LOG.warn(StringUtils.stringifyException(ex)); } throw new IOException(e.getMessage()); } finally { try { statement.close(); connection.close(); } catch (SQLException ex) { throw new IOException(ex.getMessage()); } } } /** {@inheritDoc} */ public void write(K key, V value) throws IOException { try { key.write(statement); statement.addBatch(); } catch (SQLException e) { e.printStackTrace(); } } } /** * Constructs the query used as the prepared statement to insert data. * * @param table * the table to insert into * @param fieldNames * the fields to insert into. If field names are unknown, supply an * array of nulls. */ public String constructQuery(String table, String[] fieldNames) {if (fieldNames == null) {throw new IllegalArgumentException("Field names may not be null");}StringBuilder query = new StringBuilder();query.append("UPDATE ").append(table);System.err.println("fieldNames.length:" + fieldNames.length);if (fieldNames.length > 0) {query.append(" SET ");query.append(fieldNames[1] + " = ?");query.append("," + fieldNames[2] + " = ?");query.append("," + fieldNames[3] + " = ?");query.append("," + fieldNames[4] + " = ?");query.append("," + fieldNames[5] + " = ?");query.append("," + fieldNames[6] + " = ?");query.append(" WHERE ");query.append(fieldNames[0] + " = ?");System.err.println(query.toString());return query.toString();} else {return null;} } /** {@inheritDoc} */ public RecordWriter getRecordWriter(TaskAttemptContext context) throws IOException { DBConfiguration dbCOnf= new DBConfiguration(context.getConfiguration()); String tableName = dbConf.getOutputTableName(); String[] fieldNames = dbConf.getOutputFieldNames(); if(fieldNames == null) { fieldNames = new String[dbConf.getOutputFieldCount()]; } try { Connection cOnnection= dbConf.getConnection(); PreparedStatement statement = null; statement = connection.prepareStatement( constructQuery(tableName, fieldNames)); return new DBRecordWriter(connection, statement); } catch (Exception ex) { throw new IOException(ex.getMessage()); } } /** * Initializes the reduce-part of the job with * the appropriate output settings * * @param job The job * @param tableName The table to insert data into * @param fieldNames The field names in the table. */ public static void setOutput(Job job, String tableName, String... fieldNames) throws IOException { if(fieldNames.length > 0 dbConf.setOutputFieldNames(fieldNames); } else { if (fieldNames.length > 0) { setOutput(job, tableName, fieldNames.length); } else { throw new IllegalArgumentException( "Field names must be greater than 0"); } } } /** * Initializes the reduce-part of the job * with the appropriate output settings * * @param job The job * @param tableName The table to insert data into * @param fieldCount the number of fields in the table. */ public static void setOutput(Job job, String tableName, int fieldCount) throws IOException { DBConfiguration dbCOnf= setOutput(job, tableName); dbConf.setOutputFieldCount(fieldCount); } private static DBConfiguration setOutput(Job job, String tableName) throws IOException { job.setOutputFormatClass(MysqlDBOutputFormat.class); job.setReduceSpeculativeExecution(false); DBConfiguration dbCOnf= new DBConfiguration(job.getConfiguration()); dbConf.setOutputTableName(tableName); return dbConf; }}

MysqlDBOutputFormat.java是将hadoop源码里边的DBOutputFormat.java拿过来重写了里边的constructQuery的方法，在生成sql语句的时候产生update的sql语句来实现的

DBOutputFormat.java里边的constructQuery

public String constructQuery(String table, String[] fieldNames) { if(fieldNames == null) { throw new IllegalArgumentException("Field names may not be null"); } StringBuilder query = new StringBuilder(); query.append("INSERT INTO ").append(table); if (fieldNames.length > 0 for (int i = 0; i 重写后：

public String constructQuery(String table, String[] fieldNames) {if (fieldNames == null) {throw new IllegalArgumentException("Field names may not be null");}StringBuilder query = new StringBuilder();query.append("UPDATE ").append(table);System.err.println("fieldNames.length:" + fieldNames.length);if (fieldNames.length > 0) {query.append(" SET ");query.append(fieldNames[1] + " = ?");query.append("," + fieldNames[2] + " = ?");query.append("," + fieldNames[3] + " = ?");query.append("," + fieldNames[4] + " = ?");query.append("," + fieldNames[5] + " = ?");query.append("," + fieldNames[6] + " = ?");query.append(" WHERE ");query.append(fieldNames[0] + " = ?");System.err.println(query.toString());return query.toString();} else {return null;} }

按照java的思想我这里其实可以通过extends DBOutputFomat来实现update mysql的功能，但是我试了死活不行，暂且记下有时间再来仔细揣摩

重载方式代码（测试未通过）有知道为什么的还请不吝赐教

import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;import org.apache.hadoop.mapreduce.lib.db.DBWritable;public class MysqlOutputFormat1 extends DBOutputFormat {public String constructQuery(String table, String[] fieldNames) {if (fieldNames == null) {throw new IllegalArgumentException("Field names may not be null");}StringBuilder query = new StringBuilder();query.append("UPDATE ").append(table);System.err.println("fieldNames.length:"+fieldNames.length);if (fieldNames.length > 0) {query.append(" SET ");query.append(fieldNames[1] + " = ?");query.append("," + fieldNames[2] + " = ?");query.append("," + fieldNames[3] + " = ?");query.append("," + fieldNames[4] + " = ?");query.append("," + fieldNames[5] + " = ?");query.append("," + fieldNames[6] + " = ?");query.append(" WHERE ");query.append(fieldNames[0] + " = ?");System.err.println(query.toString());return query.toString();} else {return null;}}}

另外可以通过我感觉可以通过sql语句的merge方式生成merge语句从而实现有则更新没有则插入参考网页http://en.wikipedia.org/wiki/Merge_%28SQL%29

--http://en.wikipedia.org/wiki/Merge_%28SQL%29 MERGE INTO tablename USING table_reference ON (condition) WHEN MATCHED THEN UPDATE SET column1 = value1 [, column2 = value2 ...] WHEN NOT MATCHED THEN INSERT (column1 [, column2 ...]) VALUES (value1 [, value2 ...

上一篇：Angular——npmERR!
下一篇：没有了

HadoopMapRduce重写DBOutputFormat更新mysql数据库【MySQL】

相关文章