# Install pyodbc
pip install pyodbc

# Install pandas
pip install pandas
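
With both packages installed, a minimal sketch of using them together -- connecting through pyodbc and loading a query result into a pandas DataFrame -- could look like the following. The driver name, server, credentials and table name are placeholders, not values from this article.

import pyodbc
import pandas as pd

# Placeholder connection details -- substitute your own driver, server and credentials.
conn = pyodbc.connect(
    "DRIVER={ODBC Driver 17 for SQL Server};"
    "SERVER=localhost;DATABASE=testdb;UID=user;PWD=password"
)

# Load a (placeholder) table into a DataFrame for further processing.
df = pd.read_sql("SELECT * FROM some_table", conn)
print(df.head())
conn.close()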
// Open the uploaded Excel file via OLE DB and read Sheet1 into a DataTable.
OleDbConnection con = new OleDbConnection("Provider=Microsoft.Jet.OLEDB.4.0;"
    + "Data Source=" + Server.MapPath("../../uploadfiles/") + str
    + ";Extended Properties=Excel 8.0;");
OleDbDataAdapter da = new OleDbDataAdapter("SELECT * FROM [Sheet1$]", con);
DataTable dt = new DataTable();
da.Fill(dt);

// One GUID identifies this imported batch.
string rowGuid = Guid.NewGuid().ToString();

// Remove rows whose required columns (dealer code, date, amount) are all empty.
DataRow[] rows = dt.Select("[经销商编码(*)] Is Null And [日期(*)] Is Null And [金额(*)] Is Null");
foreach (DataRow row in rows)
{
    dt.Rows.Remove(row);
}

// Append the batch GUID as an extra column.
DataColumn column = new DataColumn("rowGuid");
column.DefaultValue = rowGuid;
dt.Columns.Add(column);

// Bulk-copy the DataTable into the SQL Server staging table.
SqlBulkCopy copy = new SqlBulkCopy(WebConfigurationManager.AppSettings["ConnectionString"].ToString());
copy.BatchSize = 1000;
copy.ColumnMappings.Add(0, "SDDealerCode");
copy.ColumnMappings.Add(1, "SDDealerName");
copy.ColumnMappings.Add(2, "PayDate");
copy.ColumnMappings.Add(3, "Balance");
copy.ColumnMappings.Add(4, "RowGuid");
copy.DestinationTableName = "UT_PrePaid_Temp";
copy.WriteToServer(dt);
HdfsWriter:
  
 public void startWrite(RecordReceiver lineReceiver) {
             LOG.info("begin do write...");
             LOG.info(String.format("write to file : [%s]", this.fileName));
             if(fileType.equalsIgnoreCase("TEXT")){
                 // write a TEXT file
                 hdfsHelper.textFileStartWrite(lineReceiver,this.writerSliceConfig, this.fileName,
                         this.getTaskPluginCollector());
             }else if(fileType.equalsIgnoreCase("ORC")){
                 // write an ORC file
                 hdfsHelper.orcFileStartWrite(lineReceiver,this.writerSliceConfig, this.fileName,
                         this.getTaskPluginCollector());
             }
  
             LOG.info("end do write");
 }
        Step into hdfsHelper to see the actual write logic. HdfsHelper:
  
 // TEXT
 public void textFileStartWrite(RecordReceiver lineReceiver, Configuration config, String fileName,
                                    TaskPluginCollector taskPluginCollector){
 ...
             RecordWriter writer = outFormat.getRecordWriter(fileSystem, conf, outputPath.toString(), Reporter.NULL);
             Record record = null;
             while ((record = lineReceiver.getFromReader()) != null) {
                 MutablePair<Text, Boolean> transportResult = transportOneRecord(record, fieldDelimiter, columns, taskPluginCollector);
                 if (!transportResult.getRight()) {
                     writer.write(NullWritable.get(),transportResult.getLeft());
                 }
             }
             writer.close(Reporter.NULL);
 ...
 }
  
 // ORC
 public void orcFileStartWrite(RecordReceiver lineReceiver, Configuration config, String fileName,
                                   TaskPluginCollector taskPluginCollector){
 ...
         List<String> columnNames = getColumnNames(columns);
         // Get the ObjectInspector for each column type; this method is key -- the later changes for decimal-type columns rely on it
         List<ObjectInspector> columnTypeInspectors = getColumnTypeInspectors(columns);
         StructObjectInspector inspector = (StructObjectInspector)ObjectInspectorFactory
                 .getStandardStructObjectInspector(columnNames, columnTypeInspectors);
 ...
             RecordWriter writer = outFormat.getRecordWriter(fileSystem, conf, fileName, Reporter.NULL);
             Record record = null;
             while ((record = lineReceiver.getFromReader()) != null) {
                 MutablePair<List<Object>, Boolean> transportResult =  transportOneRecord(record,columns,taskPluginCollector);
                 if (!transportResult.getRight()) {
 // ORC records must be serialized with the matching ObjectInspector before they can be written to HDFS
                     writer.write(NullWritable.get(), orcSerde.serialize(transportResult.getLeft(), inspector));
                 }
             }
             writer.close(Reporter.NULL);
 ...
 }
  
 // Convert the record strings received from the channel according to the configured field types
 public static MutablePair<List<Object>, Boolean> transportOneRecord(
             Record record,List<Configuration> columnsConfiguration,
             TaskPluginCollector taskPluginCollector){
 ...
  for (int i = 0; i < recordLength; i++) {
       column = record.getColumn(i);
       //todo as method
       if (null != column.getRawData()) {
       String rowData = column.getRawData().toString();
       // DataX's enum of supported Hive data types
       SupportHiveDataType columnType = SupportHiveDataType.valueOf(columnsConfiguration.get(i).getString(Key.TYPE).toUpperCase());
       // Convert according to the type configured on the writer side
       switch (columnType) {
           case TINYINT:
                recordList.add(Byte.valueOf(rowData));
                break;
 ...
 }
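
Conceptually, the conversion is a lookup from the configured column type to a parser applied to each field's raw string. A rough Python analogue of this dispatch (not DataX code; the converter table below is illustrative and covers only a few types):

from decimal import Decimal

# Illustrative converters keyed by (a subset of) Hive type names.
CONVERTERS = {
    "TINYINT": int,
    "INT": int,
    "BIGINT": int,
    "DOUBLE": float,
    "DECIMAL": Decimal,   # exact decimal parsing instead of float
    "STRING": str,
}

def transport_one_record(raw_fields, column_types):
    """Convert each raw string field according to its configured type."""
    converted = []
    for raw, type_name in zip(raw_fields, column_types):
        if raw is None:
            converted.append(None)
        else:
            converted.append(CONVERTERS[type_name.upper()](raw))
    return converted

print(transport_one_record(["1", "3.14", "99.999999"], ["TINYINT", "DOUBLE", "DECIMAL"]))
# [1, 3.14, Decimal('99.999999')]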
-- Create a temporary table
CREATE TABLE temp_table (
  numerator DECIMAL(10, 2),
  denominator DECIMAL(10, 2)
);

-- Insert test data into the temporary table
INSERT INTO temp_table VALUES (10.5, 2.5), (20.0, 4.0), (30.75, 3.25);
from decimal import Decimal

# 1. Pass in the float 5.55
a = Decimal(5.55)
print(type(a))
print('a = ', a)

print('\n')

# 2. Pass in the string '5.55'
b = Decimal('5.55')
print(type(b))
print('b = ', b)

print('\n')

# 3. Pass in the integer 5
c = Decimal(5)
print(type(c))
print('c = ', c)
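
The point of the three cases: a Decimal built from a float inherits the float's binary rounding error, while one built from a string (or an int) is exact. A quick check with the standard library:

from decimal import Decimal

# The float 5.55 cannot be represented exactly in binary,
# so Decimal(5.55) carries that error along.
print(Decimal(5.55) == Decimal('5.55'))   # False
print(Decimal(5) == Decimal('5'))         # True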
>>> 0.1 + 0.2
0.30000000000000004
>>> 0.1 + 0.2 == 0.3
False
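
This is the classic binary floating-point surprise that decimal avoids:

from decimal import Decimal

print(Decimal('0.1') + Decimal('0.2'))                     # 0.3
print(Decimal('0.1') + Decimal('0.2') == Decimal('0.3'))   # True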
from pymysqlreplication import BinLogStreamReader
from pymysqlreplication.row_event import (
    DeleteRowsEvent,
    UpdateRowsEvent,
    WriteRowsEvent,
)

def readBinLog():
    stream = BinLogStreamReader(
        # Fill in the host, user and password
        connection_settings = {
            'host': '',
            'port': 3306,
            'user': '',
            'passwd': ''
        },
        # Must be unique per server
        server_id = 3,
        # Whether to keep the connection blocking (waiting) once the master's binlog has been fully read
        blocking = True,
        # Only listen to these tables
        only_tables = ['order_info', 'log_info'],
        # Only listen to these event types
        only_events = [DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent])

    for binlogevent in stream:
        for row in binlogevent.rows:
            event = {
                "schema": binlogevent.schema,
                "table": binlogevent.table,
                "log_pos": binlogevent.packet.log_pos
            }
            if isinstance(binlogevent, DeleteRowsEvent):
                event["action"] = "delete"
                event["origin"] = dict(row["values"])
                event["current"] = None
            elif isinstance(binlogevent, UpdateRowsEvent):
                event["action"] = "update"
                event["origin"] = dict(row["before_values"])
                event["current"] = dict(row["after_values"])
            elif isinstance(binlogevent, WriteRowsEvent):
                event["action"] = "insert"
                event["origin"] = None
                event["current"] = dict(row["values"])
            # Hand the change event to downstream processing (print used here as a placeholder)
            print(event)
    stream.close()
The arguments here are the already-computed precision and scale, e.g. decimal(77,36), decimal(38,18), and so on.
protected DecimalTypeInfo adjustPrecScale(int precision, int scale) {
  // Assumptions:
  // precision >= scale
  // scale >= 0
  // i.e. precision p must be >= scale s -- a hard requirement, nothing surprising
  if (precision <= HiveDecimal.MAX_PRECISION) {
    // Adjustment only needed when we exceed max precision
    return new DecimalTypeInfo(precision, scale);
  }
  // MAX_PRECISION is 38, so when p <= 38 the pair is returned unchanged: (38,18), (38,38), (38,0), ...

  // Precision/scale exceed maximum precision. Result must be adjusted to HiveDecimal.MAX_PRECISION.
  // See https://blogs.msdn.microsoft.com/sqlprogrammability/2006/03/29/multiplication-and-division-with-numerics/
  int intDigits = precision - scale; // number of integer digits
  // If original scale less than 6, use original scale value; otherwise preserve at least 6 fractional digits
  // i.e. if the original scale is below 6 keep it, otherwise fall back to 6
  int minScaleValue = Math.min(scale, MINIMUM_ADJUSTED_SCALE); // minimum scale: never more than 6 (1 stays 1, 6 stays 6, 7 becomes 6, 30 becomes 6)
  int adjustedScale = HiveDecimal.MAX_PRECISION - intDigits; // 38 minus the integer digits
  adjustedScale = Math.max(adjustedScale, minScaleValue); // take the larger of the two
  // In effect this just works out how many fractional digits remain, since the precision is now fixed at 38
  return new DecimalTypeInfo(HiveDecimal.MAX_PRECISION, adjustedScale);
}
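
The arithmetic is easy to sanity-check outside Hive. Below is a small Python sketch of the same adjustment, assuming Hive's constants MAX_PRECISION = 38 and MINIMUM_ADJUSTED_SCALE = 6 as described above:

MAX_PRECISION = 38          # HiveDecimal.MAX_PRECISION
MINIMUM_ADJUSTED_SCALE = 6  # minimum number of fractional digits to preserve

def adjust_prec_scale(precision, scale):
    # Nothing to do while we are within the maximum precision.
    if precision <= MAX_PRECISION:
        return precision, scale
    int_digits = precision - scale                  # integer digits
    min_scale = min(scale, MINIMUM_ADJUSTED_SCALE)  # keep the original scale if below 6, else 6
    adjusted_scale = max(MAX_PRECISION - int_digits, min_scale)
    return MAX_PRECISION, adjusted_scale

print(adjust_prec_scale(77, 36))  # (38, 6)  -- 41 integer digits leave no room, fall back to 6
print(adjust_prec_scale(38, 18))  # (38, 18) -- already within the limit, unchanged
print(adjust_prec_scale(45, 10))  # (38, 6)  -- 38 - 35 = 3 is less than 6, so use 6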
-- Illustrative only: the DETERMINISTIC body and the 10% rate below are assumptions, not part of the original.
CREATE FUNCTION calculate_tax(amount DECIMAL(10,2)) RETURNS DECIMAL(10,2)
DETERMINISTIC
RETURN amount * 0.10;
-- Create a table
CREATE TABLE IF NOT EXISTS test_table (
    dividend DECIMAL(10, 2),
    divisor DECIMAL(10, 2)
);

-- Insert data
INSERT INTO test_table (dividend, divisor) VALUES (10.50, 2.5);

-- Query the result
SELECT dividend, divisor, dividend/divisor AS result FROM test_table;
CREATE TABLE `test_float_double_decimal` (
  `id` int(11) NOT NULL COMMENT 'id',
  `float7.4` float(7,4) NOT NULL,
  `float255.30` float(255,30) NOT NULL,
  `double15.4` double(15,4) NOT NULL,
  `double255.30` double(255,30) NOT NULL,
  `decimal10.4` decimal(10,4) NOT NULL,
  `decimal65.30` decimal(65,30) NOT NULL,
  `float_default` float NOT NULL,
  `double_default` double NOT NULL,
  `decimal_default` decimal(10,0) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
import java.math.BigDecimal;

public class Test {
    public static void main(String[] args){
        BigDecimal b1 = new BigDecimal("1");
        BigDecimal b2 = new BigDecimal("2");
        BigDecimal b3 = new BigDecimal("4");
        System.out.println("add: " + b1.add(b2));           // 3
        System.out.println("subtract: " + b1.subtract(b2)); // -1
        System.out.println("multiply: " + b2.multiply(b3)); // 8
        // 2 / 4 terminates, so plain divide() is fine; a non-terminating quotient (e.g. 1 / 3)
        // needs an explicit scale and RoundingMode to avoid ArithmeticException.
        System.out.println("divide: " + b2.divide(b3));      // 0.5
    }
}
decimal.ROUND_CEILING   | Round towards Infinity.
decimal.ROUND_DOWN      | Round towards zero.
decimal.ROUND_FLOOR     | Round towards -Infinity.
decimal.ROUND_HALF_DOWN | Round to the nearest value; ties go towards zero.
decimal.ROUND_HALF_EVEN | Round to the nearest value; ties go to the nearest even digit.
decimal.ROUND_HALF_UP   | Round to the nearest value; ties go away from zero.
decimal.ROUND_UP        | Round away from zero.
decimal.ROUND_05UP      | Round away from zero if the last digit after rounding towards zero would have been 0 or 5; otherwise round towards zero.
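
These constants are typically passed to Decimal.quantize() (or set on the context); the two "half" modes are the ones most often confused:

from decimal import Decimal, ROUND_HALF_UP, ROUND_HALF_EVEN

# A tie like 2.5 rounds differently under the two "half" strategies.
print(Decimal('2.5').quantize(Decimal('1'), rounding=ROUND_HALF_UP))    # 3
print(Decimal('2.5').quantize(Decimal('1'), rounding=ROUND_HALF_EVEN))  # 2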
When creating a table, you can use LOCATION to point it at a file or a directory:
create table stu(id int, name string) location '/user/stu2';