#!/bin/bash
# =========================================================
# Sync script: td_grid (Linux Bash style)
# Flow: Hive computation -> HDFS extraction -> psql \copy load
#
# Usage: run with no arguments. Requires the `hive`, `hdfs` and `psql`
# command-line clients on PATH.
# =========================================================

# Abort on the first failing command, unset variable, or failed pipeline
# stage, so that a failed Hive or HDFS step can never reach the TRUNCATE
# further down and leave the target table empty.
set -euo pipefail

# 1. Configuration
SCHEMA="dmk"
TABLE_NAME="td_grid"
HDFS_ROOT="/user/hive/warehouse/dmk.db"
# NOTE(review): date-stamped, predictable directory under /tmp, kept so the
# merged CSV stays where existing tooling may expect it; consider `mktemp -d`.
LOCAL_TEMP_DIR="/tmp/dmk_sync_$(date +%Y%m%d)"
LOCAL_CSV="${LOCAL_TEMP_DIR}/${TABLE_NAME}.csv"
# Connection options live in an array so each one reaches psql as its own
# argument without relying on unquoted word splitting.
PG_CONN_ARGS=(-h localhost -p 5432 -U postgres -d dmk_db)
# Target columns, in the order the fields appear in the merged CSV.
COPY_COLUMNS="regionid, x_offset_20, y_offset_20, provincecode, province_name, citycode, city_name, districtcode, district_name, center_lon, center_lat, grid_wkt"

mkdir -p -- "${LOCAL_TEMP_DIR}"

# 2. Run the Hive computation
# NOTE(review): compute.sql is a relative path; presumably it is resolved
# against the directory the script is started from -- confirm.
echo "Step 1: Running Hive computation..."
hive -e "source compute.sql"

# 3. Extract the data from HDFS
# The glob is quoted so the local shell passes it through untouched and only
# the HDFS client expands it.
echo "Step 2: Merging data from HDFS..."
hdfs dfs -getmerge "${HDFS_ROOT}/${TABLE_NAME}/*" "${LOCAL_CSV}"

# 4. Load into PostgreSQL
# TRUNCATE and \copy run in one psql session wrapped in a single transaction:
# if the load fails, ON_ERROR_STOP makes psql exit non-zero and the TRUNCATE
# is rolled back, so the table keeps its previous contents instead of being
# left empty.
echo "Step 3: Loading data into PostgreSQL via \copy..."
psql "${PG_CONN_ARGS[@]}" \
  -v ON_ERROR_STOP=1 \
  --single-transaction \
  -c "TRUNCATE TABLE ${SCHEMA}.${TABLE_NAME};" \
  -c "\copy ${SCHEMA}.${TABLE_NAME}(${COPY_COLUMNS}) FROM '${LOCAL_CSV}' WITH CSV DELIMITER ',';"

echo "Done: ${TABLE_NAME} sync completed."