30 lines
1.1 KiB
Bash

#!/bin/bash
# =========================================================
# Sync script: td_grid
# Pipeline: Hive computation -> HDFS extract -> PostgreSQL \copy load
#
# The steps run in order and the script aborts on the first failure, so
# the PostgreSQL table is only replaced when a fresh, non-empty extract
# is available. TRUNCATE and \copy share one transaction: if the load
# fails, the previous table contents are kept.
# =========================================================
set -euo pipefail

# Print an error message to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# 1. Configuration
readonly SCHEMA="dmk"
readonly TABLE_NAME="td_grid"
readonly HDFS_ROOT="/user/hive/warehouse/dmk.db"
# Connection options are kept in an array so each option stays a separate
# word without relying on unquoted word-splitting.
readonly -a PG_CONN_ARGS=(-h localhost -p 5432 -U postgres -d dmk_db)

# Private, unpredictable scratch directory (instead of a fixed /tmp name),
# removed on every exit path so daily runs do not pile up in /tmp.
LOCAL_TEMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/dmk_sync_$(date +%Y%m%d).XXXXXX")" \
  || die "cannot create temporary directory"
readonly LOCAL_TEMP_DIR
readonly CSV_FILE="${LOCAL_TEMP_DIR}/${TABLE_NAME}.csv"

cleanup() {
  rm -rf -- "${LOCAL_TEMP_DIR:?}"
}
trap cleanup EXIT

# 2. Run the Hive computation
# NOTE(review): compute.sql is resolved relative to the current working
# directory -- confirm the script is always launched from the right place.
echo "Step 1: Running Hive computation..."
hive -e "source compute.sql" || die "Hive computation failed"

# 3. Extract data from HDFS
# The glob is quoted so it is expanded by HDFS, not by the local shell.
echo "Step 2: Merging data from HDFS..."
hdfs dfs -getmerge "${HDFS_ROOT}/${TABLE_NAME}/*" "${CSV_FILE}" \
  || die "HDFS getmerge failed for ${HDFS_ROOT}/${TABLE_NAME}"

# Refuse to wipe the target table when the extract produced no data.
[[ -s "${CSV_FILE}" ]] \
  || die "merged extract ${CSV_FILE} is empty; refusing to truncate ${SCHEMA}.${TABLE_NAME}"

# 4. Load into PostgreSQL
# ON_ERROR_STOP makes psql exit non-zero on the first error; because the
# session ends before COMMIT, the TRUNCATE is rolled back with it.
# NOTE(review): \copy parses the file as comma-delimited CSV -- confirm the
# Hive table is actually stored as comma-delimited text.
echo "Step 3: Loading data into PostgreSQL via \copy..."
psql "${PG_CONN_ARGS[@]}" -v ON_ERROR_STOP=1 <<SQL || die "PostgreSQL load failed; ${SCHEMA}.${TABLE_NAME} left unchanged"
BEGIN;
TRUNCATE TABLE ${SCHEMA}.${TABLE_NAME};
\copy ${SCHEMA}.${TABLE_NAME}(regionid, x_offset_20, y_offset_20, provincecode, province_name, citycode, city_name, districtcode, district_name, center_lon, center_lat, grid_wkt) FROM '${CSV_FILE}' WITH CSV DELIMITER ','
COMMIT;
SQL

echo "Done: ${TABLE_NAME} sync completed."