I wanted to export a table from MySQL to a csv file and then upload it to HDFS. I started with Sqoop, but problems kept piling up:
https://blog.csdn.net/weixin_45357522/article/details/149498030
https://blog.csdn.net/weixin_45357522/article/details/149449413
In particular, the QueryResult issue has essentially no clean fix, which is why this script exists as a full replacement for the Sqoop approach.
This script exports one MySQL table to a csv file and uploads it to HDFS. Steps that must succeed are followed by a call to checkRst, which aborts the script on failure; without that, DolphinScheduler would treat the task as successful, run the downstream tasks, and mislead the administrator into thinking everything worked. Other steps are allowed to fail and are not checked. The script is intended to be used as a data-loading task in DolphinScheduler.
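The pattern is: run a command, then call checkRst on the very next line, because checkRst reads the exit status ($?) of the most recently executed command. A minimal sketch with an illustrative command:

hdfs dfs -put ~/data.csv /some/hdfs/dir   # a step whose failure must stop the job
checkRst                                  # exits the whole script with status 1 if the put failed

set -e would achieve a similar fail-fast effect, but explicit checkRst calls make it obvious which steps must abort the job and which are allowed to fail.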
Also note that passwordless SSH must be set up for the hive user between the DolphinScheduler worker and the database server, since the data file is downloaded to the worker over ssh/scp. The export directory on the database server must also be writable, so that no leftover files are stranded there.
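For reference, a minimal sketch of the one-time setup, assuming the worker runs as user hive, the database server is 10.6.2.9 as in the example below, and /tmp is the export directory:

# on the DolphinScheduler worker, as user hive
ssh-keygen -t rsa             # skip if a key pair already exists
ssh-copy-id hive@10.6.2.9     # enables the passwordless ssh/scp calls in the script
# on the database server: check where MySQL is allowed to write OUTFILEs
mysql -u root -p -e "SHOW VARIABLES LIKE 'secure_file_priv'"
# empty value = no restriction; NULL = INTO OUTFILE disabled entirely;
# any other value = the export path must sit under that directory

/tmp works here because it is world-writable by default: MySQL can create the file there and the hive user can delete it afterwards. The full script follows.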
#!/bin/bash
if [[ $# -lt 5 ]]
then
    echo "usage: $0 mysqlConnect sql dmpRoot srcSystem bizDate"
    echo "e.g.: $0 \"-h 10.6.2.9 -P 13306 -u root -ppasswd123\" \"SELECT id,is_set,zl_office_id FROM loan.project_info\" \"/dmp/biz\" \"yecai\" \"20250727\""
    exit 1
fi
# Check the exit status of the most recent command; abort on failure so
# DolphinScheduler does not mistake a failed step for success.
checkRst(){
    if [[ $? != 0 ]]
    then
        echo "--- check failed"
        exit 1
    else
        echo "--- check success"
    fi
}
# parse the arguments
mysqlConnect=$1
sql=$2
dmpRoot=$3
srcSystem=$4
bizDate=$5
echo "===== got input params:"
echo "mysqlConnect: $mysqlConnect"
echo "sql: $sql"
echo "dmpRoot: $dmpRoot"
echo "srcSystem: $srcSystem"
echo "bizDate: $bizDate"
echo "===== parsed params:"
dbHost=$(echo $mysqlConnect | awk -F '-h ' '{print $2}' | awk -F ' ' '{print $1}')
echo "dbHost: $dbHost"
# extract the table name from the SQL; handle both 'from' and 'FROM'
tableName=$(echo $sql | awk -F ' from ' '{print $2}' | awk -F ' ' '{print $1}')
if [ -z "$tableName" ]; then
    tableName=$(echo $sql | awk -F ' FROM ' '{print $2}' | awk -F ' ' '{print $1}')
fi
# strip the database prefix from a db.table name
if [[ $tableName == *.* ]]
then
    tableName=$(echo $tableName | awk -F '.' '{print $2}')
fi
echo "tableName: $tableName"
echo "===== end of params"echo "1.尝试删除数据库服务器残留文件"
ssh hive@${dbHost} rm -f /tmp/${tableName}.csvecho "2.导出数据到数据库服务器csv文件"
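# Note: INTO OUTFILE is executed by the MySQL server itself, so it requires the
# FILE privilege and a secure_file_priv setting that permits writing under /tmp.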
mysql ${mysqlConnect} -e "$sql INTO OUTFILE '/tmp/$tableName.csv' FIELDS TERMINATED BY ',' LINES TERMINATED BY '\n'"
checkRstecho "3.从数据库服务器下载文件到本地"
scp hive@${dbHost}:/tmp/${tableName}.csv ~/
checkRstecho "4.删除数据库服务器残留文件"
ssh hive@${dbHost} rm -f /tmp/${tableName}.csvecho "5.尝试清除hdfs旧文件"
hdfs dfs -rm -r ${dmpRoot}/tmp/${srcSystem}/${tableName}/${bizDate}echo "6.尝试创建hdfs文件目录"
hdfs dfs -mkdir -p ${dmpRoot}/tmp/${srcSystem}/${tableName}/${bizDate}echo "7.上传本地文件到hdfs"
hdfs dfs -put ~/${tableName}.csv ${dmpRoot}/tmp/${srcSystem}/${tableName}/${bizDate}
checkRstecho "8.清除本地临时文件"
rm -f ~/${tableName}.csv
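Saved as, say, mysql_to_hdfs.sh (the file name is arbitrary), a DolphinScheduler shell task would call it like this, reusing the example values from the usage message:

bash mysql_to_hdfs.sh "-h 10.6.2.9 -P 13306 -u root -ppasswd123" \
    "SELECT id,is_set,zl_office_id FROM loan.project_info" \
    "/dmp/biz" "yecai" "20250727"

With these arguments the data ends up at /dmp/biz/tmp/yecai/project_info/20250727/project_info.csv.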