#!/bin/bash
#
# Scheduled HDFS cleanup script.
#
# Permanently deletes regular files under HDFS_DIR whose modification time
# (as reported by `hdfs dfs -ls -R`) is more than DAYS_TO_KEEP days old.
# Directories are never removed. Deletions bypass the HDFS trash.
#
# Requirements: bash, GNU date (`date -d`), and the `hdfs` command-line client.
# Exit status: 0 when every eligible file was deleted, 1 when the listing
# failed or at least one deletion failed.

set -euo pipefail

# Directory that contains the `hdfs` executable (the Hadoop bin directory).
HADOOP_HOME=/usr/local/hadoop/bin

# HDFS directory to clean up.
HDFS_DIR=/user/hadoop/tmp

# Number of days to keep files in HDFS.
DAYS_TO_KEEP=7

# Current time as seconds since the epoch.
CURRENT_TIME=$(date +%s)

# Retention period converted from days to seconds.
SECONDS_TO_KEEP=$((DAYS_TO_KEEP * 86400))

# Files modified before this timestamp are deleted.
OLDEST_TIME=$((CURRENT_TIME - SECONDS_TO_KEEP))

# Capture the listing up front so that a failed `hdfs dfs -ls` aborts the run
# instead of being mistaken for an empty directory.
if ! listing=$("$HADOOP_HOME/hdfs" dfs -ls -R "$HDFS_DIR"); then
  printf 'Failed to list HDFS directory: %s\n' "$HDFS_DIR" >&2
  exit 1
fi

failures=0

# Each listing line has the form:
#   permissions replicas owner group size date time path
# `read` assigns the remainder of the line to the last variable, so paths that
# contain spaces are preserved intact.
while IFS=' ' read -r perms _ _ _ _ file_date file_clock file_path; do
  # Only regular files (permission string starting with '-') are candidates.
  [[ $perms == -* ]] || continue
  [[ -n $file_path ]] || continue

  if ! file_time=$(date -d "$file_date $file_clock" +%s 2>/dev/null); then
    printf 'Skipping %s: cannot parse timestamp "%s %s"\n' \
      "$file_path" "$file_date" "$file_clock" >&2
    continue
  fi

  if ((file_time < OLDEST_TIME)); then
    # No -r: only regular files are targeted, so recursion is never needed.
    # stdin is redirected so the client cannot consume the rest of the listing.
    if "$HADOOP_HOME/hdfs" dfs -rm -skipTrash "$file_path" </dev/null; then
      echo "Deleted file: $file_path"
    else
      printf 'Failed to delete file: %s\n' "$file_path" >&2
      failures=$((failures + 1))
    fi
  fi
done <<<"$listing"

echo "HDFS cleanup complete."

# Report deletion failures to the caller (e.g. cron) through the exit status.
((failures == 0)) || exit 1

# Original source: https://www.cveoy.top/t/topic/cpn9
# Copyright belongs to the original author. Do not repost or scrape.