#!/bin/bash
#
# HDFS scheduled cleanup script.
#
# Recursively lists HDFS_DIR and permanently deletes (-skipTrash) every
# regular file whose modification time is more than DAYS_TO_KEEP days old.
# Directories are never deleted.
#
# Requirements: the `hdfs` CLI inside HADOOP_HOME, and a `date` that
# supports `-d` (GNU coreutils).
#
# Exit status: 0 on success; non-zero if the listing failed or at least one
# file could not be processed.

# Treat unset variables as errors. `-e` is deliberately not used: failures
# are handled explicitly so that one undeletable file does not abort the run.
set -u

# Path to your HDFS bin directory (the directory containing `hdfs`).
HADOOP_HOME=/usr/local/hadoop/bin

# HDFS directory to clean up.
HDFS_DIR=/user/hadoop/tmp

# Number of days to keep files in HDFS.
DAYS_TO_KEEP=7

# Current timestamp, in seconds since the epoch.
CURRENT_TIME=$(date +%s)

# Retention period converted from days to seconds.
SECONDS_TO_KEEP=$((DAYS_TO_KEEP * 86400))

# Timestamp of the oldest file to keep; anything older is deleted.
OLDEST_TIME=$((CURRENT_TIME - SECONDS_TO_KEEP))

#######################################
# Delete files older than the configured number of days.
# Globals:   HADOOP_HOME, HDFS_DIR, OLDEST_TIME (all read)
# Arguments: none
# Outputs:   one "Deleted file: <path>" line per removed file and a final
#            completion message on stdout; diagnostics on stderr
# Returns:   0 on success, 1 if the listing failed or any file could not
#            be processed
#######################################
main() {
  local hdfs="${HADOOP_HOME}/hdfs"
  local listing perm mod_date mod_time file_path file_time
  local failures=0

  if [[ ! -x "$hdfs" ]]; then
    printf 'Error: hdfs executable not found at %s\n' "$hdfs" >&2
    return 1
  fi

  # Capture the listing first so that a failed `ls` is detected before
  # anything is deleted.
  if ! listing=$("$hdfs" dfs -ls -R "$HDFS_DIR"); then
    printf 'Error: cannot list HDFS directory %s\n' "$HDFS_DIR" >&2
    return 1
  fi

  # `hdfs dfs -ls` columns: permissions, replicas, owner, group, size,
  # modification date, modification time, path. Reading the path as the last
  # variable keeps any spaces it contains.
  while read -r perm _ _ _ _ mod_date mod_time file_path; do
    # Regular files have a permission string starting with '-'; this skips
    # directories, blank lines and "Found N items" headers.
    [[ "$perm" == -* ]] || continue
    [[ -n "$file_path" ]] || continue

    if ! file_time=$(date -d "$mod_date $mod_time" +%s 2>/dev/null); then
      printf 'Warning: cannot parse modification time "%s %s" of %s\n' \
        "$mod_date" "$mod_time" "$file_path" >&2
      failures=$((failures + 1))
      continue
    fi

    if ((file_time < OLDEST_TIME)); then
      # stdin is redirected so the command cannot consume the listing that
      # the loop is reading.
      if "$hdfs" dfs -rm -r -skipTrash "$file_path" </dev/null; then
        echo "Deleted file: $file_path"
      else
        printf 'Warning: failed to delete %s\n' "$file_path" >&2
        failures=$((failures + 1))
      fi
    fi
  done <<<"$listing"

  echo "HDFS cleanup complete."

  if ((failures > 0)); then
    printf 'Warning: %d file(s) could not be processed\n' "$failures" >&2
    return 1
  fi
  return 0
}

main "$@"

# Footer of the web page this script was copied from:
# Original source: https://www.cveoy.top/t/topic/cpn9 - copyright belongs to
# the author; do not repost or scrape.
# Free AI, click here - no registration or login required.