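#!/usr/bin/env bash
#
# This script takes a VCF file and extracts the information for one or more patients.
#
# If only one patient is specified, the output file will have a header.
#
# If multiple patients are specified, the output file will not have a header.
#
# NOTE(review): as written, the code below drops the "##" meta-information lines
# and keeps the "#CHROM" column row in both cases — confirm which "header" is
# meant by the two statements above.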

# Check that the correct number of arguments has been provided

# Three positional arguments are required: the input VCF, the output directory
# and the comma-separated patient list (they are read as $1, $2 and $3).
# The usage text is a diagnostic, so it goes to stderr.
if [[ $# -lt 3 ]]; then
  echo "Usage: $0 <input_vcf_file> <output_directory> <patient1[,patient2,...]>" >&2
  exit 1
fi

# Assign input arguments to variables

# Positional parameters: input VCF path, output directory, patient list.
vcf=$1
output_dir=$2
patient=$3

# Create the output directory if it does not exist

# Stop here if the directory cannot be created: the output files are written
# inside it. "--" keeps a directory name that starts with "-" from being parsed
# as an option.
mkdir -p -- "$output_dir" || {
  echo "Error: cannot create output directory '$output_dir'" >&2
  exit 1
}

# Get the current date and time to use in output file names

# Timestamp in the form YYYY-MM-DD_HH-MM-SS (%F is shorthand for %Y-%m-%d).
date_time="$(date '+%F_%H-%M-%S')"

# Check how many patients were specified

#######################################
# Count the fields of a comma-separated list.
# Arguments:
#   $1 - the list, e.g. "p1,p2,p3"
# Outputs:
#   The number of fields (number of commas + 1) on stdout; an empty list
#   therefore counts as 1.
#######################################
count_csv_fields() {
  # Keep only the separators, then count them. Done with parameter expansion
  # so the result does not depend on how echo treats values such as "-n" or
  # on wc padding its output with spaces.
  local separators=${1//[!,]/}
  printf '%d\n' "$(( ${#separators} + 1 ))"
}

length=$(count_csv_fields "$patient")

#######################################
# Print selected columns of a VCF file as tab-separated text.
#
# Lines starting with "##" (meta-information) are skipped. The first remaining
# line is taken as the column-header row: it is used to locate columns by name
# and is printed as well, so the output starts with the column names.
#
# Arguments:
#   $1 - path to the VCF file
#   $2 - comma-separated sample (patient) column names; empty names are
#        ignored, and an empty list selects only the nine fixed columns
# Outputs:
#   #CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT followed by the
#   requested sample columns, in the order given, on stdout.
#   One message per missing column on stderr.
# Returns:
#   Non-zero if the file cannot be read or a requested column is not present
#   in the header row.
#######################################
extract_vcf_columns() {
  local vcf_file=$1
  local samples=$2

  # The file is passed on stdin so that a path containing "=" or starting with
  # "-" cannot be mistaken for an awk assignment or option.
  # Fields are split on tabs only: values that contain spaces must not shift
  # the columns.
  awk -F'\t' -v samples="$samples" '
    BEGIN {
      OFS = "\t"
      n_wanted = split("#CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT", wanted, ",")
      n_samples = split(samples, sample, ",")
      for (s = 1; s <= n_samples; s++)
        if (sample[s] != "")
          wanted[++n_wanted] = sample[s]
    }

    /^##/ { next }

    !header_seen {
      header_seen = 1
      for (f = 1; f <= NF; f++)
        column[$f] = f
      # An unknown name would otherwise index field 0 and print whole lines.
      for (w = 1; w <= n_wanted; w++)
        if (!(wanted[w] in column)) {
          printf "Error: column \"%s\" not found in the VCF header\n", wanted[w] > "/dev/stderr"
          missing = 1
        }
      if (missing)
        exit 1
    }

    {
      row = $(column[wanted[1]])
      for (w = 2; w <= n_wanted; w++)
        row = row OFS $(column[wanted[w]])
      print row
    }
  ' < "$vcf_file"
}

# A list that is empty or made only of commas names no patient at all.
if [[ -z "${patient//,/}" ]]; then
  echo "Error: no patient name given" >&2
  exit 1
fi

merged_file="${output_dir}/vcf_noheader_patient_${date_time}"

if [[ "$length" -ne 1 ]]; then
  echo "You specified $length patients"
fi

# One pass over the VCF yields the fixed columns plus every requested patient
# column, in the order the patients were listed. No per-patient temporary
# files are needed.
if ! extract_vcf_columns "$vcf" "$patient" > "$merged_file"; then
  rm -f -- "$merged_file"   # do not leave a partial result behind
  exit 1
fi

if [[ "$length" -ne 1 ]]; then
  # The multi-patient mode also leaves a file holding only the nine fixed
  # columns; they are the first nine columns of the merged output.
  cut -f1-9 -- "$merged_file" > "${output_dir}/vcf_noheader_${date_time}" || exit 1
fi

# Tell the user the run has finished.
echo "Done!"

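# NOTE: the line below is a truncated, garbled copy of the single-patient
# command (its quotes, brackets and pipes were lost); kept verbatim for
# reference only.
# if $length -eq 1 ; then		grep -v ## $vcf awk -v patient=$patient BEGINOFS=tNR==1fori=1;i=NF;i++a$i=iNR=1print $a#CHROM$aPOS$aID$aREF$aALT$aQUAL$aFILTER$aINFO$aFORMAT$apatient $dirdatevcf_noheader_pa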

# Original source: https://www.cveoy.top/t/topic/cFC4 — copyright belongs to the author. Do not repost or scrape.

# Free AI, click here; no registration or login required.