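# NOTE(review): residue of the one-liner this script appears to be based on (garbled and truncated by extraction; punctuation reconstructed from the single-patient branch below, output path uncertain):
#   if [ $length -eq 1 ]; then grep -v "##" $vcf | awk -v patient=$patient 'BEGIN{OFS="\t"}NR==1{for(i=1;i<=NF;i++){a[$i]=i}}NR>=1{print $a["#CHROM"],$a["POS"],$a["ID"],$a["REF"],$a["ALT"],$a["QUAL"],$a["FILTER"],$a["INFO"],$a["FORMAT"],$a[patient]}' > $dir/$date/vcf_noheader_pa…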
# This script takes a VCF file and extracts the fixed columns plus the
# column(s) of one or more patients.
# In both the single-patient and the multi-patient case the "##"
# meta-information lines are removed and the "#CHROM" column-header line is
# kept as the first line of the output.
# Check that the required number of arguments has been provided
# The script reads three positional arguments (VCF file, output directory,
# patient list), so anything fewer is a usage error. The usage text goes to
# stderr so that it never mixes with regular output.
if [[ $# -lt 3 ]]; then
  echo "Usage: $0 <input_vcf_file> <output_directory> <patient1[,patient2,...]>" >&2
  exit 1
fi
# Assign input arguments to variables
vcf=${1}          # input VCF file
output_dir=${2}   # directory that receives the output files
patient=${3}      # one patient name, or several separated by commas
# Create the output directory if it does not exist
# Create the output directory together with any missing parents. Stop here
# when that fails: continuing would only produce redirection errors later.
# "--" keeps a directory name that starts with "-" from being read as an option.
if ! mkdir -p -- "$output_dir"; then
  echo "Error: cannot create output directory '$output_dir'" >&2
  exit 1
fi
# Capture the current date and time for use in the output file names
# Stamp of the form YYYY-MM-DD_HH-MM-SS, taken once when the script starts.
date_time="$(date '+%Y-%m-%d_%H-%M-%S')"
# Check how many patients were specified
#######################################
# Count the non-empty names in a comma-separated list.
# Done in the shell itself rather than with echo | tr | wc: echo would treat a
# value such as "-n" as an option, and some wc implementations pad the number
# with blanks.
# Arguments: $1 - comma-separated list, may be empty
# Outputs:   the count on stdout (0 for an empty list; empty entries as in
#            "P1,,P2" or "P1," are not counted)
#######################################
count_patients() {
  local csv=$1
  local -a fields=()
  local field count=0

  IFS=',' read -r -a fields <<< "$csv"
  for field in "${fields[@]}"; do
    [[ -n "$field" ]] && count=$((count + 1))
  done
  printf '%s\n' "$count"
}

length=$(count_patients "$patient")
#######################################
# Write the nine fixed VCF columns plus one column per requested patient.
#
# The VCF is read once. Lines starting with "##" are skipped; the "#CHROM"
# header line is used to locate columns by name and is itself written as the
# first output row. Patient columns appear in the order they were requested.
# Fields are split on tabs only, so a field that contains a space does not
# shift the columns.
#
# Arguments:
#   $1 - path of the input VCF file
#   $2 - existing directory that receives the output files
#   $3 - patient name, or several names separated by commas
#   $4 - stamp appended to the output file names
# Outputs:
#   <dir>/vcf_noheader_patient_<stamp>  fixed columns + patient column(s)
#   <dir>/vcf_noheader_<stamp>          fixed columns only; written only when
#                                       more than one patient is requested
#   "You specified N patients" on stdout when N > 1; errors on stderr.
# Returns:
#   0 on success. 1 when no patient name is given, the VCF cannot be read,
#   the header line or a requested column is missing, or writing fails; any
#   partially written output file is removed in that case.
#######################################
extract_patient_columns() {
  local vcf_file=$1 out_dir=$2 patient_csv=$3 stamp=$4
  local merged_out="${out_dir}/vcf_noheader_patient_${stamp}"
  local base_out=""
  local -a fields=() names=()
  local field wanted

  # Split on commas and drop empty entries (e.g. from "P1,,P2" or "P1,").
  IFS=',' read -r -a fields <<< "$patient_csv"
  for field in "${fields[@]}"; do
    [[ -n "$field" ]] && names+=("$field")
  done

  if (( ${#names[@]} == 0 )); then
    echo "Error: no patient name given" >&2
    return 1
  fi
  if [[ ! -r "$vcf_file" ]]; then
    echo "Error: cannot read VCF file '$vcf_file'" >&2
    return 1
  fi

  if (( ${#names[@]} > 1 )); then
    echo "You specified ${#names[@]} patients"
    base_out="${out_dir}/vcf_noheader_${stamp}"
  fi

  # Re-join the cleaned names; the subshell keeps the IFS change local.
  wanted=$(IFS=','; printf '%s' "${names[*]}")

  if ! awk -v wanted="$wanted" -v base_out="$base_out" '
    BEGIN {
      FS = OFS = "\t"
      n_fixed = split("#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT", fixed, " ")
      n_wanted = split(wanted, want, ",")
    }

    # Meta-information lines are not part of the output.
    /^##/ { next }

    # First remaining line: the column header. Map every column name to its
    # position and refuse to continue if anything requested is absent, since
    # an unknown name would otherwise select the whole line.
    !seen_header {
      seen_header = 1
      for (i = 1; i <= NF; i++) col[$i] = i
      for (k = 1; k <= n_fixed; k++) {
        if (!(fixed[k] in col)) {
          print "Error: column " fixed[k] " not found in VCF header" > "/dev/stderr"
          bad = 1
        }
      }
      for (k = 1; k <= n_wanted; k++) {
        if (!(want[k] in col)) {
          print "Error: patient " want[k] " not found in VCF header" > "/dev/stderr"
          bad = 1
        }
      }
      if (bad) exit 2
    }

    # Header line and every data line: emit the selected columns.
    {
      row = $(col[fixed[1]])
      for (k = 2; k <= n_fixed; k++) row = row OFS $(col[fixed[k]])
      if (base_out != "") print row > base_out
      for (k = 1; k <= n_wanted; k++) row = row OFS $(col[want[k]])
      print row
    }

    END {
      if (!seen_header) {
        print "Error: no #CHROM header line found" > "/dev/stderr"
        exit 2
      }
    }
  ' < "$vcf_file" > "$merged_out"; then
    rm -f -- "$merged_out"
    [[ -n "$base_out" ]] && rm -f -- "$base_out"
    return 1
  fi
}

# Keep this as the last command of the script: a failed extraction then
# becomes the exit status of the script, and "Done!" is printed only on
# success.
extract_patient_columns "$vcf" "$output_dir" "$patient" "$date_time" && echo "Done!"
# Source: https://www.cveoy.top/t/topic/cFC4 (copyright belongs to the author; please do not repost or scrape)