I need to take multiple FASTA files as reference genome and for each reference genome, I need to do reference-based assembly.
Here is my code,
import os
import glob
from Bio.Sequencing.Applications import BwaIndexCommandline
from Bio.Sequencing.Applications import BwaMemCommandline
from Bio.Sequencing.Applications import SamtoolsViewCommandline
from Bio.Sequencing.Applications import SamtoolsVersion1xSortCommandline
from Bio.Sequencing.Applications import SamtoolsIndexCommandline
list_of_files = glob.glob('GCA_*')
for file in list_of_files[0:]:
reference_genome = file
in_file = 'SampleGenomeFragments.fasta'
index_cmd = BwaIndexCommandline(infile=reference_genome, algorithm='bwtsw')
index_cmd()
out_sam_file = file + '_out.sam'
create_sam_cmd = BwaMemCommandline(reference=reference_genome, read_file1=in_file)
create_sam_cmd(stdout=out_sam_file)
out_bam_file = file +'_out.bam'
samtools_view_cmd = SamtoolsViewCommandline(input_file=out_sam_file, t=reference_genome) #input file is output from create_sam_cmd
samtools_view_cmd(stdout=out_bam_file)
out_bam_file_to_sort = out_bam_file
sorted_out_bam_file = file + '_bamout_sorted.bam'
samtools_sort_cmd = SamtoolsVersion1xSortCommandline(input=out_bam_file_to_sort, o=sorted_out_bam_file) #input file is output from samtools_view_cmd
samtools_sort_cmd()
samtools_index_cmd = SamtoolsIndexCommandline(input_bam=sorted_out_bam_file) #input file is output from samtools_sort_cmd
samtools_index_cmd()