I need to take multiple FASTA files as reference genome and for each reference genome, I need to do reference-based assembly.

Here is my code,

import os

import glob

from Bio.Sequencing.Applications import BwaIndexCommandline

from Bio.Sequencing.Applications import BwaMemCommandline

from Bio.Sequencing.Applications import SamtoolsViewCommandline

from Bio.Sequencing.Applications import SamtoolsVersion1xSortCommandline

from Bio.Sequencing.Applications import SamtoolsIndexCommandline

list_of_files = glob.glob('GCA_*')

for file in list_of_files[0:]:

reference_genome = file

in_file = 'SampleGenomeFragments.fasta'

index_cmd = BwaIndexCommandline(infile=reference_genome, algorithm='bwtsw')

index_cmd()

out_sam_file = file + '_out.sam'

create_sam_cmd = BwaMemCommandline(reference=reference_genome, read_file1=in_file)

create_sam_cmd(stdout=out_sam_file)

out_bam_file = file +'_out.bam'

samtools_view_cmd = SamtoolsViewCommandline(input_file=out_sam_file, t=reference_genome) #input file is output from create_sam_cmd

samtools_view_cmd(stdout=out_bam_file)

out_bam_file_to_sort = out_bam_file

sorted_out_bam_file = file + '_bamout_sorted.bam'

samtools_sort_cmd = SamtoolsVersion1xSortCommandline(input=out_bam_file_to_sort, o=sorted_out_bam_file) #input file is output from samtools_view_cmd

samtools_sort_cmd()

samtools_index_cmd = SamtoolsIndexCommandline(input_bam=sorted_out_bam_file) #input file is output from samtools_sort_cmd

samtools_index_cmd()

More Pavithra Anantharaman Sudhakari's questions See All
Similar questions and discussions