Question

Implement the Smith-Waterman algorithm in C that accepts a substitution matrix. You may assume the substitution...

Implement the Smith-Waterman algorithm in C that accepts a substitution matrix.

You may assume the substitution matrix is formatted as a CSV, without the labels.

A,C,G,T

0,1,1,1

1,0,1,1

1,1,0,1

1,1,1,0

The returned values from the functions should contain all the information needed to construct alignments and the score.

0 0
Add a comment Improve this question Transcribed image text
Answer #1

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*File pointers*/
FILE *ptr_file_1, *ptr_file_2;   //test file pointers

/*Definitions*/
#define TRUE 1
#define FALSE 0
#define Match 5
#define MissMatch -4
#define GapPenalty 10
#define GapExt 8

/*Global Variables*/
char inputC[5];       //user input character
int inputI;
int StrLen1,StrLen2;
int intcheck = TRUE;

char holder, ch;
int filelen1 = 0;
int filelen2 = 0;
int i,j,k,l,m,n,lenA,lenB,compval;
char FASTA1[200],FASTA2[200];
char dash = '-';

char strA[20];           //holds 1st string to be aligned in character array      
char strB[20];           //holds 2nd string to be aligned in character array
int HiScore;           //holds value of highest scoring alignment(s).
int HiScorePos[2];       //holds the position of the HiScore
int SWArray[21][21];   //S-W Matrix

char MaxA[20];
char MaxB[20];
char OptA[20];
char OptB[20];

int MaxAcounter = 1;   //MaxA counter  
int MaxBcounter = 1;   //MaxB counter
int cont = TRUE;
int check;


/*ALIGNMENT FUNCTION*/
int Align(PosA, PosB) {

   /*Function Variables*/
   int relmax = -1;       //hold highest value in sub columns and rows
   int relmaxpos[2];       //holds position of relmax

   if(SWArray[PosA-1][PosB-1] == 0) {
       cont = FALSE;
   }

   while(cont == TRUE) {   //until the diagonal of the current cell has a value of zero
      
       /*Find relmax in sub columns and rows*/
       for(i=PosA; i>0; --i) {
  
           if(relmax < SWArray[i-1][PosB-1]) {
              
               relmax = SWArray[i-1][PosB-1];
               relmaxpos[0]=i-1;
               relmaxpos[1]=PosB-1;
           }
       }

       for(j=PosB; j>0; --j) {

           if(relmax < SWArray[PosA-1][j-1]) {

               relmax = SWArray[PosA-1][j-1];
               relmaxpos[0]=PosA-1;
               relmaxpos[1]=j-1;
           }
       }

       /*Align strings to relmax*/
       if((relmaxpos[0] == PosA-1) && (relmaxpos[1] == PosB-1)) {   //if relmax position is diagonal from current position simply align and increment counters

           MaxA[MaxAcounter] = strA[relmaxpos[0]-1];
           ++MaxAcounter;
           MaxB[MaxBcounter] = strB[relmaxpos[1]-1];
           ++MaxBcounter;

       }

       else {

           if((relmaxpos[1] == PosB-1) && (relmaxpos[0] != PosA-1)) {   //maxB needs at least one '-'
              
               for(i=PosA-1; i>relmaxpos[0]-1; --i) {   //for all elements of strA between PosA and relmaxpos[0]
              
                       MaxA[MaxAcounter]= strA[i-1];
                       ++MaxAcounter;
               }
               for(j=PosA-1; j>relmaxpos[0]; --j) {   //set dashes to MaxB up to relmax

                   MaxB[MaxBcounter] = dash;
                   ++MaxBcounter;
               }

               MaxB[MaxBcounter] = strB[relmaxpos[1]-1];   //at relmax set pertinent strB value to MaxB
               ++MaxBcounter;
           }

           if((relmaxpos[0] == PosA-1) && (relmaxpos[1] != PosB-1)) {   //MaxA needs at least one '-'

               for(j=PosB-1; j>relmaxpos[1]-1; --j) {   //for all elements of strB between PosB and relmaxpos[1]

                   MaxB[MaxBcounter] = strB[j-1];
                   ++MaxBcounter;
               }
               for(i=PosB-1; i>relmaxpos[1]; --i) {       //set dashes to strA

                   MaxA[MaxAcounter] = dash;
                   ++MaxAcounter;
               }

               MaxA[MaxAcounter] = strA[relmaxpos[0]-1];
               ++MaxAcounter;
           }
       }

       //printf("(%i,%i)",relmaxpos[0],relmaxpos[1]);
       Align(relmaxpos[0],relmaxpos[1]);
   }

   return(cont);
}


/*MAIN FUNCTIONS*/
int main()
{
   /*Introduction*/
   printf("Smith-Waterman Global String Alignment Program Version 1.0\n");
   printf("By: Benjamin Corum -- Marlboro College -- Spring 2001\n");
   printf("\n");
   printf("Enter:\n");

   /*Validate User Input*/
   while(intcheck == TRUE) {              
      
       /*list options*/
       printf("   1 - To align two test files\n");
       printf("   2 - To align two novel sequences\n");
       printf("   3 - To align a novel sequence with a test file\n:");

       fgets(inputC, sizeof(inputC), stdin);   //get character
       sscanf(inputC, "%i", &inputI);           //convert to integer
      
       if((inputI < 4) && (inputI > 0)){      
           intcheck = FALSE;
       }
       else {
           printf("Please enter a 1,2,or 3\n");
           intcheck = TRUE;
       }
   }
  
  
   if(inputI == 1) {   //If user would like to align two test files  
      
       /*open first file*/
       ptr_file_1 = fopen("str1.fa", "r");
       /*check to see if it opened okay*/
       if(ptr_file_1 == NULL) {
           printf("Error opening 'str1.fa'\n");
           system("PAUSE");
           exit(8);
       }

       /*open second file*/
       ptr_file_2 = fopen("str2.fa", "r");
       /*check to see if it opened okay*/
       if(ptr_file_2 == NULL) {
           printf("Error opening 'str2.fa'\n");
           system("PAUSE");  
           exit(8);
       }

       /*retrieve fasta information from files*/
       fgets(FASTA1, sizeof(FASTA1), ptr_file_1);
       fgets(FASTA2, sizeof(FASTA2), ptr_file_2);      

       /*measure file1 length*/
       while(holder != EOF) {
           holder = fgetc(ptr_file_1);
           if(filelen1 < 20) {
               strA[filelen1] = holder;
           }
           ++filelen1;
       }

       holder = '0';

       /*measure file2 length*/
       while(holder != EOF) {
           holder = fgetc(ptr_file_2);
           if(filelen2 < 20) {
               strB[filelen2] = holder;
           }
           ++filelen2;
       }

       fclose(ptr_file_1);
       fclose(ptr_file_2);                              
   }
                          
   if(inputI == 2) {   //two novel strings
       /*get string*/
       printf("Novel strings should be no longer than 20 symbols. Program is case sensitive\n");
       printf("Please enter 1st string:");
       fgets(strA,sizeof(strA),stdin);
       printf("Please enter 2nd string:");
       fgets(strB,sizeof(strB),stdin);

       if(strA[strlen(strA)-1] = '\n') {
       strA[strlen(strA)-1] = '\0';   //edit '\n' out of string
       }
       if(strB[strlen(strB)-1] = '\n') {
       strB[strlen(strB)-1] = '\0';   //edit '\n' out of string
       }

   }

   if(inputI == 3) {   //novel string vs. test
       /*get string*/
       printf("Novel strings should be no longer than 20 symbols. Program is case sensitive\n");
       printf("Please enter 1st string:");
       fgets(strA,sizeof(strA),stdin);                  
       strA[strlen(strA)-1] = '\0';   //edit '\n' out of string

       /*open test file*/
       ptr_file_1 = fopen("str1.fa", "r");
       /*check to see if it opened okay*/
       if(ptr_file_1 == NULL) {
           printf("Error opening 'str1.fa'\n");
           exit(8);
       }

       fgets(FASTA1, sizeof(FASTA1), ptr_file_1);
  
       /*measure file1 length*/
       while(holder != EOF) {
           holder = fgetc(ptr_file_1);
           if(filelen1 < 20) {
               strB[filelen1] = holder;
           }
           ++filelen1;
       }
  
   }


   lenA = strlen(strA);
   lenB = strlen(strB);


   //Create empty table
   for(i=0;i<=lenA;++i){
       SWArray[0][i]=0;
   }
   for(i=0;i<=lenB;++i){
       SWArray[i][0]=0;
   }


   //score table with S-W
   compval = 0;
   for(i = 1; i <= lenA; ++i) {   //for all values of strA
      
       for(j = 1; j <= lenB; ++j) {   //for all values of strB
      
           //MATCH
           if(strA[i-1] == strB[j-1]) {               //if current sequence values are the same
               compval = (SWArray[i-1][j-1] + Match);   //compval = diagonal + match score
           }
          
           for(k = i-1; k > 0; --k) {       //check all sub rows
              
               if(compval < ((SWArray[i-k][j]) - (GapPenalty + (GapExt * k)))) {        //if cell above has a greater value
                  
                   compval = ((SWArray[i-k][j]) - (GapPenalty + (GapExt * k)));       //set compval to that square
               }
           }

           for(k=j-1; k>0; --k) {       //check all sub columns
              
               if(compval < ((SWArray[i][j-k]) - (GapPenalty + (GapExt * k)))) {   //if square to the left has the highest value
                  
                   compval = ((SWArray[i][j-k]) - (GapPenalty + (GapExt * k)));    //set compval to that square
               }
           }      
          
           if(compval < 0) {
               compval = 0;
           }
      
           //MISMATCH
           if(strA[i-1] != strB[j-1]) {                   //if current sequence values are the same

               if(compval < (SWArray[i-1][j-1] + MissMatch)) {   //compval = diagonal + match score
                   compval = SWArray[i-1][j-1] + MissMatch;
                   }
           }
              
           for(k=i-1; k>0; --k) {       //check all sub rows
              
               if(compval < ((SWArray[i-k][j]) - (GapPenalty + (GapExt * k)))) {        //if cell above has a greater value
                  
                   compval = ((SWArray[i-k][j]) - (GapPenalty + (GapExt * k)));       //set compval to that square
               }
           }

           for(k=j-1; k>0; --k) {       //check all sub columns
              
               if(compval < ((SWArray[i][j-k]) - (GapPenalty + (GapExt * k)))) {   //if square to the left has the highest value
                  
                   compval = ((SWArray[i][j-k]) - (GapPenalty + (GapExt * k)));    //set compval to that square
               }
           }      
          
           if(compval < 0) {
               compval = 0;
           }          
          
      
           SWArray[i][j] = compval;   //set current cell to highest possible score and move on


           compval = 0;
       }
   }

   /*PRINT S-W Table*/
   printf("   0");
   for(i = 0; i <= lenB; ++i) {
       printf(" %c",strB[i]);
   }
   printf("\n");

   for(i = 0; i <= lenA; ++i) {
       if(i < 1) {
           printf("0");
       }

       if(i > 0) {
           printf("%c",strA[i-1]);
       }

       for(j = 0; j <= lenB; ++j) {
           //if(SWArray[i][j] > 0){
               printf("%3i",SWArray[i][j]);
           //}

       }
       printf("\n");
   }

   /*MAKE ALIGNMENTT*/
   for(i=0; i<=lenA; ++i) {   //find highest score in matrix: this is the starting point of an optimal local alignment

       for(j=0; j<=lenB; ++j) {
  
           if(SWArray[i][j] > HiScore) {

               HiScore = SWArray[i][j];
               HiScorePos[0]=i;
               HiScorePos[1]=j;
           }
       }
   }

   /*send Position to alignment function*/
   MaxA[0] = strA[HiScorePos[0]-1];
   MaxB[0] = strB[HiScorePos[1]-1];

   check = Align(HiScorePos[0],HiScorePos[1]);

   /*in the end reverse Max A and B*/
   k=0;
   for(i = strlen(MaxA)-1; i > -1; --i) {
       OptA[k] = MaxA[i];
       ++k;
   }

   k=0;
   for(j=strlen(MaxB)-1; j > -1; --j) {
       OptB[k] = MaxB[j];
       ++k;
   }
//   printf("\n%s\n%s\n",MaxA,MaxB);
   printf("%s\n%s   ",OptA,OptB);

   system("PAUSE");

   return(0);

}

Add a comment
Know the answer?
Add Answer to:
Implement the Smith-Waterman algorithm in C that accepts a substitution matrix. You may assume the substitution...
Your Answer:

Post as a guest

Your Name:

What's your source?

Earn Coins

Coins can be redeemed for fabulous gifts.

Not the answer you're looking for? Ask your own homework help question. Our experts will answer your question WITHIN MINUTES for Free.
Similar Homework Help Questions
  • Just Q3 and Q4 Q1] Write a C function to implement the binary search algorithm over...

    Just Q3 and Q4 Q1] Write a C function to implement the binary search algorithm over an array of integer numbers and size n. The function should return the index of the search key if the search key exists and return - 1 if the search key doesn't exist. [10 Points] Q2] Write a C function to implement the selection sort algorithm, to sort an array of float values and size n. The function should sort the array in ascending...

  • You are going to implement Treesort algorithm in C++ to sort string data. Here are the...

    You are going to implement Treesort algorithm in C++ to sort string data. Here are the steps to complete the homework 1) Use the following class definition for binary search tree nodes. Its constructor is incomplete you should first complete the constructor. class TreeNode t public: string data; / this is the string stored in the node TreeNode left: TreeNode right; TreeNode (string element, TreeNode 1t, TreeNode rt //your code here 2) Write a function that will insert a string...

  • Implement a tic-tac-toe game. At the bottom of these specifications you will see a template you...

    Implement a tic-tac-toe game. At the bottom of these specifications you will see a template you must copy and paste to cloud9. Do not change the provided complete functions, or the function stub headers / return values. Currently, if the variables provided in main are commented out, the program will compile. Complete the specifications for each function. As you develop the program, implement one function at a time, and test that function. The provided comments provide hints as to what...

  • You shall develop a grammar and implement a parser which recognizes valid statements as described below in the assignment specification. You may develop your code using C, C++. The test file include...

    You shall develop a grammar and implement a parser which recognizes valid statements as described below in the assignment specification. You may develop your code using C, C++. The test file include these expressions below. The first six should pass and the rest should fail: first = one1 + two2 - three3 / four4 ; second = one1 * (two2 * three3) ; second = one1 * (two2 * three3) ; third = ONE + twenty - three3 ; third...

  • [C++]Project #3 – Traversing Property Graphs EDIT: in reply to the comment, what excerpt are you...

    [C++]Project #3 – Traversing Property Graphs EDIT: in reply to the comment, what excerpt are you talking about? EDIT #2 : Need to construct a directed graph, where the property names label edges connecting nodes; either using array or linked list implementation. Learning Objectives Implement a data structure to meet given specifications Design, implement, and use a graph data structure Perform analysis of algorithm performance Utilize Dijkstra's algorithm Overview Your task for this assignment is to implement a graph data...

  • Design and implement a C version of the color match program. As a starting point, use...

    Design and implement a C version of the color match program. As a starting point, use the file HW2-1-shell.c. The program should employ a reasonable algorithm that compares all pairings of colors in the palette exactly once. A color should not be compared to itself. Nor should two colors be compared to each other more than once. Your C program should print out the total component color difference for the closest pair using the print string provided in the shell...

  • I need help writing this code in C++ Proj11.cpp is provided as well as the randomdata.txt thank you in advance! Objectives: The main objectives of this project is to introduce you to recursion,...

    I need help writing this code in C++ Proj11.cpp is provided as well as the randomdata.txt thank you in advance! Objectives: The main objectives of this project is to introduce you to recursion, and test your ability to work with some STL functionalities. A review of your knowledge on working with templates, dynamic data structures, as well as manipulating dynamic memory, classes, pointers and iostream to all extents, is also included. Description: For the entirety of this project, you will...

  • For C++ This is the information about the meal class 2-1. (24 marks) Define and implement...

    For C++ This is the information about the meal class 2-1. (24 marks) Define and implement a class named Dessert, which represents a special kind of Meal. It is to be defined by inheriting from the Meal class. The Dessert class has the following constructor: Dessert (string n, int co) // creates a meal with name n, whose type // is "dessert" and cost is co The class must have a private static attribute static int nextID ; which is...

  • 1. Write a C++ program called Password that handles encrypting a password. 2. The program must...

    1. Write a C++ program called Password that handles encrypting a password. 2. The program must perform encryption as follows: a. main method i. Ask the user for a password. ii. Sends the password to a boolean function called isValidPassword to check validity. 1. Returns true if password is at least 8 characters long 2. Returns false if it is not at least 8 characters long iii. If isValidPassword functions returns false 1. Print the following error message “The password...

  • C++ help Format any numerical decimal values with 2 digits after the decimal point. Problem descr...

    C++ help Format any numerical decimal values with 2 digits after the decimal point. Problem description Write the following functions as prototyped below. Do not alter the function signatures. /// Returns a copy of the string with its first character capitalized and the rest lowercased. /// @example capitalize("hELLO wORLD") returns "Hello world" std::string capitalize(const std::string& str); /// Returns a copy of the string centered in a string of length 'width'. /// Padding is done using the specified 'fillchar' (default is...

ADVERTISEMENT
Free Homework Help App
Download From Google Play
Scan Your Homework
to Get Instant Free Answers
Need Online Homework Help?
Ask a Question
Get Answers For Free
Most questions answered within 3 hours.
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT