Implement the Smith-Waterman algorithm in C that accepts a substitution matrix.
You may assume the substitution matrix is formatted as a CSV, without the labels.
A,C,G,T
0,1,1,1
1,0,1,1
1,1,0,1
1,1,1,0
The returned values from the functions should contain all the information needed to construct alignments and the score.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*File pointers*/
FILE *ptr_file_1, *ptr_file_2; //test file pointers
/*Definitions*/
#define TRUE 1
#define FALSE 0
#define Match 5
#define MissMatch -4
#define GapPenalty 10
#define GapExt 8
/*Global Variables*/
char inputC[5]; //user input
character
int inputI;
int StrLen1,StrLen2;
int intcheck = TRUE;
char holder, ch;
int filelen1 = 0;
int filelen2 = 0;
int i,j,k,l,m,n,lenA,lenB,compval;
char FASTA1[200],FASTA2[200];
char dash = '-';
char strA[20];
//holds 1st string to be aligned in character array
char strB[20];
//holds 2nd string to be aligned in character array
int HiScore;
//holds value of highest scoring alignment(s).
int HiScorePos[2]; //holds the
position of the HiScore
int SWArray[21][21]; //S-W Matrix
char MaxA[20];
char MaxB[20];
char OptA[20];
char OptB[20];
int MaxAcounter = 1; //MaxA
counter
int MaxBcounter = 1; //MaxB counter
int cont = TRUE;
int check;
/*ALIGNMENT FUNCTION*/
int Align(PosA, PosB) {
/*Function Variables*/
int relmax = -1; //hold
highest value in sub columns and rows
int relmaxpos[2];
//holds position of relmax
if(SWArray[PosA-1][PosB-1] == 0) {
cont = FALSE;
}
while(cont == TRUE) { //until the
diagonal of the current cell has a value of zero
/*Find relmax in sub columns and
rows*/
for(i=PosA; i>0; --i) {
if(relmax <
SWArray[i-1][PosB-1]) {
relmax = SWArray[i-1][PosB-1];
relmaxpos[0]=i-1;
relmaxpos[1]=PosB-1;
}
}
for(j=PosB; j>0; --j) {
if(relmax < SWArray[PosA-1][j-1]) {
relmax = SWArray[PosA-1][j-1];
relmaxpos[0]=PosA-1;
relmaxpos[1]=j-1;
}
}
/*Align strings to
relmax*/
if((relmaxpos[0] == PosA-1)
&& (relmaxpos[1] == PosB-1)) { //if relmax
position is diagonal from current position simply align and
increment counters
MaxA[MaxAcounter] = strA[relmaxpos[0]-1];
++MaxAcounter;
MaxB[MaxBcounter] = strB[relmaxpos[1]-1];
++MaxBcounter;
}
else {
if((relmaxpos[1] == PosB-1) && (relmaxpos[0] != PosA-1))
{ //maxB needs at least one '-'
for(i=PosA-1; i>relmaxpos[0]-1; --i)
{ //for all elements of strA between PosA and
relmaxpos[0]
MaxA[MaxAcounter]= strA[i-1];
++MaxAcounter;
}
for(j=PosA-1; j>relmaxpos[0]; --j)
{ //set dashes to MaxB up to relmax
MaxB[MaxBcounter] =
dash;
++MaxBcounter;
}
MaxB[MaxBcounter] =
strB[relmaxpos[1]-1]; //at relmax set pertinent strB
value to MaxB
++MaxBcounter;
}
if((relmaxpos[0] == PosA-1) && (relmaxpos[1] != PosB-1)) { //MaxA needs at least one '-'
for(j=PosB-1; j>relmaxpos[1]-1; --j) { //for all elements of strB between PosB and relmaxpos[1]
MaxB[MaxBcounter] =
strB[j-1];
++MaxBcounter;
}
for(i=PosB-1; i>relmaxpos[1]; --i)
{ //set dashes to strA
MaxA[MaxAcounter] =
dash;
++MaxAcounter;
}
MaxA[MaxAcounter] = strA[relmaxpos[0]-1];
++MaxAcounter;
}
}
//printf("(%i,%i)",relmaxpos[0],relmaxpos[1]);
Align(relmaxpos[0],relmaxpos[1]);
}
return(cont);
}
/*MAIN FUNCTIONS*/
int main()
{
/*Introduction*/
printf("Smith-Waterman Global String Alignment Program
Version 1.0\n");
printf("By: Benjamin Corum -- Marlboro College --
Spring 2001\n");
printf("\n");
printf("Enter:\n");
/*Validate User Input*/
while(intcheck == TRUE) {
/*list options*/
printf(" 1 - To align
two test files\n");
printf(" 2 - To align
two novel sequences\n");
printf(" 3 - To align a
novel sequence with a test file\n:");
fgets(inputC, sizeof(inputC),
stdin); //get character
sscanf(inputC, "%i",
&inputI);
//convert to integer
if((inputI < 4) &&
(inputI > 0)){
intcheck =
FALSE;
}
else {
printf("Please
enter a 1,2,or 3\n");
intcheck =
TRUE;
}
}
if(inputI == 1) { //If user would like to
align two test files
/*open first file*/
ptr_file_1 = fopen("str1.fa",
"r");
/*check to see if it opened
okay*/
if(ptr_file_1 == NULL) {
printf("Error
opening 'str1.fa'\n");
system("PAUSE");
exit(8);
}
/*open second file*/
ptr_file_2 = fopen("str2.fa",
"r");
/*check to see if it opened
okay*/
if(ptr_file_2 == NULL) {
printf("Error
opening 'str2.fa'\n");
system("PAUSE");
exit(8);
}
/*retrieve fasta information
from files*/
fgets(FASTA1, sizeof(FASTA1),
ptr_file_1);
fgets(FASTA2, sizeof(FASTA2),
ptr_file_2);
/*measure file1 length*/
while(holder != EOF) {
holder =
fgetc(ptr_file_1);
if(filelen1 <
20) {
strA[filelen1] = holder;
}
++filelen1;
}
holder = '0';
/*measure file2 length*/
while(holder != EOF) {
holder =
fgetc(ptr_file_2);
if(filelen2 <
20) {
strB[filelen2] = holder;
}
++filelen2;
}
fclose(ptr_file_1);
fclose(ptr_file_2);
}
if(inputI == 2) { //two novel
strings
/*get string*/
printf("Novel strings should be no
longer than 20 symbols. Program is case sensitive\n");
printf("Please enter 1st
string:");
fgets(strA,sizeof(strA),stdin);
printf("Please enter 2nd
string:");
fgets(strB,sizeof(strB),stdin);
if(strA[strlen(strA)-1] = '\n')
{
strA[strlen(strA)-1] =
'\0'; //edit '\n' out of string
}
if(strB[strlen(strB)-1] = '\n')
{
strB[strlen(strB)-1] =
'\0'; //edit '\n' out of string
}
}
if(inputI == 3) { //novel string vs.
test
/*get string*/
printf("Novel strings should be no
longer than 20 symbols. Program is case sensitive\n");
printf("Please enter 1st
string:");
fgets(strA,sizeof(strA),stdin);
strA[strlen(strA)-1] =
'\0'; //edit '\n' out of string
/*open test file*/
ptr_file_1 = fopen("str1.fa",
"r");
/*check to see if it opened
okay*/
if(ptr_file_1 == NULL) {
printf("Error
opening 'str1.fa'\n");
exit(8);
}
fgets(FASTA1, sizeof(FASTA1),
ptr_file_1);
/*measure file1 length*/
while(holder != EOF) {
holder =
fgetc(ptr_file_1);
if(filelen1 <
20) {
strB[filelen1] = holder;
}
++filelen1;
}
}
lenA = strlen(strA);
lenB = strlen(strB);
//Create empty table
for(i=0;i<=lenA;++i){
SWArray[0][i]=0;
}
for(i=0;i<=lenB;++i){
SWArray[i][0]=0;
}
//score table with S-W
compval = 0;
for(i = 1; i <= lenA; ++i) { //for all
values of strA
for(j = 1; j <= lenB; ++j)
{ //for all values of strB
//MATCH
if(strA[i-1] ==
strB[j-1]) {
//if current sequence values are the same
compval = (SWArray[i-1][j-1] +
Match); //compval = diagonal + match score
}
for(k = i-1; k
> 0; --k) { //check all sub
rows
if(compval < ((SWArray[i-k][j]) - (GapPenalty
+ (GapExt * k)))) { //if cell
above has a greater value
compval = ((SWArray[i-k][j])
- (GapPenalty + (GapExt * k)));
//set compval to that square
}
}
for(k=j-1;
k>0; --k) { //check all sub
columns
if(compval < ((SWArray[i][j-k]) - (GapPenalty
+ (GapExt * k)))) { //if square to the left has the
highest value
compval = ((SWArray[i][j-k])
- (GapPenalty + (GapExt * k))); //set compval to
that square
}
}
if(compval <
0) {
compval = 0;
}
//MISMATCH
if(strA[i-1] !=
strB[j-1]) {
//if current sequence values
are the same
if(compval < (SWArray[i-1][j-1] + MissMatch))
{ //compval = diagonal + match score
compval = SWArray[i-1][j-1] +
MissMatch;
}
}
for(k=i-1;
k>0; --k) { //check all sub
rows
if(compval < ((SWArray[i-k][j]) - (GapPenalty
+ (GapExt * k)))) { //if cell
above has a greater value
compval = ((SWArray[i-k][j])
- (GapPenalty + (GapExt * k)));
//set compval to that square
}
}
for(k=j-1;
k>0; --k) { //check all sub
columns
if(compval < ((SWArray[i][j-k]) - (GapPenalty
+ (GapExt * k)))) { //if square to the left has the
highest value
compval = ((SWArray[i][j-k])
- (GapPenalty + (GapExt * k))); //set compval to
that square
}
}
if(compval <
0) {
compval = 0;
}
SWArray[i][j] =
compval; //set current cell to highest possible score
and move on
compval =
0;
}
}
/*PRINT S-W Table*/
printf(" 0");
for(i = 0; i <= lenB; ++i) {
printf(" %c",strB[i]);
}
printf("\n");
for(i = 0; i <= lenA; ++i) {
if(i < 1) {
printf("0");
}
if(i > 0) {
printf("%c",strA[i-1]);
}
for(j = 0; j <= lenB; ++j)
{
//if(SWArray[i][j] > 0){
printf("%3i",SWArray[i][j]);
//}
}
printf("\n");
}
/*MAKE ALIGNMENTT*/
for(i=0; i<=lenA; ++i) { //find highest
score in matrix: this is the starting point of an optimal local
alignment
for(j=0; j<=lenB; ++j)
{
if(SWArray[i][j]
> HiScore) {
HiScore = SWArray[i][j];
HiScorePos[0]=i;
HiScorePos[1]=j;
}
}
}
/*send Position to alignment function*/
MaxA[0] = strA[HiScorePos[0]-1];
MaxB[0] = strB[HiScorePos[1]-1];
check = Align(HiScorePos[0],HiScorePos[1]);
/*in the end reverse Max A and B*/
k=0;
for(i = strlen(MaxA)-1; i > -1; --i) {
OptA[k] = MaxA[i];
++k;
}
k=0;
for(j=strlen(MaxB)-1; j > -1; --j) {
OptB[k] = MaxB[j];
++k;
}
// printf("\n%s\n%s\n",MaxA,MaxB);
printf("%s\n%s ",OptA,OptB);
system("PAUSE");
return(0);
}
Implement the Smith-Waterman algorithm in C that accepts a substitution matrix. You may assume the substitution...
Just Q3 and Q4 Q1] Write a C function to implement the binary search algorithm over an array of integer numbers and size n. The function should return the index of the search key if the search key exists and return - 1 if the search key doesn't exist. [10 Points] Q2] Write a C function to implement the selection sort algorithm, to sort an array of float values and size n. The function should sort the array in ascending...
You are going to implement Treesort algorithm in C++ to sort string data. Here are the steps to complete the homework 1) Use the following class definition for binary search tree nodes. Its constructor is incomplete you should first complete the constructor. class TreeNode t public: string data; / this is the string stored in the node TreeNode left: TreeNode right; TreeNode (string element, TreeNode 1t, TreeNode rt //your code here 2) Write a function that will insert a string...
Implement a tic-tac-toe game. At the bottom of these specifications you will see a template you must copy and paste to cloud9. Do not change the provided complete functions, or the function stub headers / return values. Currently, if the variables provided in main are commented out, the program will compile. Complete the specifications for each function. As you develop the program, implement one function at a time, and test that function. The provided comments provide hints as to what...
You shall develop a grammar and implement a parser which recognizes valid statements as described below in the assignment specification. You may develop your code using C, C++. The test file include these expressions below. The first six should pass and the rest should fail: first = one1 + two2 - three3 / four4 ; second = one1 * (two2 * three3) ; second = one1 * (two2 * three3) ; third = ONE + twenty - three3 ; third...
[C++]Project #3 – Traversing Property Graphs EDIT: in reply to the comment, what excerpt are you talking about? EDIT #2 : Need to construct a directed graph, where the property names label edges connecting nodes; either using array or linked list implementation. Learning Objectives Implement a data structure to meet given specifications Design, implement, and use a graph data structure Perform analysis of algorithm performance Utilize Dijkstra's algorithm Overview Your task for this assignment is to implement a graph data...
Design and implement a C version of the color match program. As a starting point, use the file HW2-1-shell.c. The program should employ a reasonable algorithm that compares all pairings of colors in the palette exactly once. A color should not be compared to itself. Nor should two colors be compared to each other more than once. Your C program should print out the total component color difference for the closest pair using the print string provided in the shell...
I need help writing this code in C++ Proj11.cpp is provided as well as the randomdata.txt thank you in advance! Objectives: The main objectives of this project is to introduce you to recursion, and test your ability to work with some STL functionalities. A review of your knowledge on working with templates, dynamic data structures, as well as manipulating dynamic memory, classes, pointers and iostream to all extents, is also included. Description: For the entirety of this project, you will...
For C++ This is the information about the meal class 2-1. (24 marks) Define and implement a class named Dessert, which represents a special kind of Meal. It is to be defined by inheriting from the Meal class. The Dessert class has the following constructor: Dessert (string n, int co) // creates a meal with name n, whose type // is "dessert" and cost is co The class must have a private static attribute static int nextID ; which is...
1. Write a C++ program called Password that handles encrypting a password. 2. The program must perform encryption as follows: a. main method i. Ask the user for a password. ii. Sends the password to a boolean function called isValidPassword to check validity. 1. Returns true if password is at least 8 characters long 2. Returns false if it is not at least 8 characters long iii. If isValidPassword functions returns false 1. Print the following error message “The password...
C++ help Format any numerical decimal values with 2 digits after the decimal point. Problem description Write the following functions as prototyped below. Do not alter the function signatures. /// Returns a copy of the string with its first character capitalized and the rest lowercased. /// @example capitalize("hELLO wORLD") returns "Hello world" std::string capitalize(const std::string& str); /// Returns a copy of the string centered in a string of length 'width'. /// Padding is done using the specified 'fillchar' (default is...