C++: Translating mRNA sequence help
The solution follows the problem definition directly. Here is the C++ code, saved as amino_acid.cpp:
amino_acid.cpp:
#include<iostream>
#include<string>
#include<algorithm>
using namespace std;
/*
 * Translates an mRNA sequence into a '-'-separated list of three-letter
 * amino-acid abbreviations.
 *
 * The sequence is upper-cased and then read one codon (three bases) at a
 * time, using this codon table:
 *   AUG                -> MET
 *   UUU, UUC           -> PHE
 *   AUU, AUC, AUA      -> ILE
 *   GUU, GUC, GUA, GUG -> VAL
 *   UGA                -> stop codon: translation ends here
 *   any other codon    -> ALA
 *
 * Parameters:
 *   sequence - the mRNA bases; case-insensitive. It is taken by value, so
 *              the caller's string is not modified.
 *
 * Returns:
 *   The abbreviations joined by '-'. When a stop codon is reached the result
 *   has no trailing '-' (e.g. "AUGUGA" -> "MET"). When the sequence has no
 *   stop codon every abbreviation is followed by '-' (e.g. "AUG" -> "MET-").
 *   Trailing bases that do not form a complete codon are ignored.
 */
std::string translate_acid(std::string sequence){
    // Upper-case the whole sequence so the codon comparisons are
    // case-insensitive. The argument is widened through unsigned char because
    // toupper() is undefined for negative char values.
    std::transform(sequence.begin(), sequence.end(), sequence.begin(),
                   [](unsigned char base){
                       return static_cast<char>(::toupper(base));
                   });

    std::string acid;

    // Step through complete codons only: the bound "pos + 3 <= size" keeps an
    // incomplete trailing codon from being read past the end of the string.
    for(std::string::size_type pos = 0; pos + 3 <= sequence.size(); pos += 3){
        const std::string codon = sequence.substr(pos, 3);

        if(codon == "UGA"){
            // Stop codon: nothing is added. Drop the separator left by the
            // previous amino acid (if there is one) and stop translating.
            if(!acid.empty())
                acid.pop_back();
            break;
        }

        // Every codon not listed explicitly falls back to "ALA".
        const char* name = "ALA";
        if(codon == "AUG")
            name = "MET";
        else if(codon == "UUU" || codon == "UUC")
            name = "PHE";
        else if(codon == "AUU" || codon == "AUC" || codon == "AUA")
            name = "ILE";
        else if(codon == "GUU" || codon == "GUC" || codon == "GUA" ||
                codon == "GUG")
            name = "VAL";

        acid += name;
        acid += '-';   // separator between amino acids
    }
    return acid;
}
/*
 * Counts how many bases of the given type occur in the sequence.
 * The comparison is case-insensitive on both the sequence and the type.
 *
 * Parameters:
 *   sequence - the mRNA bases to scan.
 *   type     - the base letter to count (e.g. 'a' or 'A').
 *
 * Returns:
 *   The number of characters in sequence equal to type, ignoring case.
 */
int count_bases(std::string sequence, char type){
    // toupper() is undefined for negative char values, so every character is
    // widened through unsigned char before it is converted.
    const auto to_upper = [](unsigned char letter){
        return static_cast<char>(::toupper(letter));
    };

    const char wanted = to_upper(static_cast<unsigned char>(type));

    int total = 0;
    for(const char base : sequence){
        if(to_upper(static_cast<unsigned char>(base)) == wanted)
            ++total;
    }
    return total;
}
/*
 * Checks whether an mRNA sequence is valid. The check is case-insensitive.
 *
 * Parameters:
 *   sequence - the mRNA bases entered by the user.
 *
 * Returns an error code; when several problems are present, the lowest
 * applicable code is reported:
 *   0: sequence is valid
 *   1: sequence contains letters other than A, C, G, U
 *   2: sequence length is not a multiple of 3
 *   3: sequence doesn't start with AUG
 *   4: sequence doesn't end with UGA
 */
int check_valid(std::string sequence){
    // All checks are done on the upper-case form. The argument is widened
    // through unsigned char because toupper() is undefined for negative
    // char values.
    std::transform(sequence.begin(), sequence.end(), sequence.begin(),
                   [](unsigned char base){
                       return static_cast<char>(::toupper(base));
                   });

    // Code 1: any letter other than A, C, G, U.
    for(const char base : sequence){
        if(base != 'A' && base != 'C' && base != 'G' && base != 'U')
            return 1;
    }

    // Code 2: the sequence must split evenly into codons.
    if(sequence.length() % 3 != 0)
        return 2;

    // Code 3: the start codon must be "AUG".
    // compare(pos, len, str) compares the substring of `sequence` that starts
    // at `pos` and is at most `len` characters long against `str`.
    if(sequence.compare(0, 3, "AUG") != 0)
        return 3;

    // Code 4: the stop codon must be "UGA".
    // Reaching this point means the sequence starts with "AUG", so it has at
    // least three characters and `length() - 3` cannot wrap around.
    if(sequence.compare(sequence.length() - 3, 3, "UGA") != 0)
        return 4;

    return 0;
}
void input(){
/*
* this function will prompt user to enter mRNA
sequence and..
* will keep on prompting till user enter's exit.
* at each string entered, we will call valid function
to check if string is valid.
* if it is valid, then we will count number of bases
and call translate function to translate into amino acid.
*/
cout << "Enter exit to stop the
program\n";
//we make while loop to exit when "exit" is entered by
using a flag
bool flag = false;
string sequence;
while(!flag){
//we now read in the string entered
by user
cout << "Enter mRNA
sequence(or exit to quit): ";
cin >> sequence;
//we now check if this equals
"exit" or not
if(sequence.compare("exit") ==
0){
flag =
true;
break;
}
//we now call check_valid function
to check validity of mRNA sequence entered
else{
int status_code
= check_valid(sequence);
if(status_code
== 0)
{
//we now count total bases for each type and
translate it to amino acid
cout << "Total counts of bases in the
sequence: \n";
cout << "A count: " <<
count_bases(sequence, 'a') << endl;
cout << "C count: " <<
count_bases(sequence, 'c') << endl;
cout << "G count: " <<
count_bases(sequence, 'g') << endl;
cout << "U count: " <<
count_bases(sequence, 'u') << endl;
string amino_acid =
translate_acid(sequence);
cout << "The amino acid corresponding to
the mRNA " << sequence << " is: " << amino_acid
<< endl;
}
else{
//we use status_code to capture the cause of
invalidity of mRNA sequence entered.
//we now use switch case to display correct
reason for not accepting sequence.
switch(status_code){
case 1: {cout << "The
mRNA sequence contains letters other than A, C, G, U\n";
break;}
case 2: {cout << "The
mRNA sequence length is not a multiple of 3\n"; break; }
case 3: {cout << "The
mRNA sequence doesn't start with the start codon(AUG)\n";
break;}
case 4: {cout << "The
mRNA sequence doesn't end with the stop codon(UGA)\n";
break;}
default: break;
}//switch-case
}//else
}//else for sequence other than
exit entered
}//while
}//function
// Program entry point: the whole session runs inside the input() prompt loop.
int main()
{
    input();
    return 0;
}
***********************************************************************************************
Sample Input and Outputs:
Enter exit to stop the program
Enter mRNA sequence(or exit to quit): AGUUGA
The mRNA sequence doesn't start with the start codon(AUG)
Enter mRNA sequence(or exit to quit): AUGGGU
The mRNA sequence doesn't end with the stop codon(UGA)
Enter mRNA sequence(or exit to quit): AUGCER
The mRNA sequence contains letters other than A, C, G, U
Enter mRNA sequence(or exit to quit): AUGUG
The mRNA sequence length is not a multiple of 3
Enter mRNA sequence(or exit to quit): AUGUGA
Total counts of bases in the sequence:
A count: 2
C count: 0
G count: 2
U count: 2
The amino acid corresponding to the mRNA AUGUGA is: MET
Enter mRNA sequence(or exit to quit): AUGUUCAUUGUUAAAUGA
Total counts of bases in the sequence:
A count: 6
C count: 1
G count: 3
U count: 8
The amino acid corresponding to the mRNA AUGUUCAUUGUUAAAUGA is: MET-PHE-ILE-VAL-ALA
Enter mRNA sequence(or exit to quit): AUGUUUAUAACAGUGGUUGUCUGA
Total counts of bases in the sequence:
A count: 6
C count: 2
G count: 6
U count: 10
The amino acid corresponding to the mRNA AUGUUUAUAACAGUGGUUGUCUGA is: MET-PHE-ILE-ALA-VAL-VAL-VAL
Enter mRNA sequence(or exit to quit): exit
************************************************************************************************
C++: Translating mRNA sequence help Homework Description Codon 1 You are working in a bioinformatics lab...
2. On the mRNA codon table, the first nucleotide in mRNA is to the left, the second is above, and third is to the right. On the sequence, the 5'cap is indicated by (5'). The poly (A) tail is not shown. Use the codon table to translate this short mRNA. Mark the codons and write the amino acid sequence beneath them. (5') CGUUACAAUGUAUCGCGCGGUACUCGGCAAAGUGCCCUGAAUAGAGUUGGUA (3') 3. DNA polymerase made a mistake and added a C on the DNA template strand. In...
1. Which one of the following describes the NORMAL FUNCTION of a stop codon in mRNA during bacterial protein synthesis? a. It is at the end of a mRNA molecule and terminates translation once the protein is completed. b. It prematurely terminates protein synthesis resulting in an incomplete protein. c. The 3 stop codons are UGA, UAG, and UGG. 2. A tRNA with an ACC anticodon will insert the amino acid ________ during translation. (Use your codon sheet, Fig. 6...
BONUS (10 points, 2 points each): Given below is a sequence of mRNA that is transcribed from a structural and mRNA: AUG CGC GOA UCC CCC ACC AGA ACG GAX UGA-3 G-C 1. Using the codon chart provided below, write down the predicted amino acid sequence of the protein the produced from this mRNA 3-UAC GCG EXU AGG GGG UGG UCU UGC COU 2). Write down the DNA sequence of the structural pene from which this mRNA sequence is transcribed...
2) On your first day working in my lab, you obtain the following DNA sequence: 3' AATTATACACGATGAAGCTTGTGACAGGTTTCCAATCATTAA 5 5' TTAATATGTGCTACTTCGAACACTGTECCAAAGGTTAGTAATT 3' a) What are the two possible RNA molecules that could be transcribed from this DNA? Indicate the 5' and 3' ends of the RNA. b) Only one of these two RNA molecules can actually be translated. Explain why. c) It turns out that the RNA molecule that can be translated is the mRNA for p53. What is the amino...
10. Examine more closely how DNA changes in a co questions. Write the codon for the DNA sequence ATA then change only one nucleotide so that it codes for an RNA codon that is a STOP codon. If a codon changes to cause an mRNA to have a STOP codon instead of a codon for an amino acid, what type of protein mutation is this? Write the RNA codon(s) for histidine then change the last letter to a "G." What...
Hello please please help !! Thank you!! Please and thank you soo much!!! Question Completion Status: Question 10: The genetic code consists of 64 triplets of nucleotides (called codons). Each codon (with the exception of the 3 stop codons) encodes for one of the 20 amino acids used in the synthesis of proteins. This produces some redundancy in the code as most amino acids are encoded by more than one codon. One codon, AUG serves two related functions: it signals...
Question 10 (15 points) Given the following sequence for a template strand of DNA 3 - ATACTTTGTCGAGACCCGCTTCTTGCAGACTGGG A. Provide the mRNA sequence following transcription (include polarity) B. Provide the amino acid sequence using either the one letter or three letter abbreviations. Include polarity (N-or C-terminus) and be careful to start in the correct place: C. What if the "C" underlined above was changed to a T. What is the new codon? How does that affect the amino acid sequence? What...
The sequence below represents a middle section of the template strand of DNA of a structural gene in an eukaryote organism. Please fill in the blanks that correspond. The consensus sequences that the spliceosome recognizes are marked in red. The intron(s) are marked in lowercase. YOUR RESPONSES SHOULD ALL BE IN UPPER CASE. Amino acid sequences should be written in the format ALA-TYR-LEU Stop codon is not written. DNA: 3'CATGGACAGgtaagaatacaacacagGTCGGCATGACG 5 GUACCUGUCcauucuuauguugugucCAGCCGUACUGC What would be the immatur RNA sequence transcribed...
Answer please? The sequence below represents a middle section of the template strand of DNA of a structural gene in an eukaryote organism. Please fill in the blanks that correspond. The consensus sequences that the spliceosome recognizes are marked in red. The intron(s) are marked in lowercase. YOUR RESPONSES SHOULD ALL BE IN UPPER CASE. Amino acid sequences should be written in the format ALA-TYR-LEU Stop codon is not written. DNA: 3 CATGGACAGgtaagaatacaacacagGTCGGCATGACG 5' What would be the immature RNA...
50 LAB 2 Genetics EXERCISE 10 PROTEIN SYNTHESIS Work with a partner to complete this exercise and answer the questions that follow. You will use the DNA strand from Exercise to make the protein for which it codes STEP 1 Review the imaginary strand of DNA below. Note the complementary base pairs. AGCAATCCGTCTTGG TCGTTAGG CAGAACC STEP 2 Draw the DNA strand separating down the middle las in the beginning of DNA replication STEP 3 Draw the free-floating RNA bases linking...