MATLAB Task Of The Day Covid-19 Sequence




% Download the GenBank record with accession NC_045512.
sarscov_gbk = getgenbank('NC_045512');

% Extract the raw nucleotide sequence from the GenBank record.
sarscov_virus = sarscov_gbk.Sequence;

% Report the genome length (number of nucleotides); echoed on purpose.
sarscov_virus_length = numel(sarscov_virus)

% Print the per-base (A/C/G/T) counts.
basecount(sarscov_virus)

% Visualize the nucleotide composition as a pie chart.
figure
basecount(sarscov_virus, 'Chart', 'pie');
title('Distribution of Nucleotide Bases for SARS Covid19 Virus');

% Visualize the dimer counts as a bar chart; the counts are also echoed.
figure
dimers = dimercount(sarscov_virus, 'Chart', 'bar')
title('SARS Covid19 Virus Genome Dimer Histogram');

% Display the codon-to-amino-acid map returned by geneticcode.
Map = geneticcode


% Codon usage for each of the three forward reading frames.
% codoncount is called without an output argument, so it also prints the
% codon count table for every frame.
for frame = 1:3
    figure                      % one figure per reading frame
    subplot(2,1,1)
    codoncount(sarscov_virus,'frame',frame,'figure',true,'geneticcode','none')
    % Label the heat map so the three figures can be told apart.
    title(sprintf('Codons for frame %d',frame))
end

%Visualize ORF of nucleotide sequence.

% Default ORF display.
seqshoworfs(sarscov_virus);

% ORF display using the standard genetic code and also accepting the
% alternative start codons; the start/stop positions per frame are
% returned in orfs and echoed.
orfs = seqshoworfs(sarscov_virus,'GeneticCode','Standard','AlternativeStartCodons',true)


% Parse the GenBank feature table, list the coding sequences and draw a
% feature map of the genome.

% Reuse the record already downloaded into sarscov_gbk when it is in the
% workspace; only go back to GenBank if it is not available.
if exist('sarscov_gbk','var')
    gbkStruct = sarscov_gbk
else
    gbkStruct = getgenbank('NC_045512')
end

% Parse all features and attach the corresponding sequence to each one.
features = featureparse(gbkStruct,'Sequence',true)

coding_sequences = features.CDS;

% Space-separated list of the gene names of all CDS entries.
coding_sequences_id = sprintf('%s ',coding_sequences.gene)

% First CDS entry of the record.
POLYCDS = coding_sequences(1)

% Load the genome from a local FASTA file; it must be on the MATLAB path
% or in the current folder.
fseq=fastaread('sequencecovid.fasta')

% Feature map. Only features that are present in this record are requested
% (the record has no tRNA, rRNA or D_loop entries), with one level per
% feature. The continuation line must directly follow the '...'.
[h,l] = featureview(gbkStruct,{'CDS','mat_peptide','stem_loop'},...
                    [1 2 3],'Fontsize',9);

legend(h,l,'interpreter','none');

title('Covid-19 Virus, complete genome')


% Translate the nucleotide sequence read from the FASTA file into amino
% acids using the standard genetic code. This has to happen before
% counting: aacount applied to the nucleotide record would tally the
% letters a/c/g/t as if they were amino acids.
CV = nt2aa(fseq,'GeneticCode','Standard');

% Histogram of the amino acid composition of the translated sequence.
figure

subplot(2,1,1)

CVaaCount = aacount(CV,'chart','bar');

title('Histogram of Amino Acid Count for the SARS Covid-19 Protein');

% Print the translated sequence in formatted form.
disp(seqdisp(CV))

%Identify the three highest codon numbers in the nucleotide.

% Codon counts of the FASTA sequence (echoed).
codons = codoncount(fseq)

% Rank the codons by count instead of reading them off the heat map.
codon_names  = fieldnames(codons);
% Keep only real three-letter codon fields (drops any extra bookkeeping
% field such as one for ambiguous symbols, if present).
is_codon     = cellfun(@numel,codon_names) == 3;
codon_names  = codon_names(is_codon);
codon_counts = cellfun(@(name) codons.(name),codon_names);
[~, rank_order] = sort(codon_counts,'descend');
top3_codons = codon_names(rank_order(1:min(3,numel(rank_order))))

% For this genome the three most frequent codons are TTT, TGT and TAA.

%Look up the amino acids for codons in (5)

TTT_aa = aminolookup('code',nt2aa('TTT'))

TGT_aa = aminolookup('code',nt2aa('TGT'))

TAA_aa = aminolookup('code',nt2aa('TAA'))

% Heat map of the codon frequencies.
figure

count = codoncount(fseq,'figure',true);

title('Covid-19 Virus Genome Codon Frequency')

%Convert a nucleotide sequence to an amino acid sequence

CVSeq = nt2aa(fseq,'geneticcode','Standard')

% Fetch the GenPept record YP_009724389 once and reuse it below, rather
% than downloading it a second time just to get the sequence.
covidpro = getgenpept('YP_009724389');

% Amino acid sequence of the protein record (echoed).
CVprotein = covidpro.Sequence

% Open a new figure so the bar chart does not overwrite the codon
% frequency heat map that is still the current figure.
figure

aacount(CVSeq, 'chart','bar')

% Echo the full GenPept record.
covidpro

% Atomic composition of the protein.
covidproAC = atomiccomp(covidpro)

% Number of carbon atoms.
covidproAC.C

% Molecular weight of the protein.
covidproMW = molweight(covidpro)

Output

sarscov_virus_length = 29903

ans = struct with fields:
    A: 8954
    C: 5492
    G: 5863
    T: 9594



dimers = struct with fields:
    AA: 2880
    AC: 2023
    AG: 1742
    AT: 2308
    CA: 2084
    CC: 888
    CG: 439
    CT: 2081
    GA: 1612
    GC: 1168
    GG: 1093
    GT: 1990
    TA: 2377
    TC: 1413
    TG: 2589
    TT: 3215



Map = struct with fields:
      Name: 'Standard'
       AAA: 'K'
       AAC: 'N'
       AAG: 'K'
       AAT: 'N'
       ACA: 'T'
       ACC: 'T'
       ACG: 'T'
       ACT: 'T'
       AGA: 'R'
       AGC: 'S'
       AGG: 'R'
       AGT: 'S'
       ATA: 'I'
       ATC: 'I'
       ATG: 'M'
       ATT: 'I'
       CAA: 'Q'
       CAC: 'H'
       CAG: 'Q'
       CAT: 'H'
       CCA: 'P'
       CCC: 'P'
       CCG: 'P'
       CCT: 'P'
       CGA: 'R'
       CGC: 'R'
       CGG: 'R'
       CGT: 'R'
       CTA: 'L'
       CTC: 'L'
       CTG: 'L'
       CTT: 'L'
       GAA: 'E'
       GAC: 'D'
       GAG: 'E'
       GAT: 'D'
       GCA: 'A'
       GCC: 'A'
       GCG: 'A'
       GCT: 'A'
       GGA: 'G'
       GGC: 'G'
       GGG: 'G'
       GGT: 'G'
       GTA: 'V'
       GTC: 'V'
       GTG: 'V'
       GTT: 'V'
       TAA: '*'
       TAC: 'Y'
       TAG: '*'
       TAT: 'Y'
       TCA: 'S'
       TCC: 'S'
       TCG: 'S'
       TCT: 'S'
       TGA: '*'
       TGC: 'C'
       TGG: 'W'
       TGT: 'C'
       TTA: 'L'
       TTC: 'F'
       TTG: 'L'
       TTT: 'F'
    Starts: {'ATG'  'CTG'  'TTG'}

AAA - 303     AAC - 212     AAG - 110     AAT - 260     
ACA - 249     ACC - 150     ACG -  60     ACT - 220     
AGA - 277     AGC - 141     AGG - 123     AGT - 197     
ATA - 116     ATC - 112     ATG - 117     ATT - 208     
CAA - 233     CAC - 160     CAG -  92     CAT - 172     
CCA - 115     CCC -  42     CCG -  28     CCT - 107     
CGA -  31     CGC -  40     CGG -  33     CGT -  54     
CTA - 110     CTC -  78     CTG -  80     CTT - 208     
GAA - 186     GAC - 123     GAG -  84     GAT - 167     
GCA - 116     GCC -  68     GCG -  22     GCT - 169     
GGA - 112     GGC -  84     GGG -  49     GGT - 149     
GTA - 121     GTC - 101     GTG -  93     GTT - 233     
TAA - 342     TAC - 227     TAG - 128     TAT - 278     
TCA - 173     TCC -  75     TCG -  34     TCT - 190     
TGA - 304     TGC - 268     TGG - 263     TGT - 367     
TTA - 232     TTC - 210     TTG - 178     TTT - 383     
AAA - 337     AAC - 190     AAG - 222     AAT - 315     
ACA - 288     ACC - 101     ACG -  42     ACT - 291     
AGA - 130     AGC -  54     AGG -  78     AGT - 152     
ATA - 182     ATC - 106     ATG - 311     ATT - 278     
CAA - 246     CAC - 108     CAG - 163     CAT - 136     
CCA - 139     CCC -  31     CCG -  19     CCT - 146     
CGA -  28     CGC -  29     CGG -  20     CGT -  67     
CTA - 184     CTC -  93     CTG - 150     CTT - 259     
GAA - 252     GAC - 142     GAG - 116     GAT - 214     
GCA - 158     GCC -  73     GCG -  37     GCT - 265     
GGA -  99     GGC -  90     GGG -  34     GGT - 245     
GTA - 174     GTC - 101     GTG - 181     GTT - 311     
TAA -  95     TAC - 182     TAG -  89     TAT - 213     
TCA - 189     TCC -  56     TCG -  39     TCT - 217     
TGA -  71     TGC -  87     TGG -  99     TGT - 211     
TTA - 283     TTC - 140     TTG - 277     TTT - 332     
AAA - 283     AAC - 213     AAG - 248     AAT - 186     
ACA - 272     ACC - 125     ACG -  62     ACT - 163     
AGA - 198     AGC - 106     AGG - 128     AGT - 158     
ATA - 173     ATC - 121     ATG - 297     ATT - 287     
CAA - 224     CAC - 191     CAG - 183     CAT - 176     
CCA - 100     CCC -  43     CCG -  27     CCT -  91     
CGA -  36     CGC -  28     CGG -  23     CGT -  50     
CTA - 267     CTC - 116     CTG - 265     CTT - 271     
GAA -  97     GAC -  75     GAG -  97     GAT -  59     
GCA -  98     GCC -  46     GCG -  29     GCT -  87     
GGA -  71     GGC -  49     GGG -  51     GGT -  60     
GTA - 174     GTC -  67     GTG - 278     GTT - 156     
TAA - 282     TAC - 200     TAG - 210     TAT - 131     
TCA - 187     TCC -  78     TCG -  40     TCT - 135     
TGA - 255     TGC - 192     TGG - 192     TGT - 280     
TTA - 361     TTC - 168     TTG - 362     TTT - 289     




orfs = 1×3 struct
Fields    Start           Stop
1         1×68 double     1×67 double
2         1×89 double     1×89 double
3         1×160 double    1×160 double

gbkStruct = struct with fields:
                LocusName: 'NC_045512'
      LocusSequenceLength: '29903'
     LocusNumberofStrands: 'ss-'
            LocusTopology: 'linear'
        LocusMoleculeType: 'RNA'
     LocusGenBankDivision: 'VRL'
    LocusModificationDate: '18-JUL-2020'
               Definition: 'Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome.'
                Accession: 'NC_045512'
                  Version: 'NC_045512.2'
                       GI: ''
                  Project: []
                   DBLink: 'BioProject: PRJNA485481'
                 Keywords: 'RefSeq.'
                  Segment: []
                   Source: 'Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)'
           SourceOrganism: [4×65 char]
                Reference: {[1×1 struct]  [1×1 struct]  [1×1 struct]  [1×1 struct]  [1×1 struct]  [1×1 struct]}
                  Comment: [17×67 char]
                 Features: [617×74 char]
                      CDS: [1×12 struct]
                 Sequence: 'attaaaggtttataccttcccaggtaacaaaccaaccaactttcgatctcttgtagatctgttctctaaacgaactttaaaatctgtgtggctgtcactcggctgcatgcttagtgcactcacgcagtataattaataactaattactgtcgttgacaggacacgagtaactcgtctatcttctgcaggctgcttacggtttcgtccgtgttgcagccgatcatcagcacatctaggtttcgtccgggtgtgaccgaaaggtaagatggagagccttgtccctggtttcaacgagaaaacacacgtccaactcagtttgcctgttttacaggttcgcgacgtgctcgtacgtggctttggagactccgtggaggaggtcttatcagaggcacgtcaacatcttaaagatggcacttgtggcttagtagaagttgaaaaaggcgttttgcctcaacttgaacagccctatgtgttcatcaaacgttcggatgctcgaactgcacctcatggtcatgttatggttgagctggtagcagaactcgaaggcattcagtacggtcgtagtggtgagacacttggtgtccttgtccctcatgtgggcgaaataccagtggcttaccgcaaggttcttcttcgtaagaacggtaataaaggagctggtggccatagttacggcgccgatctaaagtcatttgacttaggcgacgagcttggcactgatccttatgaagattttcaagaaaactggaacactaaacatagcagtggtgttacccgtgaactcatgcgtgagcttaacggaggggcatacactcgctatgtcgataacaacttctgtggccctgatggctaccctcttgagtgcattaaagaccttctagcacgtgctggtaaagcttcatgcactttgtccgaacaactggactttattgacactaagaggggtgtatactgctgccgtgaacatgagcatgaaattgcttggtacacggaacgttctgaaaagagctatgaattgcagacaccttttgaaattaaattggcaaagaaatttgacaccttcaatggggaatgtccaaattttgtatttcccttaaattccataatcaagactattcaaccaagggttgaaaagaaaaagcttgatggctttatgggtagaattcgatctgtctatccagttgcgtcaccaaatgaatgcaaccaaatgtgcctttcaactctcatgaagtgtgatcattgtggtgaaacttcatggcagacgggcgattttgttaaagccacttgcgaattttgtggcactgagaatttgactaaagaaggtgccactacttgtggttacttaccccaaaatgctgttgttaaaatttattgtccagcatgtcacaattcagaagtaggacctgagcatagtcttgccgaataccataatgaatctggcttgaaaaccattcttcgtaagggtggtcgcactattgcctttggaggctgtgtgttctcttatgttggttgccataacaagtgtgcctattgggttccacgtgctagcgctaacataggttgtaaccatacaggtgttgttggagaaggttccgaaggtcttaatgacaaccttcttgaaatactccaaaaagagaaagtcaacatcaatattgttggtgactttaaacttaatgaagagatcgccattattttggcatctttttctgcttccacaagtgcttttgtggaaactgtgaaaggtttggattataaagcattcaaacaaattgttgaatcctgtggtaattttaaagttacaaaaggaaaagctaaaaaaggtgcctggaatattggtgaacagaaatcaatactgagtcctctttatgcatttgcatcagaggctgctcgtgttgtacgatcaattttctcccgcactcttgaaactgctcaaaattctgtgcgtgttttacagaaggccgctataacaata
ctagatggaatttcacagtattcactgagactcattgatgctatgatgttcacatctgatttggctactaacaatctagttgtaatggcctacattacaggtggtgttgttcagttgacttcgcagtggctaactaacatctttggcactgtttatgaaaaactcaaacccgtccttgattggcttgaagagaagtttaaggaaggtgtagagtttcttagagacggttgggaaattgttaaatttatctcaacctgtgcttgtgaaattgtcggtggacaaattgtcacctgtgcaaaggaaattaaggagagtgttcagacattctttaagcttgtaaataaatttttggctttgtgtgctgactctatcattattggtggagctaaacttaaagccttgaatttaggtgaaacatttgtcacgcactcaaagggattgtacagaaagtgtgttaaatccagagaagaaactggcctactcatgcctctaaaagccccaaaagaaattatcttcttagagggagaaacacttcccacagaagtgttaacagaggaagttgtcttgaaaactggtgatttacaaccattagaacaacctactagtgaagctgttgaagctccattggttggtacaccagtttgtattaacgggcttatgttgctcgaaatcaaagacacagaaaagtactgtgcccttgcacctaatatgatggtaacaaacaataccttcacactcaaaggcggtgcaccaacaaaggttacttttggtgatgacactgtgatagaagtgcaaggttacaagagtgtgaatatcacttttgaacttgatgaaaggattgataaagtacttaatgagaagtgctctgcctatacagttgaactcggtacagaagtaaatgagttcgcctgtgttgtggcagatgctgtcataaaaactttgcaaccagtatctgaattacttacaccactgggcattgatttagatgagtggagtatggctacatactacttatttgatgagtctggtgagtttaaattggcttcacatatgtattgttctttctaccctccagatgaggatgaagaagaaggtgattgtgaagaagaagagtttgagccatcaactcaatatgagtatggtactgaagatgattaccaaggtaaacctttggaatttggtgccacttctgctgctcttcaacctgaagaagagcaagaagaagattggttagatgatgatagtcaacaaactgttggtcaacaagacggcagtgaggacaatcagacaactactattcaaacaattgttgaggttcaacctcaattagagatggaacttacaccagttgttcagactattgaagtgaatagttttagtggttatttaaaacttactgacaatgtatacattaaaaatgcagacattgtggaagaagctaaaaaggtaaaaccaacagtggttgttaatgcagccaatgtttaccttaaacatggaggaggtgttgcaggagccttaaataaggctactaacaatgccatgcaagttgaatctgatgattacatagctactaatggaccacttaaagtgggtggtagttgtgttttaagcggacacaatcttgctaaacactgtcttcatgttgtcggcccaaatgttaacaaaggtgaagacattcaacttcttaagagtgcttatgaaaattttaatcagcacgaagttctacttgcaccattattatcagctggtatttttggtgctgaccctatacattctttaagagtttgtgtagatactgttcgcacaaatgtctacttagctgtctttgataaaaatctctatgacaaacttgtttcaagctttttggaaatgaagagtgaaaagcaagttgaacaaaagatcgctgagattcctaaagaggaagttaagccatttataactgaaagtaaaccttcagttgaacagagaaaacaagatgataagaaaatcaaagcttgtgt
tgaagaagttacaacaactctggaagaaactaagttcctcacagaaaacttgttactttatattgacattaatggcaatcttcatccagattctgccactcttgttagtgacattgacatcactttcttaaagaaagatgctccatatatagtgggtgatgttgttcaagagggtgttttaactgctgtggttatacctactaaaaaggctggtggcactactgaaatgctagcgaaagctttgagaaaagtgccaacagacaattatataaccacttacccgggtcagggtttaaatggttacactgtagaggaggcaaagacagtgcttaaaaagtgtaaaagtgccttttacattctaccatctattatctctaatgagaagcaagaaattcttggaactgtttcttggaatttgcgagaaatgcttgcacatgcagaagaaacacgcaaattaatgcctgtctgtgtggaaactaaagccatagtttcaactatacagcgtaaatataagggtattaaaatacaagagggtgtggttgattatggtgctagattttacttttacaccagtaaaacaactgtagcgtcacttatcaacacacttaacgatctaaatgaaactcttgttacaatgccacttggctatgtaacacatggcttaaatttggaagaagctgctcggtatatgagatctctcaaagtgccagctacagtttctgtttcttcacctgatgctgttacagcgtataatggttatcttacttcttcttctaaaacacctgaagaacattttattgaaaccatctcacttgctggttcctataaagattggtcctattctggacaatctacacaactaggtatagaatttcttaagagaggtgataaaagtgtatattacactagtaatcctaccacattccacctagatggtgaagttatcacctttgacaatcttaagacacttctttctttgagagaagtgaggactattaaggtgtttacaacagtagacaacattaacctccacacgcaagttgtggacatgtcaatgacatatggacaacagtttggtccaacttatttggatggagctgatgttactaaaataaaacctcataattcacatgaaggtaaaacattttatgttttacctaatgatgacactctacgtgttgaggcttttgagtactaccacacaactgatcctagttttctgggtaggtacatgtcagcattaaatcacactaaaaagtggaaatacccacaagttaatggtttaacttctattaaatgggcagataacaactgttatcttgccactgcattgttaacactccaacaaatagagttgaagtttaatccacctgctctacaagatgcttattacagagcaagggctggtgaagctgctaacttttgtgcacttatcttagcctactgtaataagacagtaggtgagttaggtgatgttagagaaacaatgagttacttgtttcaacatgccaatttagattcttgcaaaagagtcttgaacgtggtgtgtaaaacttgtggacaacagcagacaacccttaagggtgtagaagctgttatgtacatgggcacactttcttatgaacaatttaagaaaggtgttcagataccttgtacgtgtggtaaacaagctacaaaatatctagtacaacaggagtcaccttttgttatgatgtcagcaccacctgctcagtatgaacttaagcatggtacatttacttgtgctagtgagtacactggtaattaccagtgtggtcactataaacatataacttctaaagaaactttgtattgcatagacggtgctttacttacaaagtcctcagaatacaaaggtcctattacggatgttttctacaaagaaaacagttacacaacaaccataaaaccagttacttataaattggatggtgttgtttgtacagaaattgaccctaagttggacaattattataagaaag
acaattcttatttcacagagcaaccaattgatcttgtaccaaaccaaccatatccaaacgcaagcttcgataattttaagtttgtatgtgataatatcaaatttgctgatgatttaaaccagttaactggttataagaaacctgcttcaagagagcttaaagttacatttttccctgacttaaatggtgatgtggtggctattgattataaacactacacaccctcttttaagaaaggagctaaattgttacataaacctattgtttggcatgttaacaatgcaactaataaagccacgtataaaccaaatacctggtgtatacgttgtctttggagcacaaaaccagttgaaacatcaaattcgtttgatgtactgaagtcagaggacgcgcagggaatggataatcttgcctgcgaagatctaaaaccagtctctgaagaagtagtggaaaatcctaccatacagaaagacgttcttgagtgtaatgtgaaaactaccgaagttgtaggagacattatacttaaaccagcaaataatagtttaaaaattacagaagaggttggccacacagatctaatggctgcttatgtagacaattctagtcttactattaagaaacctaatgaattatctagagtattaggtttgaaaacccttgctactcatggtttagctgctgttaatagtgtcccttgggatactatagctaattatgctaagccttttcttaacaaagttgttagtacaactactaacatagttacacggtgtttaaaccgtgtttgtactaattatatgccttatttctttactttattgctacaattgtgtacttttactagaagtacaaattctagaattaaagcatctatgccgactactatagcaaagaatactgttaagagtgtcggtaaattttgtctagaggcttcatttaattatttgaagtcacctaatttttctaaactgataaatattataatttggtttttactattaagtgtttgcctaggttctttaatctactcaaccgctgctttaggtgttttaatgtctaatttaggcatgccttcttactgtactggttacagagaaggctatttgaactctactaatgtcactattgcaacctactgtactggttctataccttgtagtgtttgtcttagtggtttagattctttagacacctatccttctttagaaactatacaaattaccatttcatcttttaaatgggatttaactgcttttggcttagttgcagagtggtttttggcatatattcttttcactaggtttttctatgtacttggattggctgcaatcatgcaattgtttttcagctattttgcagtacattttattagtaattcttggcttatgtggttaataattaatcttgtacaaatggccccgatttcagctatggttagaatgtacatcttctttgcatcattttattatgtatggaaaagttatgtgcatgttgtagacggttgtaattcatcaacttgtatgatgtgttacaaacgtaatagagcaacaagagtcgaatgtacaactattgttaatggtgttagaaggtccttttatgtctatgctaatggaggtaaaggcttttgcaaactacacaattggaattgtgttaattgtgatacattctgtgctggtagtacatttattagtgatgaagttgcgagagacttgtcactacagtttaaaagaccaataaatcctactgaccagtcttcttacatcgttgatagtgttacagtgaagaatggttccatccatctttactttgataaagctggtcaaaagacttatgaaagacattctctctctcattttgttaacttagacaacctgagagctaataacactaaaggttcattgcctattaatgttatagtttttgatggtaaatcaaaatgtgaagaatcatctgcaaaatcagcgtctgtttactacagtcagcttatgtgtcaacctata
ctgttactagatcaggcattagtgtctgatgttggtgatagtgcggaagttgcagttaaaatgtttgatgcttacgttaatacgttttcatcaacttttaacgtaccaatggaaaaactcaaaacactagttgcaactgcagaagctgaacttgcaaagaatgtgtccttagacaatgtcttatctacttttatttcagcagctcggcaagggtttgttgattcagatgtagaaactaaagatgttgttgaatgtcttaaattgtcacatcaatctgacatagaagttactggcgatagttgtaataactatatgctcacctataacaaagttgaaaacatgacaccccgtgaccttggtgcttgtattgactgtagtgcgcgtcatattaatgcgcaggtagcaaaaagtcacaacattgctttgatatggaacgttaaagatttcatgtcattgtctgaacaactacgaaaacaaatacgtagtgctgctaaaaagaataacttaccttttaagttgacatgtgcaactactagacaagttgttaatgttgtaacaacaaagatagcacttaagggtggtaaaattgttaataattggttgaagcagttaattaaagttacacttgtgttcctttttgttgctgctattttctatttaataacacctgttcatgtcatgtctaaacatactgacttttcaagtgaaatcataggatacaaggctattgatggtggtgtcactcgtgacatagcatctacagatacttgttttgctaacaaacatgctgattttgacacatggtttagccagcgtggtggtagttatactaatgacaaagcttgcccattgattgctgcagtcataacaagagaagtgggttttgtcgtgcctggtttgcctggcacgatattacgcacaactaatggtgactttttgcatttcttacctagagtttttagtgcagttggtaacatctgttacacaccatcaaaacttatagagtacactgactttgcaacatcagcttgtgttttggctgctgaatgtacaatttttaaagatgcttctggtaagccagtaccatattgttatgataccaatgtactagaaggttctgttgcttatgaaagtttacgccctgacacacgttatgtgctcatggatggctctattattcaatttcctaacacctaccttgaaggttctgttagagtggtaacaacttttgattctgagtactgtaggcacggcacttgtgaaagatcagaagctggtgtttgtgtatctactagtggtagatgggtacttaacaatgattattacagatctttaccaggagttttctgtggtgtagatgctgtaaatttacttactaatatgtttacaccactaattcaacctattggtgctttggacatatcagcatctatagtagctggtggtattgtagctatcgtagtaacatgccttgcctactattttatgaggtttagaagagcttttggtgaatacagtcatgtagttgcctttaatactttactattccttatgtcattcactgtactctgtttaacaccagtttactcattcttacctggtgtttattctgttatttacttgtacttgacattttatcttactaatgatgtttcttttttagcacatattcagtggatggttatgttcacacctttagtacctttctggataacaattgcttatatcatttgtatttccacaaagcatttctattggttctttagtaattacctaaagagacgtgtagtctttaatggtgtttcctttagtacttttgaagaagctgcgctgtgcacctttttgttaaataaagaaatgtatctaaagttgcgtagtgatgtgctattacctcttacgcaatataatagatacttagctctttataataagtacaagtattttagtggagcaatggatacaactagctacagagaagctgcttgttgtcatctcgcaaa
ggctctcaatgacttcagtaactcaggt…'
                SearchURL: 'https://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=nuccore&id=NC_045512'
              RetrieveURL: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=1798174254&rettype=gb&retmode=text'

features = struct with fields:
             source: [1×1 struct]
     five_prime_UTR: [1×1 struct]
               gene: [1×11 struct]
                CDS: [1×12 struct]
        mat_peptide: [1×26 struct]
          stem_loop: [1×5 struct]
    three_prime_UTR: [1×1 struct]

coding_sequences_id = 'ORF1ab ORF1ab S ORF3a E M ORF6 ORF7a ORF7b ORF8 N ORF10 '

POLYCDS = struct with fields:
              Location: 'join(266..13468,13468..21555)'
               Indices: [266 13468 13468 21555]
                  gene: 'ORF1ab'
             locus_tag: 'GU280_gp01'
    ribosomal_slippage: 1
                  note: 'pp1ab; translated by -1 ribosomal frameshift'
           codon_start: '1'
               product: 'ORF1ab polyprotein'
            protein_id: 'YP_009724389.1'
               db_xref: 'GeneID:43740578'
           translation: 'MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGGAPTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVELGTEVNEFACVVADAVIKTLQPVSELLTPLGIDLDEWSMATYYLFDESGEFKLASHMYCSFYPPDEDEEEGDCEEEEFEPSTQYEYGTEDDYQGKPLEFGATSAALQPEEEQEEDWLDDDSQQTVGQQDGSEDNQTTTIQTIVEVQPQLEMELTPVVQTIEVNSFSGYLKLTDNVYIKNADIVEEAKKVKPTVVVNAANVYLKHGGGVAGALNKATNNAMQVESDDYIATNGPLKVGGSCVLSGHNLAKHCLHVVGPNVNKGEDIQLLKSAYENFNQHEVLLAPLLSAGIFGADPIHSLRVCVDTVRTNVYLAVFDKNLYDKLVSSFLEMKSEKQVEQKIAEIPKEEVKPFITESKPSVEQRKQDDKKIKACVEEVTTTLEETKFLTENLLLYIDINGNLHPDSATLVSDIDITFLKKDAPYIVGDVVQEGVLTAVVIPTKKAGGTTEMLAKALRKVPTDNYITTYPGQGLNGYTVEEAKTVLKKCKSAFYILPSIISNEKQEILGTVSWNLREMLAHAEETRKLMPVCVETKAIVSTIQRKYKGIKIQEGVVDYGARFYFYTSKTTVASLINTLNDLNETLVTMPLGYVTHGLNLEEAARYMRSLKVPATVSVSSPDAVTAYNGYLTSSSKTPEEHFIETISLAGSYKDWSYSGQSTQLGIEFLKRGDKSVYYTSNPTTFHLDGEVITFDNLKTLLSLREVRTIKVFTTVDNINLHTQVVDMSMTYGQQFGPTYLDGADVTKIKPHNSHEGKTFYVLPNDDTLRVEAFEYYHTTDPSFLGRYMSALNHTKKWKYPQVNGLTSIKWADNNCYLATALLTLQQIELKFNPPALQDAYYRARAGEAANFCALILAYCNKTVGELGDVRETMSYLFQHANLDSCKRVLNVVCKTCGQQQTTLKGVEAVMYMGTLSYEQFKKGVQIPCTCGKQATKYLVQQESPFVMMSAPPAQYELKHGTFTCASEYTGNYQCGHYKHITSKETLYCIDGALLTKSSEYKGPITDVFYKENSYTTTIKPVTYKLDGVVCTEIDPKLDNYYKKDNSYFTEQPIDLVPNQPYPNASFDNFKFVCDNIKFADDLNQLTGYKKPASRELKVTFFPDLNGDVVAIDYKHY
TPSFKKGAKLLHKPIVWHVNNATNKATYKPNTWCIRCLWSTKPVETSNSFDVLKSEDAQGMDNLACEDLKPVSEEVVENPTIQKDVLECNVKTTEVVGDIILKPANNSLKITEEVGHTDLMAAYVDNSSLTIKKPNELSRVLGLKTLATHGLAAVNSVPWDTIANYAKPFLNKVVSTTTNIVTRCLNRVCTNYMPYFFTLLLQLCTFTRSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLDQALVSDVGDSAEVAVKMFDAYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAARQGFVDSDVETKDVVECLKLSHQSDIEVTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQVAKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKGGKIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQSGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQSAVKRTIKGTHHWLLLTILTSLLVLVQSTQWSLFFFLYENAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGIVFMCVEYCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQSKMSDVKCTSVVLLSVLQQLRVESSSKLWAQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGAVDINKLCEEMLDNRATLQAIASEFSSLPSYAAFATAQEAYEQAVANGDSEV
VLKKLKKSLNVAKSEFDRDAAMQRKLEKMADQAMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMVVIPDYNTYKNTCDGTTFTYASALWEIQQVVDADSKIVQLSEISMDNSPNLAWPLIVTALRANSAVKLQNNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQAGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQSADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQAVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQAENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMT
YRRLISMMGFKMNYQVNGYPNMFITREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQSLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQSSQAWQPGVAMPNLYKMQRMLLEKCDLQNYGDSATLPKGIMMNVAKYTQLCQYLNTLTLAVPYNMRVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVMSLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN'
              Sequence: 'atggagagccttgtccctggtttcaacgagaaaacacacgtccaactcagtttgcctgttttacaggttcgcgacgtgctcgtacgtggctttggagactccgtggaggaggtcttatcagaggcacgtcaacatcttaaagatggcacttgtggcttagtagaagttgaaaaaggcgttttgcctcaacttgaacagccctatgtgttcatcaaacgttcggatgctcgaactgcacctcatggtcatgttatggttgagctggtagcagaactcgaaggcattcagtacggtcgtagtggtgagacacttggtgtccttgtccctcatgtgggcgaaataccagtggcttaccgcaaggttcttcttcgtaagaacggtaataaaggagctggtggccatagttacggcgccgatctaaagtcatttgacttaggcgacgagcttggcactgatccttatgaagattttcaagaaaactggaacactaaacatagcagtggtgttacccgtgaactcatgcgtgagcttaacggaggggcatacactcgctatgtcgataacaacttctgtggccctgatggctaccctcttgagtgcattaaagaccttctagcacgtgctggtaaagcttcatgcactttgtccgaacaactggactttattgacactaagaggggtgtatactgctgccgtgaacatgagcatgaaattgcttggtacacggaacgttctgaaaagagctatgaattgcagacaccttttgaaattaaattggcaaagaaatttgacaccttcaatggggaatgtccaaattttgtatttcccttaaattccataatcaagactattcaaccaagggttgaaaagaaaaagcttgatggctttatgggtagaattcgatctgtctatccagttgcgtcaccaaatgaatgcaaccaaatgtgcctttcaactctcatgaagtgtgatcattgtggtgaaacttcatggcagacgggcgattttgttaaagccacttgcgaattttgtggcactgagaatttgactaaagaaggtgccactacttgtggttacttaccccaaaatgctgttgttaaaatttattgtccagcatgtcacaattcagaagtaggacctgagcatagtcttgccgaataccataatgaatctggcttgaaaaccattcttcgtaagggtggtcgcactattgcctttggaggctgtgtgttctcttatgttggttgccataacaagtgtgcctattgggttccacgtgctagcgctaacataggttgtaaccatacaggtgttgttggagaaggttccgaaggtcttaatgacaaccttcttgaaatactccaaaaagagaaagtcaacatcaatattgttggtgactttaaacttaatgaagagatcgccattattttggcatctttttctgcttccacaagtgcttttgtggaaactgtgaaaggtttggattataaagcattcaaacaaattgttgaatcctgtggtaattttaaagttacaaaaggaaaagctaaaaaaggtgcctggaatattggtgaacagaaatcaatactgagtcctctttatgcatttgcatcagaggctgctcgtgttgtacgatcaattttctcccgcactcttgaaactgctcaaaattctgtgcgtgttttacagaaggccgctataacaatactagatggaatttcacagtattcactgagactcattgatgctatgatgttcacatctgatttggctactaacaatctagttgtaatggcctacattacaggtggtgttgttcagttgacttcgcagtggctaactaacatctttggcactgtttatgaaaaactcaaacccgtccttgattggcttgaagagaagtttaaggaaggtgtagagtttcttagagacggttgggaaattgttaaatttatctcaacctgtgcttgtgaaa
ttgtcggtggacaaattgtcacctgtgcaaaggaaattaaggagagtgttcagacattctttaagcttgtaaataaatttttggctttgtgtgctgactctatcattattggtggagctaaacttaaagccttgaatttaggtgaaacatttgtcacgcactcaaagggattgtacagaaagtgtgttaaatccagagaagaaactggcctactcatgcctctaaaagccccaaaagaaattatcttcttagagggagaaacacttcccacagaagtgttaacagaggaagttgtcttgaaaactggtgatttacaaccattagaacaacctactagtgaagctgttgaagctccattggttggtacaccagtttgtattaacgggcttatgttgctcgaaatcaaagacacagaaaagtactgtgcccttgcacctaatatgatggtaacaaacaataccttcacactcaaaggcggtgcaccaacaaaggttacttttggtgatgacactgtgatagaagtgcaaggttacaagagtgtgaatatcacttttgaacttgatgaaaggattgataaagtacttaatgagaagtgctctgcctatacagttgaactcggtacagaagtaaatgagttcgcctgtgttgtggcagatgctgtcataaaaactttgcaaccagtatctgaattacttacaccactgggcattgatttagatgagtggagtatggctacatactacttatttgatgagtctggtgagtttaaattggcttcacatatgtattgttctttctaccctccagatgaggatgaagaagaaggtgattgtgaagaagaagagtttgagccatcaactcaatatgagtatggtactgaagatgattaccaaggtaaacctttggaatttggtgccacttctgctgctcttcaacctgaagaagagcaagaagaagattggttagatgatgatagtcaacaaactgttggtcaacaagacggcagtgaggacaatcagacaactactattcaaacaattgttgaggttcaacctcaattagagatggaacttacaccagttgttcagactattgaagtgaatagttttagtggttatttaaaacttactgacaatgtatacattaaaaatgcagacattgtggaagaagctaaaaaggtaaaaccaacagtggttgttaatgcagccaatgtttaccttaaacatggaggaggtgttgcaggagccttaaataaggctactaacaatgccatgcaagttgaatctgatgattacatagctactaatggaccacttaaagtgggtggtagttgtgttttaagcggacacaatcttgctaaacactgtcttcatgttgtcggcccaaatgttaacaaaggtgaagacattcaacttcttaagagtgcttatgaaaattttaatcagcacgaagttctacttgcaccattattatcagctggtatttttggtgctgaccctatacattctttaagagtttgtgtagatactgttcgcacaaatgtctacttagctgtctttgataaaaatctctatgacaaacttgtttcaagctttttggaaatgaagagtgaaaagcaagttgaacaaaagatcgctgagattcctaaagaggaagttaagccatttataactgaaagtaaaccttcagttgaacagagaaaacaagatgataagaaaatcaaagcttgtgttgaagaagttacaacaactctggaagaaactaagttcctcacagaaaacttgttactttatattgacattaatggcaatcttcatccagattctgccactcttgttagtgacattgacatcactttcttaaagaaagatgctccatatatagtgggtgatgttgttcaagagggtgttttaactgctgtggttatacctactaaaaaggctggtggcactactgaaatgctagcgaaagctttgagaaaagtgccaacagacaattat
ataaccacttacccgggtcagggtttaaatggttacactgtagaggaggcaaagacagtgcttaaaaagtgtaaaagtgccttttacattctaccatctattatctctaatgagaagcaagaaattcttggaactgtttcttggaatttgcgagaaatgcttgcacatgcagaagaaacacgcaaattaatgcctgtctgtgtggaaactaaagccatagtttcaactatacagcgtaaatataagggtattaaaatacaagagggtgtggttgattatggtgctagattttacttttacaccagtaaaacaactgtagcgtcacttatcaacacacttaacgatctaaatgaaactcttgttacaatgccacttggctatgtaacacatggcttaaatttggaagaagctgctcggtatatgagatctctcaaagtgccagctacagtttctgtttcttcacctgatgctgttacagcgtataatggttatcttacttcttcttctaaaacacctgaagaacattttattgaaaccatctcacttgctggttcctataaagattggtcctattctggacaatctacacaactaggtatagaatttcttaagagaggtgataaaagtgtatattacactagtaatcctaccacattccacctagatggtgaagttatcacctttgacaatcttaagacacttctttctttgagagaagtgaggactattaaggtgtttacaacagtagacaacattaacctccacacgcaagttgtggacatgtcaatgacatatggacaacagtttggtccaacttatttggatggagctgatgttactaaaataaaacctcataattcacatgaaggtaaaacattttatgttttacctaatgatgacactctacgtgttgaggcttttgagtactaccacacaactgatcctagttttctgggtaggtacatgtcagcattaaatcacactaaaaagtggaaatacccacaagttaatggtttaacttctattaaatgggcagataacaactgttatcttgccactgcattgttaacactccaacaaatagagttgaagtttaatccacctgctctacaagatgcttattacagagcaagggctggtgaagctgctaacttttgtgcacttatcttagcctactgtaataagacagtaggtgagttaggtgatgttagagaaacaatgagttacttgtttcaacatgccaatttagattcttgcaaaagagtcttgaacgtggtgtgtaaaacttgtggacaacagcagacaacccttaagggtgtagaagctgttatgtacatgggcacactttcttatgaacaatttaagaaaggtgttcagataccttgtacgtgtggtaaacaagctacaaaatatctagtacaacaggagtcaccttttgttatgatgtcagcaccacctgctcagtatgaacttaagcatggtacatttacttgtgctagtgagtacactggtaattaccagtgtggtcactataaacatataacttctaaagaaactttgtattgcatagacggtgctttacttacaaagtcctcagaatacaaaggtcctattacggatgttttctacaaagaaaacagttacacaacaaccataaaaccagttacttataaattggatggtgttgtttgtacagaaattgaccctaagttggacaattattataagaaagacaattcttatttcacagagcaaccaattgatcttgtaccaaaccaaccatatccaaacgcaagcttcgataattttaagtttgtatgtgataatatcaaatttgctgatgatttaaaccagttaactggttataagaaacctgcttcaagagagcttaaagttacatttttccctgacttaaatggtgatgtggtggctattgattataaacactacacaccctcttttaagaaaggagctaaattgttacataaacctattgtttg
gcatgttaacaatgcaactaataaagccacgtataaaccaaatacctggtgtatacgttgtctttggagcacaaaaccagttgaaacatcaaattcgtttgatgtactgaagtcagaggacgcgcagggaatggataatcttgcctgcgaagatctaaaaccagtctctgaagaagtagtggaaaatcctaccatacagaaagacgttcttgagtgtaatgtgaaaactaccgaagttgtaggagacattatacttaaaccagcaaataatagtttaaaaattacagaagaggttggccacacagatctaatggctgcttatgtagacaattctagtcttactattaagaaacctaatgaattatctagagtattaggtttgaaaacccttgctactcatggtttagctgctgttaatagtgtcccttgggatactatagctaattatgctaagccttttcttaacaaagttgttagtacaactactaacatagttacacggtgtttaaaccgtgtttgtactaattatatgccttatttctttactttattgctacaattgtgtacttttactagaagtacaaattctagaattaaagcatctatgccgactactatagcaaagaatactgttaagagtgtcggtaaattttgtctagaggcttcatttaattatttgaagtcacctaatttttctaaactgataaatattataatttggtttttactattaagtgtttgcctaggttctttaatctactcaaccgctgctttaggtgttttaatgtctaatttaggcatgccttcttactgtactggttacagagaaggctatttgaactctactaatgtcactattgcaacctactgtactggttctataccttgtagtgtttgtcttagtggtttagattctttagacacctatccttctttagaaactatacaaattaccatttcatcttttaaatgggatttaactgcttttggcttagttgcagagtggtttttggcatatattcttttcactaggtttttctatgtacttggattggctgcaatcatgcaattgtttttcagctattttgcagtacattttattagtaattcttggcttatgtggttaataattaatcttgtacaaatggccccgatttcagctatggttagaatgtacatcttctttgcatcattttattatgtatggaaaagttatgtgcatgttgtagacggttgtaattcatcaacttgtatgatgtgttacaaacgtaatagagcaacaagagtcgaatgtacaactattgttaatggtgttagaaggtccttttatgtctatgctaatggaggtaaaggcttttgcaaactacacaattggaattgtgttaattgtgatacattctgtgctggtagtacatttattagtgatgaagttgcgagagacttgtcactacagtttaaaagaccaataaatcctactgaccagtcttcttacatcgttgatagtgttacagtgaagaatggttccatccatctttactttgataaagctggtcaaaagacttatgaaagacattctctctctcattttgttaacttagacaacctgagagctaataacactaaaggttcattgcctattaatgttatagtttttgatggtaaatcaaaatgtgaagaatcatctgcaaaatcagcgtctgtttactacagtcagcttatgtgtcaacctatactgttactagatcaggcattagtgtctgatgttggtgatagtgcggaagttgcagttaaaatgtttgatgcttacgttaatacgttttcatcaacttttaacgtaccaatggaaaaactcaaaacactagttgcaactgcagaagctgaacttgcaaagaatgtgtccttagacaatgtcttatctacttttatttcagcagctcggcaagggtttgttgattcagatgtagaaactaaagatgttgttgaatgtcttaaattgtcac
atcaatctgacatagaagttactggcgatagttgtaataactatatgctcacctataacaaagttgaaaacatgacaccccgtgaccttggtgcttgtattgactgtagtgcgcgtcatattaatgcgcaggtagcaaaaagtcacaacattgctttgatatggaacgttaaagatttcatgtcattgtctgaacaactacgaaaacaaatacgtagtgctgctaaaaagaataacttaccttttaagttgacatgtgcaactactagacaagttgttaatgttgtaacaacaaagatagcacttaagggtggtaaaattgttaataattggttgaagcagttaattaaagttacacttgtgttcctttttgttgctgctattttctatttaataacacctgttcatgtcatgtctaaacatactgacttttcaagtgaaatcataggatacaaggctattgatggtggtgtcactcgtgacatagcatctacagatacttgttttgctaacaaacatgctgattttgacacatggtttagccagcgtggtggtagttatactaatgacaaagcttgcccattgattgctgcagtcataacaagagaagtgggttttgtcgtgcctggtttgcctggcacgatattacgcacaactaatggtgactttttgcatttcttacctagagtttttagtgcagttggtaacatctgttacacaccatcaaaacttatagagtacactgactttgcaacatcagcttgtgttttggctgctgaatgtacaatttttaaagatgcttctggtaagccagtaccatattgttatgataccaatgtactagaaggttctgttgcttatgaaagtttacgccctgacacacgttatgtgctcatggatggctctattattcaatttcctaacacctaccttgaaggttctgttagagtggtaacaacttttgattctgagtactgtaggcacggcacttgtgaaagatcagaagctggtgtttgtgtatctactagtggtagatgggtacttaacaatgattattacagatctttaccaggagttttctgtggtgtagatgctgtaaatttacttactaatatgtttacaccactaattcaacctattggtgctttggacatatcagcatctatagtagctggtggtattgtagctatcgtagtaacatgccttgcctactattttatgaggtttagaagagcttttggtgaatacagtcatgtagttgcctttaatactttactattccttatgtcattcactgtactctgtttaacaccagtttactcattcttacctggtgtttattctgttatttacttgtacttgacattttatcttactaatgatgtttcttttttagcacatattcagtggatggttatgttcacacctttagtacctttctggataacaattgcttatatcatttgtatttccacaaagcatttctattggttctttagtaattacctaaagagacgtgtagtctttaatggtgtttcctttagtacttttgaagaagctgcgctgtgcacctttttgttaaataaagaaatgtatctaaagttgcgtagtgatgtgctattacctcttacgcaatataatagatacttagctctttataataagtacaagtattttagtggagcaatggatacaactagctacagagaagctgcttgttgtcatctcgcaaaggctctcaatgacttcagtaactcaggttctgatgttctttaccaaccaccacaaacctctatcacctcagctgttttgcagagtggttttagaaaaatggcattcccatctggtaaagttgagggttgtatggtacaagtaacttgtggtacaactacacttaacggtctttggcttgatgacgtagtttactgtccaagacatgtgatctgcacctctgaagacatgcttaaccctaattatgaagatttactcattcgtaagtct
aatcataatttcttggtacaggctg…'
          gene_synonym: []

fseq = struct with fields:
      Header: 'NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome'
    Sequence: 'ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGGCCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCAGACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGAGATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGGAATTT
CACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTGGCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTTAACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACCACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTTAAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAATGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGAAATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAAGCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAATCAAAGCTTGTGTTGAAGAAGTTACA
ACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAACTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAGTGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCAAGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAATGCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCAGGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGCCTTTTACATTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTCTTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTGTGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACAAGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGCGTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTATGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCCAGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTCTTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAAAGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGATAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCACCTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTACAACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACAACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTCACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTTTGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCACACTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAACAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCCACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGCACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAATGAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTGTAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGGCACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACAAGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGCTCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCAGTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTTACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAGTTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAATTGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTT
CACAGAGCAACCAATTGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATGTGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTCAAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTATAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTGGCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTGTCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGACGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGTGGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGTAGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCACACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGAATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAGTGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTACAACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTTCTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGCATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGAGGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTGGTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGTTTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAACTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCTTAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTCATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATATTCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAGCTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGTACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTATGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTGTTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAGGTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTGTGTTAATTGTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGACTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGATAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGACTTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATAACACTAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATCATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACTAGATC
AGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGATGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACTAGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTACTTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGTTGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAACTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTATTGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGATATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGCTGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAATGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCAGTTAATTAAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACCTGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTATTGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGCTGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCCATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCACGATATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGTTGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGCTTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATATTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACACACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTCTGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATCAGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAGATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTACACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTATTGTAGCTATCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGGTGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCACTGTACTCTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTTGACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTTCACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCATTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTTTAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAAGTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAATAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTGTCATCTCGCAAAGGCTCTCAATGAC
TTCAGTAACTCAGGT…'



   1  IKGLYLPR*Q TNQLSISCRS VL*TNFKICV AVTRLHA*CT HAV*LITNYC R*QDTSNSSI
  61  FCRLLTVSSV LQPIISTSRF RPGVTER*DG EPCPWFQREN TRPTQFACFT GSRRARTWLW
 121  RLRGGGLIRG TSTS*RWHLW LSRS*KRRFA ST*TALCVHQ TFGCSNCTSW SCYG*AGSRT
 181  RRHSVRS*W* DTWCPCPSCG RNTSGLPQGS SS*ER**RSW WP*LRRRSKV I*LRRRAWH*
 241  SL*RFSRKLE H*T*QWCYP* THA*A*RRGI HSLCR*QLLW P*WLPS*VH* RPSSTCW*SF
 301  MHFVRTTGLY *H*EGCILLP *T*A*NCLVH GTF*KEL*IA DTF*N*IGKE I*HLQWGMSK
 361  FCISLKFHNQ DYSTKG*KEK A*WLYG*NSI CLSSCVTK*M QPNVPFNSHE V*SLW*NFMA
 421  DGRFC*SHLR ILWH*EFD*R RCHYLWLLTP KCCC*NLLSS MSQFRSRT*A *SCRIP**IW
 481  LENHSS*GWS HYCLWRLCVL LCWLP*QVCL LGSTC*R*HR L*PYRCCWRR FRRS**QPS*
 541  NTPKRESQHQ YCW*L*T**R DRHYFGIFFC FHKCFCGNCE RFGL*SIQTN C*ILW*F*SY
 601  KRKS*KRCLE YW*TEINTES SLCICIRGCS CCTINFLPHS *NCSKFCACF TEGRYNNTRW
 661  NFTVFTETH* CYDVHI*FGY *QSSCNGLHY RWCCSVDFAV AN*HLWHCL* KTQTRP*LA*
 721  REV*GRCRVS *RRLGNC*IY LNLCL*NCRW TNCHLCKGN* GECSDIL*AC K*IFGFVC*L
 781  YHYWWS*T*S LEFR*NICHA LKGIVQKVC* IQRRNWPTHA SKSPKRNYLL RGRNTSHRSV
 841  NRGSCLENW* FTTIRTTY** SC*SSIGWYT SLY*RAYVAR NQRHRKVLCP CT*YDGNKQY
 901  LHTQRRCTNK GYFW**HCDR SARLQECEYH F*T**KD**S T**EVLCLYS *TRYRSK*VR
 961  LCCGRCCHKN FATSI*ITYT TGH*FR*VEY GYILLI**VW *V*IGFTYVL FFLPSR*G*R
1021  RR*L*RRRV* AINSI*VWY* R*LPR*TFGI WCHFCCSST* RRARRRLVR* **STNCWSTR
1081  RQ*GQSDNYY SNNC*GSTSI RDGTYTSCSD Y*SE*F*WLF KTY*QCIH*K CRHCGRS*KG
1141  KTNSGC*CSQ CLP*TWRRCC RSLK*GY*QC HAS*I**LHS Y*WTT*SGW* LCFKRTQSC*
1201  TLSSCCRPKC *QR*RHSTS* ECL*KF*SAR SSTCTIIISW YFWC*PYTFF KSLCRYCSHK
1261  CLLSCL**KS L*QTCFKLFG NEE*KAS*TK DR*DS*RGS* AIYN*K*TFS *TEKTR**EN
1321  QSLC*RSYNN SGRN*VPHRK LVTLY*H*WQ SSSRFCHSC* *H*HHFLKER CSIYSG*CCS
1381  RGCFNCCGYT Y*KGWWHY*N ASESFEKSAN RQLYNHLPGS GFKWLHCRGG KDSA*KV*KC
1441  LLHSTIYYL* *EARNSWNCF LEFARNACTC RRNTQINACL CGN*SHSFNY TA*I*GY*NT
1501  RGCG*LWC*I LLLHQ*NNCS VTYQHT*RSK *NSCYNATWL CNTWLKFGRS CSVYEISQSA
1561  SYSFCFFT*C CYSV*WLSYF FF*NT*RTFY *NHLTCWFL* RLVLFWTIYT TRYRIS*ER*
1621  *KCILH**SY HIPPRW*SYH L*QS*DTSFF ERSEDY*GVY NSRQH*PPHA SCGHVNDIWT
1681  TVWSNLFGWS *CY*NKTS*F T*R*NILCFT ***HSTC*GF *VLPHN*S*F SG*VHVSIKS
1741  H*KVEIPTS* WFNFY*MGR* QLLSCHCIVN TPTNRVEV*S TCSTRCLLQS KGW*SC*LLC
1801  TYLSLL**DS R*VR*C*RNN ELLVSTCQFR FLQKSLERGV *NLWTTADNP *GCRSCYVHG
1861  HTFL*TI*ER CSDTLYVW*T SYKISSTTGV TFCYDVSTTC SV*T*AWYIY LC**VHW*LP
1921  VWSL*TYNF* RNFVLHRRCF TYKVLRIQRS YYGCFLQRKQ LHNNHKTSYL *IGWCCLYRN
1981  *P*VGQLL*E RQFLFHRATN *SCTKPTISK RKLR*F*VCM **YQIC**FK PVNWL*ETCF
2041  KRA*SYIFP* LKW*CGGY*L *TLHTLF*ER S*IVT*TYCL AC*QCN**SH V*TKYLVYTL
2101  SLEHKTS*NI KFV*CTEVRG RAGNG*SCLR RSKTSL*RSS GKSYHTERRS *V*CENYRSC
2161  RRHYT*TSK* *FKNYRRGWP HRSNGCLCRQ F*SYY*ET** II*SIRFENP CYSWFSCC**
2221  CPLGYYS*LC *AFS*QSC*Y NY*HSYTVFK PCLY*LYALF LYFIATIVYF Y*KYKF*N*S
2281  IYADYYSKEY C*ECR*ILSR GFI*LFEVT* FF*TDKYYNL VFTIKCLPRF FNLLNRCFRC
2341  FNV*FRHAFL LYWLQRRLFE LY*CHYCNLL YWFYTL*CLS *WFRFFRHLS FFRNYTNYHF
2401  IF*MGFNCFW LSCRVVFGIY SFH*VFLCTW IGCNHAIVFQ LFCSTFY**F LAYVVNN*SC
2461  TNGPDFSYG* NVHLLCIILL CMEKLCACCR RL*FINLYDV LQT**SNKSR MYNYC*WC*K
2521  VLLCLC*WR* RLLQTTQLEL C*L*YILCW* YIY***SCER LVTTV*KTNK SY*PVFLHR*
2581  *CYSEEWFHP SLL**SWSKD L*KTFSLSFC *LRQPES**H *RFIAY*CYS F*W*IKM*RI
2641  ICKISVCLLQ SAYVSTYTVT RSGISV*CW* *CGSCS*NV* CLR*YVFINF *RTNGKTQNT
2701  SCNCRS*TCK ECVLRQCLIY FYFSSSARVC *FRCRN*RCC *MS*IVTSI* HRSYWR*L**
2761  LYAHL*QS*K HDTP*PWCLY *L*CASY*CA GSKKSQHCFD MER*RFHVIV *TTTKTNT*C
2821  C*KE*LTF*V DMCNY*TSC* CCNNKDST*G W*NC**LVEA VN*SYTCVPF CCCYFLFNNT
2881  CSCHV*TY*L FK*NHRIQGY *WWCHS*HSI YRYLFC*QTC *F*HMV*PAW W*LY**QSLP
2941  IDCCSHNKRS GFCRAWFAWH DITHN*W*LF AFLT*SF*CS W*HLLHTIKT YRVH*LCNIS
3001  LCFGC*MYNF *RCFW*ASTI LL*YQCTRRF CCL*KFTP*H TLCAHGWLYY SIS*HLP*RF
3061  C*SGNNF*F* VL*ARHL*KI RSWCLCIY*W *MGT*Q*LLQ IFTRSFLWCR CCKFTY*YVY
3121  TTNSTYWCFG HISIYSSWWY CSYRSNMPCL LFYEV*KSFW *IQSCSCL*Y FTIPYVIHCT
3181  LFNTSLLILT WCLFCYLLVL DILSY**CFF FSTYSVDGYV HTFSTFLDNN CLYHLYFHKA
3241  FLLVL**LPK ETCSL*WCFL *YF*RSCAVH LFVK*RNVSK VA**CAITSY AI**ILSSL*
3301  *VQVF*WSNG YN*LQRSCLL SSRKGSQ*LQ *LRF*CSLPT TTNLYHLSCF AEWF*KNGIP
3361  IW*S*GLYGT SNLWYNYT*R SLA**RSLLS KTCDLHL*RH A*P*L*RFTH S*V*S*FLGT
3421  GW*CSTQGYW TFYAKLCT*A *G*YSQS*DT *V*VCSHSTR TDFFSVSLLQ WFTIWCLPMC
3481  YEAQFHY*GF IP*WFMW*CW F*HRL*LCLF LLHAPYGITN WSSCWHRLRR *LLWTFC*QA
3541  NSTSSWYGHN YYS*CFSLVV RCCYKWRQVV SQSIYHNS** L*PCGYEVQL *TSNTRPC*H
3601  TRTSFCSNWN CRFRYVCFIK RITAKWYEWT YHIG*CFIRR *IYTF*CC*T MLRCYFPKCS
3661  EKNNQGYTPL VVTHNFDFTF SFSPEYSMVF VLFFV*KCLF TFCYGYYCYV CFCNDVCQT*
3721  ACISLFVFVT FSCHCSLF*Y GLYAC*LGDA YYDMVGYG*Y *FVWF*AKRL CYVCISCSVT
3781  NPYDSKNCV* *WC*ESVDTY ECLDTRL*SL LW*CFRSSHF HVGSYNLCYF *LLRCSYNCH
3841  VFGQRYCFYV C*VLPYFLHN W*YTSVYNAS LLFLRLFLYL LLWPLLFTQP LL*TDSWCL*
3901  LLSFYTGV*I YEFTGTTPTQ E*HRCLQTQH *IVGCWWQTL YQSSHCTV*N VRCKVHISSL
3961  TLSFATTQSR III*IVGSMC PVTQ*HSLS* RYY*SL*KNG FTTFCFAFHA GCCRHKQAL*
4021  RNAGQQGNLT SYSLRV*FPS IICSFCYCSR SL*AGCC*W* F*SCS*KVEE VFECG*I*I*
4081  P*CSHAT*VG KDG*SSYDPN V*TG*I*GQE GKSY*CYADN AFHYA*KVG* *CTQQHYQQC
4141  KRWLCSLEHN TSYNSSQTNG CHTRL*HI*K YV*WYNIYLC ISIVGNPTGC RCR**NCST*
4201  *N*YGQFT*F SMASYCNSFK GQFCCQITE* *A*SCCTTTD VLCCRYYTNC LH**QCVSLL
4261  QHNKGR*VCT CTVIRFTGFE MG*IP*E*WN WYYLYRTGTT L*VCYRHT*R S*SEVFILY*
4321  RIKQPK*RYG TW*FSCHSTS TSW*CNRSAC QFNCIIFLCF CCRCC*SLQR LSS*WGTTNH
4381  *LC*DVVYTH WYWSGNNSYT GSQYGSRILW WCIVLSVLPL PHRSSKS*RI L*LKR*VCTN
4441  TYNLC**PCG FYT*KHSLYR LRYVERLWL* L*STPRTHAS VS*CTIVFKR VCGVSAARLT
4501  PCGTGTSTDV VYRAFDIYND KVAGFAKFLK TNCCRFQEKD EDDNLIDSYF VVKRHTFSNY
4561  QHEETIYNLL KDCPAVAKHD FFKFRIDGDM VPHISRQRLT KYTMADLVYA LRHFDEGNCD
4621  TLKEILVTYN CCDDDYFNKK DWYDFVENPD ILRVYANLGE RVRQALLKTV QFCDAMRNAG
4681  IVGVLTLDNQ DLNGNWYDFG DFIQTTPGSG VPVVDSYYSL LMPILTLTRA LTAESHVDTD
4741  LTKPYIKWDL LKYDFTEERL KLFDRYFKYW DQTYHPNCVN CLDDRCILHC ANFNVLFSTV
4801  FPPTSFGPLV RKIFVDGVPF VVSTGYHFRE LGVVHNQDVN LHSSRLSFKE LLVYAADPAM
4861  HAASGNLLLD KRTTCFSVAA LTNNVAFQTV KPGNFNKDFY DFAVSKGFFK EGSSVELKHF
4921  FFAQDGNAAI SDYDYYRYNL PTMCDIRQLL FVVEVVDKYF DCYDGGCINA NQVIVNNLDK
4981  SAGFPFNKWG KARLYYDSMS YEDQDALFAY TKRNVIPTIT QMNLKYAISA KNRARTVAGV
5041  SICSTMTNRQ FHQKLLKSIA ATRGATVVIG TSKFYGGWHN MLKTVYSDVE NPHLMGWDYP
5101  KCDRAMPNML RIMASLVLAR KHTTCCSLSH RFYRLANECA QVLSEMVMCG GSLYVKPGGT
5161  SSGDATTAYA NSVFNICQAV TANVNALLST DGNKIADKYV RNLQHRLYEC LYRNRDVDTD
5221  FVNEFYAYLR KHFSMMILSD DAVVCFNSTY ASQGLVASIK NFKSVLYYQN NVFMSEAKCW
5281  TETDLTKGPH EFCSQHTMLV KQGDDYVYLP YPDPSRILGA GCFVDDIVKT DGTLMIERFV
5341  SLAIDAYPLT KHPNQEYADV FHLYLQYIRK LHDELTGHML DMYSVMLTND NTSRYWEPEF
5401  YEAMYTPHTV LQAVGACVLC NSQTSLRCGA CIRRPFLCCK CCYDHVISTS HKLVLSVNPY
5461  VCNAPGCDVT DVTQLYLGGM SYYCKSHKPP ISFPLCANGQ VFGLYKNTCV GSDNVTDFNA
5521  IATCDWTNAG DYILANTCTE RLKLFAAETL KATEETFKLS YGIATVREVL SDRELHLSWE
5581  VGKPRPPLNR NYVFTGYRVT KNSKVQIGEY TFEKGDYGDA VVYRGTTTYK LNVGDYFVLT
5641  SHTVMPLSAP TLVPQEHYVR ITGLYPTLNI SDEFSSNVAN YQKVGMQKYS TLQGPPGTGK
5701  SHFAIGLALY YPSARIVYTA CSHAAVDALC EKALKYLPID KCSRIIPARA RVECFDKFKV
5761  NSTLEQYVFC TVNALPETTA DIVVFDEISM ATNYDLSVVN ARLRAKHYVY IGDPAQLPAP
5821  RTLLTKGTLE PEYFNSVCRL MKTIGPDMFL GTCRRCPAEI VDTVSALVYD NKLKAHKDKS
5881  AQCFKMFYKG VITHDVSSAI NRPQIGVVRE FLTRNPAWRK AVFISPYNSQ NAVASKILGL
5941  PTQTVDSSQG SEYDYVIFTQ TTETAHSCNV NRFNVAITRA KVGILCIMSD RDLYDKLQFT
6001  SLEIPRRNVA TLQAENVTGL FKDCSKVITG LHPTQAPTHL SVDTKFKTEG LCVDIPGIPK
6061  DMTYRRLISM MGFKMNYQVN GYPNMFITRE EAIRHVRAWI GFDVEGCHAT REAVGTNLPL
6121  QLGFSTGVNL VAVPTGYVDT PNNTDFSRVS AKPPPGDQFK HLIPLMYKGL PWNVVRIKIV
6181  QMLSDTLKNL SDRVVFVLWA HGFELTSMKY FVKIGPERTC CLCDRRATCF STASDTYACW
6241  HHSIGFDYVY NPFMIDVQQW GFTGNLQSNH DLYCQVHGNA HVASCDAIMT RCLAVHECFV
6301  KRVDWTIEYP IIGDELKINA ACRKVQHMVV KAALLADKFP VLHDIGNPKA IKCVPQADVE
6361  WKFYDAQPCS DKAYKIEELF YSYATHSDKF TDGVCLFWNC NVDRYPANSI VCRFDTRVLS
6421  NLNLPGCDGG SLYVNKHAFH TPAFDKSAFV NLKQLPFFYY SDSPCESHGK QVVSDIDYVP
6481  LKSATCITRC NLGGAVCRHH ANEYRLYLDA YNMMISAGFS LWVYKQFDTY NLWNTFTRLQ
6541  SLENVAFNVV NKGHFDGQQG EVPVSIINNT VYTKVDGVDV ELFENKTTLP VNVAFELWAK
6601  RNIKPVPEVK ILNNLGVDIA ANTVIWDYKR DAPAHISTIG VCSMTDIAKK PTETICAPLT
6661  VFFDGRVDGQ VDLFRNARNG VLITEGSVKG LQPSVGPKQA SLNGVTLIGE AVKTQFNYYK
6721  KVDGVVQQLP ETYFTQSRNL QEFKPRSQME IDFLELAMDE FIERYKLEGY AFEHIVYGDF
6781  SHSQLGGLHL LIGLAKRFKE SPFELEDFIP MDSTVKNYFI TDAQTGSSKC VCSVIDLLLD
6841  DFVEIIKSQD LSVVSKVVKV TIDYTEISFM LWCKDGHVET FYPKLQSSQA WQPGVAMPNL
6901  YKMQRMLLEK CDLQNYGDSA TLPKGIMMNV AKYTQLCQYL NTLTLAVPYN MRVIHFGAGS
6961  DKGVAPGTAV LRQWLPTGTL LVDSDLNDFV SDADSTLIGD CATVHTANKW DLIISDMYDP
7021  KTKNVTKEND SKEGFFTYIC GFIQQKLALG GSVAIKITEH SWNADLYKLM GHFAWWTAFV
7081  TNVNASSSEA FLIGCNYLGK PREQIDGYVM HANYIFWRNT NPIQLSSYSL FDMSKFPLKL
7141  RGTAVMSLKE GQINDMILSL LSKGRLIIRE NNRVVISSDV LVNN*TNNVC FSCFIATSL*
7201  SVC*SYNQNS ITPCIH*FFH TWCLLP*QSF QILSFTFNSG LVLTFLFQCY LVPCYTCLWD
7261  QWY*EV**PC PTI**WCLFC FH*EV*HNKR LDFWYYFRFE DPVPTYC**R Y*CCY*SL*I
7321  SIL**SIFGC LLPQKQQKLD GK*VQSLF*C E*LHF*ICLS AFSYGP*RKT G*FQKS*GIC
7381  V*EY*WLF*N IF*AHAY*FS A*SPSGFFGF RTIGRFANRY *HH*VSNFTC FT*KLFDSW*
7441  FFFRLDSWCC SLLCGLSST* DFSIKI**KW NHYRCCRLCT *PSLRNKVYV EILHCRKRNL
7501  SNF*L*SPTN RIYC*IS*YY KLVPFW*SF* RHQICICLCL EQEENQQLCC *LFCPI*FRI
7561  IFHF*VLWSV SY*IK*SLLY *CLCRFICN* R**SQTNRSR ANWKDC*L*L *ITR*FYRLR
7621  YSLEF*QS*F *GWW*L*LPV *IV*EV*SQT F*ERYFN*NL SGR*HTL*WC *RF*LLLSFT
7681  IIWFPTH*WC WLPTIQSSST FF*TSTCTSN CLWT*KVY*F G*KQMCQFQL QWFNRHRCSY
7741  *V*QKVSAFP TIWQRHC*HY *CCP*STDT* DS*HYTMFFW WCQCYNTRNK YF*PGCCSLS
7801  GC*LHRSPCC YSCRSTYSYL ACLFYRF*CF SNTCRLFNRG *TCQQLI*V* HTHWCRYMR*
7861  LSDSD*FSSA GT*CS*SIHH CLHYVTWCRK FSCLL**LYC HTHKFYY*CY HRNSTSVYDQ
7921  DISRLYNVHL W*FN*MQQSF VAIWQFLYTI KPCFNWNSC* TRQKHPRSFC TSQTNLQNTT
7981  N*RFWWF*FF TNITRSIKTK QEVIY*RSTF QQSDTCRCWL HQTIW*LPW* YCC*RPHLCT
8041  KV*RPYCFAT FAHR*NDCSI HFCTVSGYNH FWLDLWCRCC ITNTICYANG L*V*WYWSYT
8101  ECSL*EPKID CQPI**CYWQ NSRLTFFHSK CTWKTSRCGQ PKCTSFKHAC *TT*LQFWCN
8161  FKCFK*YPFT S*QS*G*SAN **VDHRQTSK FADICDSTIN *SCRNQSFC* SCCY*NVRVC
8221  TWTIKKS*FL WKGLSSYVLP SVSTSWCSLL ACDLCPCTRK ELHNCSCHLS *WKSTLSS*R
8281  CLCFKWHTLV CNTKEFL*TT NHYYRQHICV W*L*CCNRNC QQHSL*SFAT *IRLIQGGVR
8341  *IF*ESYITR C*FR*HLWH* CFSCKHSKRN *PPQ*GCQEF K*ISHRSPRT WKV*AVYKMA
8401  MVHLARFYSW LDCHSNGDNY ALLYDQLL*L SQGLLFLWIL LQI**RRL*A SAQRSQITLH
8461  INELMDLFMR IFTIGTVTLK QGEIKDATPS DFVRATATIP IQASLPFGWL IVGVALLAVF
8521  QSASKIITLK KRWQLALSKG VHFVCNLLLL FVTVYSHLLL VAAGLEAPFL YLYALVYFLQ
8581  SINFVRIIMR LWLCWKCRSK NPLLYDANYF LCWHTNCYDY CIPYNSVTSS IVITSGDGTT
8641  SPISEHDYQI GGYTEKWESG VKDCVVLHSY FTSDYYQLYS TQLSTDTGVE HVTFFIYNKI
8701  VDEPEEHVQI HTIDGSSGVV NPVMEPIYDE PTTTTSVPL* AQADEYELMY SFVSEETGTL
8761  IVNSVLLFLA FVVFLLVTLA ILTALRLCAY CCNIVNVSLV KPSFYVYSRV KNLNSSRVPD
8821  LLV*TN*ILY *FFCLEL*F* PWQIPTVLLP LKSLKSSLNN GT***VSYSL HGFVFYNLPM
8881  PTGIGFCI*L S*FSSGCYGQ *L*LVLCLLL FTE*IGSPVE LLSQWLVL*A *CGSATSLLL
8941  SDCLRVRVPC GHSIQKLTFF STCHSMALF* PDRF*KVNS* SEL*SFVDIF VLLDTI*DAV
9001  TSRTCLKKSL LLHHERFLIT NWELRSV*QV TQVLLHTVAT GLATIN*TQT IPVAVTILLC
9061  LYSK*QQMFH LVDFQVTIAE ILLIIMRTFK VSIWNLDYII NLIIKNLSKS LTENKYSQLD
9121  EEQPMEID*T NMKIILFLAL ITLATCELYH YQECVRGTTV LLKEPCSSGT YEGNSPFHPL
9181  ADNKFALTCF STQFAFACPD GVKHVYQLRA RSVSPKLFIR QEEVQELYSP IFLIVAAIVF
9241  ITLCFTLKRK TE*LNFH*LT SICAF*PFCY SLF*LCLLSF GSHLNCKIIM KLVTPKRT*N
9301  FLFS*ESSQL *LHFTKNVVY SHVLNINHM* LMTRVLFTSI LNGILE*ELE NQHL*LNCAW
9361  MRLVLNHPFS TSISVIIQFP VYLLQLIARN LNWVVL*CVV RSMKTF*SIM TFVLF*ISSK
9421  RTN*NV**WT PKSAKCTPHY VWWTLRFNWQ *PEWRTQWGA IKTTSAPRFT Q*YCVLVHRS
9481  HSTWQGRP*I PSRTRRSN*H Q*QSR*PNWL LPKSYQTNSW W*R*NERSQS KMVFLLPRNW
9541  ARSWTSLWC* QRRHHMGCN* GSLEYTKRSH WHPQSC*QCC NRATTSSRNN IAKRLLRRRE
9601  QRRQSSLFSF LIT*SQQFKK FNSRQQ*GNF SC*NGWQWR* CCSCFAAA*Q IEPA*EQNVW
9661  *RPTTTRPNC H*EICC*GF* EASAKTYCH* SIQCNTSFRQ TWSRTNPRKF WGPGTNQTRN
9721  *LQTLAANCT ICPQRFSVLR NVAHWHGSHT FGNVVDLHRC HQIG*QRSKF QRSSHFAE*A
9781  Y*RIQNIPTN RA*KGQKEEG **NSSLTAET EETANCDSSS CCRFG*FLQT IATIHEQC*L
9841  NSGLNSCRPH KADGLYKRFR FSVYDI*STL VQNEFS*LHS TSRCS*L*SH IAIFNQCVTL
9901  GRT*KSHHIF TEATRSTIEC TVNNARESCL YGRALMCKIN FSSAIPM*F* *LLRRMTKKK
9961  KKKKKKK                                                          

codons = struct with fields:
    AAA: 303
    AAC: 212
    AAG: 110
    AAT: 260
    ACA: 249
    ACC: 150
    ACG: 60
    ACT: 220
    AGA: 277
    AGC: 141
    AGG: 123
    AGT: 197
    ATA: 116
    ATC: 112
    ATG: 117
    ATT: 208
    CAA: 233
    CAC: 160
    CAG: 92
    CAT: 172
    CCA: 115
    CCC: 42
    CCG: 28
    CCT: 107
    CGA: 31
    CGC: 40
    CGG: 33
    CGT: 54
    CTA: 110
    CTC: 78
    CTG: 80
    CTT: 208
    GAA: 186
    GAC: 123
    GAG: 84
    GAT: 167
    GCA: 116
    GCC: 68
    GCG: 22
    GCT: 169
    GGA: 112
    GGC: 84
    GGG: 49
    GGT: 149
    GTA: 121
    GTC: 101
    GTG: 93
    GTT: 233
    TAA: 342
    TAC: 227
    TAG: 128
    TAT: 278
    TCA: 173
    TCC: 75
    TCG: 34
    TCT: 190
    TGA: 304
    TGC: 268
    TGG: 263
    TGT: 367
    TTA: 232
    TTC: 210
    TTG: 178
    TTT: 383

TTT_aa = 
    'Phe	Phenylalanine
     '

TGT_aa = 
    'Cys	Cysteine
     '

TAA_aa = 
    'END	Termination codon
     '



CVSeq = 'IKGLYLPR*QTNQLSISCRSVL*TNFKICVAVTRLHA*CTHAV*LITNYCR*QDTSNSSIFCRLLTVSSVLQPIISTSRFRPGVTER*DGEPCPWFQRENTRPTQFACFTGSRRARTWLWRLRGGGLIRGTSTS*RWHLWLSRS*KRRFAST*TALCVHQTFGCSNCTSWSCYG*AGSRTRRHSVRS*W*DTWCPCPSCGRNTSGLPQGSSS*ER**RSWWP*LRRRSKVI*LRRRAWH*SL*RFSRKLEH*T*QWCYP*THA*A*RRGIHSLCR*QLLWP*WLPS*VH*RPSSTCW*SFMHFVRTTGLY*H*EGCILLP*T*A*NCLVHGTF*KEL*IADTF*N*IGKEI*HLQWGMSKFCISLKFHNQDYSTKG*KEKA*WLYG*NSICLSSCVTK*MQPNVPFNSHEV*SLW*NFMADGRFC*SHLRILWH*EFD*RRCHYLWLLTPKCCC*NLLSSMSQFRSRT*A*SCRIP**IWLENHSS*GWSHYCLWRLCVLLCWLP*QVCLLGSTC*R*HRL*PYRCCWRRFRRS**QPS*NTPKRESQHQYCW*L*T**RDRHYFGIFFCFHKCFCGNCERFGL*SIQTNC*ILW*F*SYKRKS*KRCLEYW*TEINTESSLCICIRGCSCCTINFLPHS*NCSKFCACFTEGRYNNTRWNFTVFTETH*CYDVHI*FGY*QSSCNGLHYRWCCSVDFAVAN*HLWHCL*KTQTRP*LA*REV*GRCRVS*RRLGNC*IYLNLCL*NCRWTNCHLCKGN*GECSDIL*ACK*IFGFVC*LYHYWWS*T*SLEFR*NICHALKGIVQKVC*IQRRNWPTHASKSPKRNYLLRGRNTSHRSVNRGSCLENW*FTTIRTTY**SC*SSIGWYTSLY*RAYVARNQRHRKVLCPCT*YDGNKQYLHTQRRCTNKGYFW**HCDRSARLQECEYHF*T**KD**ST**EVLCLYS*TRYRSK*VRLCCGRCCHKNFATSI*ITYTTGH*FR*VEYGYILLI**VW*V*IGFTYVLFFLPSR*G*RRR*L*RRRV*AINSI*VWY*R*LPR*TFGIWCHFCCSST*RRARRRLVR***STNCWSTRRQ*GQSDNYYSNNC*GSTSIRDGTYTSCSDY*SE*F*WLFKTY*QCIH*KCRHCGRS*KGKTNSGC*CSQCLP*TWRRCCRSLK*GY*QCHAS*I**LHSY*WTT*SGW*LCFKRTQSC*TLSSCCRPKC*QR*RHSTS*ECL*KF*SARSSTCTIIISWYFWC*PYTFFKSLCRYCSHKCLLSCL**KSL*QTCFKLFGNEE*KAS*TKDR*DS*RGS*AIYN*K*TFS*TEKTR**ENQSLC*RSYNNSGRN*VPHRKLVTLY*H*WQSSSRFCHSC**H*HHFLKERCSIYSG*CCSRGCFNCCGYTY*KGWWHY*NASESFEKSANRQLYNHLPGSGFKWLHCRGGKDSA*KV*KCLLHSTIYYL**EARNSWNCFLEFARNACTCRRNTQINACLCGN*SHSFNYTA*I*GY*NTRGCG*LWC*ILLLHQ*NNCSVTYQHT*RSK*NSCYNATWLCNTWLKFGRSCSVYEISQSASYSFCFFT*CCYSV*WLSYFFF*NT*RTFY*NHLTCWFL*RLVLFWTIYTTRYRIS*ER**KCILH**SYHIPPRW*SYHL*QS*DTSFFERSEDY*GVYNSRQH*PPHASCGHVNDIWTTVWSNLFGWS*CY*NKTS*FT*R*NILCFT***HSTC*GF*VLPHN*S*FSG*VHVSIKSH*KVEIPTS*WFNFY*MGR*QLLSCHCIVNTPTNRVEV*STCSTRCLLQSKGW*SC*LLCTYLSLL**DSR*VR*C*RNNELLVSTCQFRFLQKSLERGV*NLWTTADNP*GCRSCYVHGHTFL*TI*ERCSDTLYVW*TSYKISSTTGVTFCYDVSTTCSV*T*AWYIYLC**VHW*LPVWSL*TYNF*RNFVLHRRCFTYKVLRIQRSYYGCFLQRKQLHNNHKTSYL*IGWCCLYRN*P*VGQLL*ER
QFLFHRATN*SCTKPTISKRKLR*F*VCM**YQIC**FKPVNWL*ETCFKRA*SYIFP*LKW*CGGY*L*TLHTLF*ERS*IVT*TYCLAC*QCN**SHV*TKYLVYTLSLEHKTS*NIKFV*CTEVRGRAGNG*SCLRRSKTSL*RSSGKSYHTERRS*V*CENYRSCRRHYT*TSK**FKNYRRGWPHRSNGCLCRQF*SYY*ET**II*SIRFENPCYSWFSCC**CPLGYYS*LC*AFS*QSC*YNY*HSYTVFKPCLY*LYALFLYFIATIVYFY*KYKF*N*SIYADYYSKEYC*ECR*ILSRGFI*LFEVT*FF*TDKYYNLVFTIKCLPRFFNLLNRCFRCFNV*FRHAFLLYWLQRRLFELY*CHYCNLLYWFYTL*CLS*WFRFFRHLSFFRNYTNYHFIF*MGFNCFWLSCRVVFGIYSFH*VFLCTWIGCNHAIVFQLFCSTFY**FLAYVVNN*SCTNGPDFSYG*NVHLLCIILLCMEKLCACCRRL*FINLYDVLQT**SNKSRMYNYC*WC*KVLLCLC*WR*RLLQTTQLELC*L*YILCW*YIY***SCERLVTTV*KTNKSY*PVFLHR**CYSEEWFHPSLL**SWSKDL*KTFSLSFC*LRQPES**H*RFIAY*CYSF*W*IKM*RIICKISVCLLQSAYVSTYTVTRSGISV*CW**CGSCS*NV*CLR*YVFINF*RTNGKTQNTSCNCRS*TCKECVLRQCLIYFYFSSSARVC*FRCRN*RCC*MS*IVTSI*HRSYWR*L**LYAHL*QS*KHDTP*PWCLY*L*CASY*CAGSKKSQHCFDMER*RFHVIV*TTTKTNT*CC*KE*LTF*VDMCNY*TSC*CCNNKDST*GW*NC**LVEAVN*SYTCVPFCCCYFLFNNTCSCHV*TY*LFK*NHRIQGY*WWCHS*HSIYRYLFC*QTC*F*HMV*PAWW*LY**QSLPIDCCSHNKRSGFCRAWFAWHDITHN*W*LFAFLT*SF*CSW*HLLHTIKTYRVH*LCNISLCFGC*MYNF*RCFW*ASTILL*YQCTRRFCCL*KFTP*HTLCAHGWLYYSIS*HLP*RFC*SGNNF*F*VL*ARHL*KIRSWCLCIY*W*MGT*Q*LLQIFTRSFLWCRCCKFTY*YVYTTNSTYWCFGHISIYSSWWYCSYRSNMPCLLFYEV*KSFW*IQSCSCL*YFTIPYVIHCTLFNTSLLILTWCLFCYLLVLDILSY**CFFFSTYSVDGYVHTFSTFLDNNCLYHLYFHKAFLLVL**LPKETCSL*WCFL*YF*RSCAVHLFVK*RNVSKVA**CAITSYAI**ILSSL**VQVF*WSNGYN*LQRSCLLSSRKGSQ*LQ*LRF*CSLPTTTNLYHLSCFAEWF*KNGIPIW*S*GLYGTSNLWYNYT*RSLA**RSLLSKTCDLHL*RHA*P*L*RFTHS*V*S*FLGTGW*CSTQGYWTFYAKLCT*A*G*YSQS*DT*V*VCSHSTRTDFFSVSLLQWFTIWCLPMCYEAQFHY*GFIP*WFMW*CWF*HRL*LCLFLLHAPYGITNWSSCWHRLRR*LLWTFC*QANSTSSWYGHNYYS*CFSLVVRCCYKWRQVVSQSIYHNS**L*PCGYEVQL*TSNTRPC*HTRTSFCSNWNCRFRYVCFIKRITAKWYEWTYHIG*CFIRR*IYTF*CC*TMLRCYFPKCSEKNNQGYTPLVVTHNFDFTFSFSPEYSMVFVLFFV*KCLFTFCYGYYCYVCFCNDVCQT*ACISLFVFVTFSCHCSLF*YGLYAC*LGDAYYDMVGYG*Y*FVWF*AKRLCYVCISCSVTNPYDSKNCV**WC*ESVDTYECLDTRL*SLLW*CFRSSHFHVGSYNLCYF*LLRCSYNCHVFGQRYCFYVC*VLPYFLHNW*YTSVYNASLLFLRLFLYLLLWPLLFTQPLL*TDSWCL*LLSFYTGV*IYEFTGTTPTQE*HRCLQTQH*IVGCWWQTLYQSSHCTV*NVRCKVHISSLTLSFATTQSRIII*IVGSMCPVTQ*HSLS*R
YY*SL*KNGFTTFCFAFHAGCCRHKQAL*RNAGQQGNLTSYSLRV*FPSIICSFCYCSRSL*AGCC*W*F*SCS*KVEEVFECG*I*I*P*CSHAT*VGKDG*SSYDPNV*TG*I*GQEGKSY*CYADNAFHYA*KVG**CTQQHYQQCKRWLCSLEHNTSYNSSQTNGCHTRL*HI*KYV*WYNIYLCISIVGNPTGCRCR**NCST**N*YGQFT*FSMASYCNSFKGQFCCQITE**A*SCCTTTDVLCCRYYTNCLH**QCVSLLQHNKGR*VCTCTVIRFTGFEMG*IP*E*WNWYYLYRTGTTL*VCYRHT*RS*SEVFILY*RIKQPK*RYGTW*FSCHSTSTSW*CNRSACQFNCIIFLCFCCRCC*SLQRLSS*WGTTNH*LC*DVVYTHWYWSGNNSYTGSQYGSRILWWCIVLSVLPLPHRSSKS*RIL*LKR*VCTNTYNLC**PCGFYT*KHSLYRLRYVERLWL*L*STPRTHASVS*CTIVFKRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQAVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDR
DLYDKLQFTSLEIPRRNVATLQAENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTYRRLISMMGFKMNYQVNGYPNMFITREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQSLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQSSQAWQPGVAMPNLYKMQRMLLEKCDLQNYGDSATLPKGIMMNVAKYTQLCQYLNTLTLAVPYNMRVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVMSLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN*TNNVCFSCFIATSL*SVC*SYNQNSITPCIH*FFHTWCLLP*QSFQILSFTFNSGLVLTFLFQCYLVPCYTCLWDQWY*EV**PCPTI**WCLFCFH*EV*HNKRLDFWYYFRFEDPVPTYC**RY*CCY*SL*ISIL**SIFGCLLPQKQQKLDGK*VQSLF*CE*LHF*ICLSAFSYGP*RKTG*FQKS*GICV*EY*WLF*NIF*AHAY*FSA*SPSGFFGFRTIGRFANRY*HH*VSNFTCFT*KLFDSW*FFFRLDSWCCSLLCGLSST*DFSIKI**KWNHYRCCRLCT*PSLRNKVYVEILHCRKRNLSNF*L*SPTNRIYC*IS*YYKLVPFW*SF*RHQICICLCLEQEENQQLCC*LFCPI*FRIIFHF*VLWSVSY*IK*SLLY*CLCRFICN*R**SQTNRSRANWKDC*L*L*ITR*FYRLRYSLEF*QS*F*GWW*L*LPV*IV*EV*SQTF*ERYFN*NLSGR*HTL*WC*RF*LLLSFTIIWFPTH*WCWLPTIQSSSTFF*TSTCTSNCLWT*KVY*FG*KQMCQFQLQWFNRHRCSY*V*QKVSAFPTIWQRHC*HY*CCP*STDT*DS*HYTMFFWWCQCYNTRNKYF*PGCCSLSGC*LHRSPCCYSCRSTYSYLACLFYRF*CFSNTCRLFNRG*TCQQLI*V*HTHWCRYMR*LSDSD*FSSAGT*CS*SIHHCLHYVTWCRKFSCLL**LYCHTHKFYY*CYHRNSTSVYDQDISRLYNVHLW*FN*MQQSFVAIWQFLYTIKPCFNWNSC*TRQKHPRSFCTSQTNLQNTTN*RFWWF*FFT
NITRSIKTKQEVIY*RSTFQQSDTCRCWLHQTIW*LPW*YCC*RPHLCTKV*RPYCFATFAHR*NDCSIHFCTVSGYNHFWLDLWCRCCITNTICYANGL*V*WYWSYTECSL*EPKIDCQPI**CYWQNSRLTFFHSKCTWKTSRCGQPKCTSFKHAC*TT*LQFWCNFKCFK*YPFTS*QS*G*SAN**VDHRQTSKFADICDSTIN*SCRNQSFC*SCCY*NVRVCTWTIKKS*FLWKGLSSYVLPSVSTSWCSLLACDLCPCTRKELHNCSCHLS*WKSTLSS*RCLCFKWHTLVCNTKEFL*TTNHYYRQHICVW*L*CCNRNCQQHSL*SFAT*IRLIQGGVR*IF*ESYITRC*FR*HLWH*CFSCKHSKRN*PPQ*GCQEFK*ISHRSPRTWKV*AVYKMAMVHLARFYSWLDCHSNGDNYALLYDQLL*LSQGLLFLWILLQI**RRL*ASAQRSQITLHINELMDLFMRIFTIGTVTLKQGEIKDATPSDFVRATATIPIQASLPFGWLIVGVALLAVFQSASKIITLKKRWQLALSKGVHFVCNLLLLFVTVYSHLLLVAAGLEAPFLYLYALVYFLQSINFVRIIMRLWLCWKCRSKNPLLYDANYFLCWHTNCYDYCIPYNSVTSSIVITSGDGTTSPISEHDYQIGGYTEKWESGVKDCVVLHSYFTSDYYQLYSTQLSTDTGVEHVTFFIYNKIVDEPEEHVQIHTIDGSSGVVNPVMEPIYDEPTTTTSVPL*AQADEYELMYSFVSEETGTLIVNSVLLFLAFVVFLLVTLAILTALRLCAYCCNIVNVSLVKPSFYVYSRVKNLNSSRVPDLLV*TN*ILY*FFCLEL*F*PWQIPTVLLPLKSLKSSLNNGT***VSYSLHGFVFYNLPMPTGIGFCI*LS*FSSGCYGQ*L*LVLCLLLFTE*IGSPVELLSQWLVL*A*CGSATSLLLSDCLRVRVPCGHSIQKLTFFSTCHSMALF*PDRF*KVNS*SEL*SFVDIFVLLDTI*DAVTSRTCLKKSLLLHHERFLITNWELRSV*QVTQVLLHTVATGLATIN*TQTIPVAVTILLCLYSK*QQMFHLVDFQVTIAEILLIIMRTFKVSIWNLDYIINLIIKNLSKSLTENKYSQLDEEQPMEID*TNMKIILFLALITLATCELYHYQECVRGTTVLLKEPCSSGTYEGNSPFHPLADNKFALTCFSTQFAFACPDGVKHVYQLRARSVSPKLFIRQEEVQELYSPIFLIVAAIVFITLCFTLKRKTE*LNFH*LTSICAF*PFCYSLF*LCLLSFGSHLNCKIIMKLVTPKRT*NFLFS*ESSQL*LHFTKNVVYSHVLNINHM*LMTRVLFTSILNGILE*ELENQHL*LNCAWMRLVLNHPFSTSISVIIQFPVYLLQLIARNLNWVVL*CVVRSMKTF*SIMTFVLF*ISSKRTN*NV**WTPKSAKCTPHYVWWTLRFNWQ*PEWRTQWGAIKTTSAPRFTQ*YCVLVHRSHSTWQGRP*IPSRTRRSN*HQ*QSR*PNWLLPKSYQTNSWW*R*NERSQSKMVFLLPRNWARSWTSLWC*QRRHHMGCN*GSLEYTKRSHWHPQSC*QCCNRATTSSRNNIAKRLLRRREQRRQSSLFSFLIT*SQQFKKFNSRQQ*GNFSC*NGWQWR*CCSCFAAA*QIEPA*EQNVW*RPTTTRPNCH*EICC*GF*EASAKTYCH*SIQCNTSFRQTWSRTNPRKFWGPGTNQTRN*LQTLAANCTICPQRFSVLRNVAHWHGSHTFGNVVDLHRCHQIG*QRSKFQRSSHFAE*AY*RIQNIPTNRA*KGQKEEG**NSSLTAETEETANCDSSSCCRFG*FLQTIATIHEQC*LNSGLNSCRPHKADGLYKRFRFSVYDI*STLVQNEFS*LHSTSRCS*L*SHIAIFNQCVTLGRT*KSHHIFTEATRSTIECTVNNARESCLYGRALMCKINFSSAIPM*F**LLRRMTKKKKKKKKKK'

CVprotein = 'MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLEQPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKGAGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDGYPLECIKDLLARAGKASCTLSEQLDFIDTKRGVYCCREHEHEIAWYTERSEKSYELQTPFEIKLAKKFDTFNGECPNFVFPLNSIIKTIQPRVEKKKLDGFMGRIRSVYPVASPNECNQMCLSTLMKCDHCGETSWQTGDFVKATCEFCGTENLTKEGATTCGYLPQNAVVKIYCPACHNSEVGPEHSLAEYHNESGLKTILRKGGRTIAFGGCVFSYVGCHNKCAYWVPRASANIGCNHTGVVGEGSEGLNDNLLEILQKEKVNINIVGDFKLNEEIAIILASFSASTSAFVETVKGLDYKAFKQIVESCGNFKVTKGKAKKGAWNIGEQKSILSPLYAFASEAARVVRSIFSRTLETAQNSVRVLQKAAITILDGISQYSLRLIDAMMFTSDLATNNLVVMAYITGGVVQLTSQWLTNIFGTVYEKLKPVLDWLEEKFKEGVEFLRDGWEIVKFISTCACEIVGGQIVTCAKEIKESVQTFFKLVNKFLALCADSIIIGGAKLKALNLGETFVTHSKGLYRKCVKSREETGLLMPLKAPKEIIFLEGETLPTEVLTEEVVLKTGDLQPLEQPTSEAVEAPLVGTPVCINGLMLLEIKDTEKYCALAPNMMVTNNTFTLKGGAPTKVTFGDDTVIEVQGYKSVNITFELDERIDKVLNEKCSAYTVELGTEVNEFACVVADAVIKTLQPVSELLTPLGIDLDEWSMATYYLFDESGEFKLASHMYCSFYPPDEDEEEGDCEEEEFEPSTQYEYGTEDDYQGKPLEFGATSAALQPEEEQEEDWLDDDSQQTVGQQDGSEDNQTTTIQTIVEVQPQLEMELTPVVQTIEVNSFSGYLKLTDNVYIKNADIVEEAKKVKPTVVVNAANVYLKHGGGVAGALNKATNNAMQVESDDYIATNGPLKVGGSCVLSGHNLAKHCLHVVGPNVNKGEDIQLLKSAYENFNQHEVLLAPLLSAGIFGADPIHSLRVCVDTVRTNVYLAVFDKNLYDKLVSSFLEMKSEKQVEQKIAEIPKEEVKPFITESKPSVEQRKQDDKKIKACVEEVTTTLEETKFLTENLLLYIDINGNLHPDSATLVSDIDITFLKKDAPYIVGDVVQEGVLTAVVIPTKKAGGTTEMLAKALRKVPTDNYITTYPGQGLNGYTVEEAKTVLKKCKSAFYILPSIISNEKQEILGTVSWNLREMLAHAEETRKLMPVCVETKAIVSTIQRKYKGIKIQEGVVDYGARFYFYTSKTTVASLINTLNDLNETLVTMPLGYVTHGLNLEEAARYMRSLKVPATVSVSSPDAVTAYNGYLTSSSKTPEEHFIETISLAGSYKDWSYSGQSTQLGIEFLKRGDKSVYYTSNPTTFHLDGEVITFDNLKTLLSLREVRTIKVFTTVDNINLHTQVVDMSMTYGQQFGPTYLDGADVTKIKPHNSHEGKTFYVLPNDDTLRVEAFEYYHTTDPSFLGRYMSALNHTKKWKYPQVNGLTSIKWADNNCYLATALLTLQQIELKFNPPALQDAYYRARAGEAANFCALILAYCNKTVGELGDVRETMSYLFQHANLDSCKRVLNVVCKTCGQQQTTLKGVEAVMYMGTLSYEQFKKGVQIPCTCGKQATKYLVQQESPFVMMSAPPAQYELKHGTFTCASEYTGNYQCGHYKHITSKETLYCIDGALLTKSSEYKGPITDVFYKENSYTTTIKPVTYKLDGVVCTEIDPKLDNYYKKDNSYFTEQPIDLVPNQPYPNASFDNFKFVCDNIKFADDLNQLTGYKKPASRELKVTFFPDLNGDVVAIDYKHYTPSFKKGAKLLH
KPIVWHVNNATNKATYKPNTWCIRCLWSTKPVETSNSFDVLKSEDAQGMDNLACEDLKPVSEEVVENPTIQKDVLECNVKTTEVVGDIILKPANNSLKITEEVGHTDLMAAYVDNSSLTIKKPNELSRVLGLKTLATHGLAAVNSVPWDTIANYAKPFLNKVVSTTTNIVTRCLNRVCTNYMPYFFTLLLQLCTFTRSTNSRIKASMPTTIAKNTVKSVGKFCLEASFNYLKSPNFSKLINIIIWFLLLSVCLGSLIYSTAALGVLMSNLGMPSYCTGYREGYLNSTNVTIATYCTGSIPCSVCLSGLDSLDTYPSLETIQITISSFKWDLTAFGLVAEWFLAYILFTRFFYVLGLAAIMQLFFSYFAVHFISNSWLMWLIINLVQMAPISAMVRMYIFFASFYYVWKSYVHVVDGCNSSTCMMCYKRNRATRVECTTIVNGVRRSFYVYANGGKGFCKLHNWNCVNCDTFCAGSTFISDEVARDLSLQFKRPINPTDQSSYIVDSVTVKNGSIHLYFDKAGQKTYERHSLSHFVNLDNLRANNTKGSLPINVIVFDGKSKCEESSAKSASVYYSQLMCQPILLLDQALVSDVGDSAEVAVKMFDAYVNTFSSTFNVPMEKLKTLVATAEAELAKNVSLDNVLSTFISAARQGFVDSDVETKDVVECLKLSHQSDIEVTGDSCNNYMLTYNKVENMTPRDLGACIDCSARHINAQVAKSHNIALIWNVKDFMSLSEQLRKQIRSAAKKNNLPFKLTCATTRQVVNVVTTKIALKGGKIVNNWLKQLIKVTLVFLFVAAIFYLITPVHVMSKHTDFSSEIIGYKAIDGGVTRDIASTDTCFANKHADFDTWFSQRGGSYTNDKACPLIAAVITREVGFVVPGLPGTILRTTNGDFLHFLPRVFSAVGNICYTPSKLIEYTDFATSACVLAAECTIFKDASGKPVPYCYDTNVLEGSVAYESLRPDTRYVLMDGSIIQFPNTYLEGSVRVVTTFDSEYCRHGTCERSEAGVCVSTSGRWVLNNDYYRSLPGVFCGVDAVNLLTNMFTPLIQPIGALDISASIVAGGIVAIVVTCLAYYFMRFRRAFGEYSHVVAFNTLLFLMSFTVLCLTPVYSFLPGVYSVIYLYLTFYLTNDVSFLAHIQWMVMFTPLVPFWITIAYIICISTKHFYWFFSNYLKRRVVFNGVSFSTFEEAALCTFLLNKEMYLKLRSDVLLPLTQYNRYLALYNKYKYFSGAMDTTSYREAACCHLAKALNDFSNSGSDVLYQPPQTSITSAVLQSGFRKMAFPSGKVEGCMVQVTCGTTTLNGLWLDDVVYCPRHVICTSEDMLNPNYEDLLIRKSNHNFLVQAGNVQLRVIGHSMQNCVLKLKVDTANPKTPKYKFVRIQPGQTFSVLACYNGSPSGVYQCAMRPNFTIKGSFLNGSCGSVGFNIDYDCVSFCYMHHMELPTGVHAGTDLEGNFYGPFVDRQTAQAAGTDTTITVNVLAWLYAAVINGDRWFLNRFTTTLNDFNLVAMKYNYEPLTQDHVDILGPLSAQTGIAVLDMCASLKELLQNGMNGRTILGSALLEDEFTPFDVVRQCSGVTFQSAVKRTIKGTHHWLLLTILTSLLVLVQSTQWSLFFFLYENAFLPFAMGIIAMSAFAMMFVKHKHAFLCLFLLPSLATVAYFNMVYMPASWVMRIMTWLDMVDTSLSGFKLKDCVMYASAVVLLILMTARTVYDDGARRVWTLMNVLTLVYKVYYGNALDQAISMWALIISVTSNYSGVVTTVMFLARGIVFMCVEYCPIFFITGNTLQCIMLVYCFLGYFCTCYFGLFCLLNRYFRLTLGVYDYLVSTQEFRYMNSQGLLPPKNSIDAFKLNIKLLGVGGKPCIKVATVQSKMSDVKCTSVVLLSVLQQLRVESSSKLWAQCVQLHNDILLAKDTTEAFEKMVSLLSVLLSMQGAVDINKLCEEMLDNRATLQAIASEFSSLPSYAAFATAQEAYEQAVANGDSEVVLKKLKKSLNVA
KSEFDRDAAMQRKLEKMADQAMTQMYKQARSEDKRAKVTSAMQTMLFTMLRKLDNDALNNIINNARDGCVPLNIIPLTTAAKLMVVIPDYNTYKNTCDGTTFTYASALWEIQQVVDADSKIVQLSEISMDNSPNLAWPLIVTALRANSAVKLQNNELSPVALRQMSCAAGTTQTACTDDNALAYYNTTKGGRFVLALLSDLQDLKWARFPKSDGTGTIYTELEPPCRFVTDTPKGPKVKYLYFIKGLNNLNRGMVLGSLAATVRLQAGNATEVPANSTVLSFCAFAVDAAKAYKDYLASGGQPITNCVKMLCTHTGTGQAITVTPEANMDQESFGGASCCLYCRCHIDHPNPKGFCDLKGKYVQIPTTCANDPVGFTLKNTVCTVCGMWKGYGCSCDQLREPMLQSADAQSFLNRVCGVSAARLTPCGTGTSTDVVYRAFDIYNDKVAGFAKFLKTNCCRFQEKDEDDNLIDSYFVVKRHTFSNYQHEETIYNLLKDCPAVAKHDFFKFRIDGDMVPHISRQRLTKYTMADLVYALRHFDEGNCDTLKEILVTYNCCDDDYFNKKDWYDFVENPDILRVYANLGERVRQALLKTVQFCDAMRNAGIVGVLTLDNQDLNGNWYDFGDFIQTTPGSGVPVVDSYYSLLMPILTLTRALTAESHVDTDLTKPYIKWDLLKYDFTEERLKLFDRYFKYWDQTYHPNCVNCLDDRCILHCANFNVLFSTVFPPTSFGPLVRKIFVDGVPFVVSTGYHFRELGVVHNQDVNLHSSRLSFKELLVYAADPAMHAASGNLLLDKRTTCFSVAALTNNVAFQTVKPGNFNKDFYDFAVSKGFFKEGSSVELKHFFFAQDGNAAISDYDYYRYNLPTMCDIRQLLFVVEVVDKYFDCYDGGCINANQVIVNNLDKSAGFPFNKWGKARLYYDSMSYEDQDALFAYTKRNVIPTITQMNLKYAISAKNRARTVAGVSICSTMTNRQFHQKLLKSIAATRGATVVIGTSKFYGGWHNMLKTVYSDVENPHLMGWDYPKCDRAMPNMLRIMASLVLARKHTTCCSLSHRFYRLANECAQVLSEMVMCGGSLYVKPGGTSSGDATTAYANSVFNICQAVTANVNALLSTDGNKIADKYVRNLQHRLYECLYRNRDVDTDFVNEFYAYLRKHFSMMILSDDAVVCFNSTYASQGLVASIKNFKSVLYYQNNVFMSEAKCWTETDLTKGPHEFCSQHTMLVKQGDDYVYLPYPDPSRILGAGCFVDDIVKTDGTLMIERFVSLAIDAYPLTKHPNQEYADVFHLYLQYIRKLHDELTGHMLDMYSVMLTNDNTSRYWEPEFYEAMYTPHTVLQAVGACVLCNSQTSLRCGACIRRPFLCCKCCYDHVISTSHKLVLSVNPYVCNAPGCDVTDVTQLYLGGMSYYCKSHKPPISFPLCANGQVFGLYKNTCVGSDNVTDFNAIATCDWTNAGDYILANTCTERLKLFAAETLKATEETFKLSYGIATVREVLSDRELHLSWEVGKPRPPLNRNYVFTGYRVTKNSKVQIGEYTFEKGDYGDAVVYRGTTTYKLNVGDYFVLTSHTVMPLSAPTLVPQEHYVRITGLYPTLNISDEFSSNVANYQKVGMQKYSTLQGPPGTGKSHFAIGLALYYPSARIVYTACSHAAVDALCEKALKYLPIDKCSRIIPARARVECFDKFKVNSTLEQYVFCTVNALPETTADIVVFDEISMATNYDLSVVNARLRAKHYVYIGDPAQLPAPRTLLTKGTLEPEYFNSVCRLMKTIGPDMFLGTCRRCPAEIVDTVSALVYDNKLKAHKDKSAQCFKMFYKGVITHDVSSAINRPQIGVVREFLTRNPAWRKAVFISPYNSQNAVASKILGLPTQTVDSSQGSEYDYVIFTQTTETAHSCNVNRFNVAITRAKVGILCIMSDRDLYDKLQFTSLEIPRRNVATLQAENVTGLFKDCSKVITGLHPTQAPTHLSVDTKFKTEGLCVDIPGIPKDMTYRRLISMMGFKM
NYQVNGYPNMFITREEAIRHVRAWIGFDVEGCHATREAVGTNLPLQLGFSTGVNLVAVPTGYVDTPNNTDFSRVSAKPPPGDQFKHLIPLMYKGLPWNVVRIKIVQMLSDTLKNLSDRVVFVLWAHGFELTSMKYFVKIGPERTCCLCDRRATCFSTASDTYACWHHSIGFDYVYNPFMIDVQQWGFTGNLQSNHDLYCQVHGNAHVASCDAIMTRCLAVHECFVKRVDWTIEYPIIGDELKINAACRKVQHMVVKAALLADKFPVLHDIGNPKAIKCVPQADVEWKFYDAQPCSDKAYKIEELFYSYATHSDKFTDGVCLFWNCNVDRYPANSIVCRFDTRVLSNLNLPGCDGGSLYVNKHAFHTPAFDKSAFVNLKQLPFFYYSDSPCESHGKQVVSDIDYVPLKSATCITRCNLGGAVCRHHANEYRLYLDAYNMMISAGFSLWVYKQFDTYNLWNTFTRLQSLENVAFNVVNKGHFDGQQGEVPVSIINNTVYTKVDGVDVELFENKTTLPVNVAFELWAKRNIKPVPEVKILNNLGVDIAANTVIWDYKRDAPAHISTIGVCSMTDIAKKPTETICAPLTVFFDGRVDGQVDLFRNARNGVLITEGSVKGLQPSVGPKQASLNGVTLIGEAVKTQFNYYKKVDGVVQQLPETYFTQSRNLQEFKPRSQMEIDFLELAMDEFIERYKLEGYAFEHIVYGDFSHSQLGGLHLLIGLAKRFKESPFELEDFIPMDSTVKNYFITDAQTGSSKCVCSVIDLLLDDFVEIIKSQDLSVVSKVVKVTIDYTEISFMLWCKDGHVETFYPKLQSSQAWQPGVAMPNLYKMQRMLLEKCDLQNYGDSATLPKGIMMNVAKYTQLCQYLNTLTLAVPYNMRVIHFGAGSDKGVAPGTAVLRQWLPTGTLLVDSDLNDFVSDADSTLIGDCATVHTANKWDLIISDMYDPKTKNVTKENDSKEGFFTYICGFIQQKLALGGSVAIKITEHSWNADLYKLMGHFAWWTAFVTNVNASSSEAFLIGCNYLGKPREQIDGYVMHANYIFWRNTNPIQLSSYSLFDMSKFPLKLRGTAVMSLKEGQINDMILSLLSKGRLIIRENNRVVISSDVLVNN'



ans = struct with fields:
    A: 375
    R: 558
    N: 472
    D: 290
    C: 635
    Q: 325
    E: 270
    G: 394
    H: 332
    I: 436
    L: 886
    K: 413
    M: 117
    F: 593
    P: 292
    S: 810
    T: 679
    W: 263
    Y: 505
    V: 548

covidpro = struct with fields:
                LocusName: 'YP_009724389'
      LocusSequenceLength: '7096'
     LocusNumberofStrands: ''
            LocusTopology: 'linear'
        LocusMoleculeType: ''
     LocusGenBankDivision: 'VRL'
    LocusModificationDate: '18-JUL-2020'
               Definition: 'ORF1ab polyprotein [Severe acute respiratory syndrome coronavirus 2].'
                Accession: 'YP_009724389'
                  Version: 'YP_009724389.1'
                       GI: ''
                  Project: []
                   DBLink: 'BioProject: PRJNA485481 DBSOURCE    REFSEQ: accession NC_045512.2'
                 Keywords: 'RefSeq.'
                  Segment: []
                   Source: 'Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2)'
           SourceOrganism: [4×65 char]
                Reference: {[1×1 struct]  [1×1 struct]  [1×1 struct]}
                  Comment: [17×67 char]
                 Features: [431×74 char]
                      CDS: [1×1 struct]
                 Sequence: 'meslvpgfnekthvqlslpvlqvrdvlvrgfgdsveevlsearqhlkdgtcglvevekgvlpqleqpyvfikrsdartaphghvmvelvaelegiqygrsgetlgvlvphvgeipvayrkvllrkngnkgagghsygadlksfdlgdelgtdpyedfqenwntkhssgvtrelmrelnggaytryvdnnfcgpdgyplecikdllaragkasctlseqldfidtkrgvyccreheheiawyterseksyelqtpfeiklakkfdtfngecpnfvfplnsiiktiqprvekkkldgfmgrirsvypvaspnecnqmclstlmkcdhcgetswqtgdfvkatcefcgtenltkegattcgylpqnavvkiycpachnsevgpehslaeyhnesglktilrkggrtiafggcvfsyvgchnkcaywvprasanigcnhtgvvgegseglndnlleilqkekvninivgdfklneeiaiilasfsastsafvetvkgldykafkqivescgnfkvtkgkakkgawnigeqksilsplyafaseaarvvrsifsrtletaqnsvrvlqkaaitildgisqyslrlidammftsdlatnnlvvmayitggvvqltsqwltnifgtvyeklkpvldwleekfkegveflrdgweivkfistcaceivggqivtcakeikesvqtffklvnkflalcadsiiiggaklkalnlgetfvthskglyrkcvksreetgllmplkapkeiiflegetlptevlteevvlktgdlqpleqptseaveaplvgtpvcinglmlleikdtekycalapnmmvtnntftlkggaptkvtfgddtvievqgyksvnitfelderidkvlnekcsaytvelgtevnefacvvadaviktlqpvselltplgidldewsmatyylfdesgefklashmycsfyppdedeeegdceeeefepstqyeygteddyqgkplefgatsaalqpeeeqeedwldddsqqtvgqqdgsednqtttiqtivevqpqlemeltpvvqtievnsfsgylkltdnvyiknadiveeakkvkptvvvnaanvylkhgggvagalnkatnnamqvesddyiatngplkvggscvlsghnlakhclhvvgpnvnkgediqllksayenfnqhevllapllsagifgadpihslrvcvdtvrtnvylavfdknlydklvssflemksekqveqkiaeipkeevkpfiteskpsveqrkqddkkikacveevtttleetkfltenlllyidingnlhpdsatlvsdiditflkkdapyivgdvvqegvltavviptkkaggttemlakalrkvptdnyittypgqglngytveeaktvlkkcksafyilpsiisnekqeilgtvswnlremlahaeetrklmpvcvetkaivstiqrkykgikiqegvvdygarfyfytskttvaslintlndlnetlvtmplgyvthglnleeaarymrslkvpatvsvsspdavtayngyltsssktpeehfietislagsykdwsysgqstqlgieflkrgdksvyytsnpttfhldgevitfdnlktllslrevrtikvfttvdninlhtqvvdmsmtygqqfgptyldgadvtkikphnshegktfyvlpnddtlrveafeyyhttdpsflgrymsalnhtkkwkypqvngltsikwadnncylatalltlqqielkfnppalqdayyrarageaanfcalilaycnktvgelgdvretmsylfqhanldsckrvlnvvcktcgqqqttlkgveavmymgtlsyeqfkkgvqipctcgkqatkylvqqespfvmmsappaqyelkhgtftcaseytgnyqcghykhitsketlycidgalltksseykgpitdvfykensytttikpvtykldgvvcteidpkldnyykkdnsyfteqpidlvpnqpypnasfdnfkfvcdnikfaddlnqltgykkpasrelkvtffpdlngdvvaidy
khytpsfkkgakllhkpivwhvnnatnkatykpntwcirclwstkpvetsnsfdvlksedaqgmdnlacedlkpvseevvenptiqkdvlecnvkttevvgdiilkpannslkiteevghtdlmaayvdnssltikkpnelsrvlglktlathglaavnsvpwdtianyakpflnkvvstttnivtrclnrvctnympyfftlllqlctftrstnsrikasmpttiakntvksvgkfcleasfnylkspnfskliniiiwflllsvclgsliystaalgvlmsnlgmpsyctgyregylnstnvtiatyctgsipcsvclsgldsldtypsletiqitissfkwdltafglvaewflayilftrffyvlglaaimqlffsyfavhfisnswlmwliinlvqmapisamvrmyiffasfyyvwksyvhvvdgcnsstcmmcykrnratrvecttivngvrrsfyvyanggkgfcklhnwncvncdtfcagstfisdevardlslqfkrpinptdqssyivdsvtvkngsihlyfdkagqktyerhslshfvnldnlranntkgslpinvivfdgkskceessaksasvyysqlmcqpillldqalvsdvgdsaevavkmfdayvntfsstfnvpmeklktlvataeaelaknvsldnvlstfisaarqgfvdsdvetkdvveclklshqsdievtgdscnnymltynkvenmtprdlgacidcsarhinaqvakshnialiwnvkdfmslseqlrkqirsaakknnlpfkltcattrqvvnvvttkialkggkivnnwlkqlikvtlvflfvaaifylitpvhvmskhtdfsseiigykaidggvtrdiastdtcfankhadfdtwfsqrggsytndkacpliaavitrevgfvvpglpgtilrttngdflhflprvfsavgnicytpsklieytdfatsacvlaaectifkdasgkpvpycydtnvlegsvayeslrpdtryvlmdgsiiqfpntylegsvrvvttfdseycrhgtcerseagvcvstsgrwvlnndyyrslpgvfcgvdavnlltnmftpliqpigaldisasivaggivaivvtclayyfmrfrrafgeyshvvafntllflmsftvlcltpvysflpgvysviylyltfyltndvsflahiqwmvmftplvpfwitiayiicistkhfywffsnylkrrvvfngvsfstfeeaalctfllnkemylklrsdvllpltqynrylalynkykyfsgamdttsyreaacchlakalndfsnsgsdvlyqppqtsitsavlqsgfrkmafpsgkvegcmvqvtcgtttlnglwlddvvycprhvictsedmlnpnyedllirksnhnflvqagnvqlrvighsmqncvlklkvdtanpktpkykfvriqpgqtfsvlacyngspsgvyqcamrpnftikgsflngscgsvgfnidydcvsfcymhhmelptgvhagtdlegnfygpfvdrqtaqaagtdttitvnvlawlyaavingdrwflnrftttlndfnlvamkynyepltqdhvdilgplsaqtgiavldmcaslkellqngmngrtilgsalledeftpfdvvrqcsgvtfqsavkrtikgthhwllltiltsllvlvqstqwslffflyenaflpfamgiiamsafammfvkhkhaflclfllpslatvayfnmvympaswvmrimtwldmvdtslsgfklkdcvmyasavvllilmtartvyddgarrvwtlmnvltlvykvyygnaldqaismwaliisvtsnysgvvttvmflargivfmcveycpiffitgntlqcimlvycflgyfctcyfglfcllnryfrltlgvydylvstqefrymnsqgllppknsidafklnikllgvggkpcikvatvqskmsdvkctsvvllsvlqqlrvesssklwaqcvqlhndillakdtteafekmvsllsvllsmqgavdinklceemldnratlqaiasefsslpsyaafataqeayeqavangd
sevvlkklkkslnvaksefdrdaamqrklekmadqamtqmykqarsedkrakvtsamqtmlftmlrkldndalnniinnardgcvplniiplttaaklmvvipdyntykntcdgttftyasalweiqqvvdadskivqlseismdnspnlawplivtalransavklqnnelspvalrqmscaagttqtactddnalayynttkggrfvlallsdlqdlkwarfpksdgtgtiyteleppcrfvtdtpkgpkvkylyfikglnnlnrgmvlgslaatvrlqagnatevpanstvlsfcafavdaakaykdylasggqpitncvkmlcthtgtgqaitvtpeanmdqesfggascclycrchidhpnpkgfcdlkgkyvqipttcandpvgftlkntvctvcgmwkgygcscdqlrepmlqsadaqsflnrvcgvsaarltpcgtgtstdvvyrafdiyndkvagfakflktnccrfqekdeddnlidsyfvvkrhtfsnyqheetiynllkdcpavakhdffkfridgdmvphisrqrltkytmadlvyalrhfdegncdtlkeilvtynccdddyfnkkdwydfvenpdilrvyanlgervrqallktvqfcdamrnagivgvltldnqdlngnwydfgdfiqttpgsgvpvvdsyysllmpiltltraltaeshvdtdltkpyikwdllkydfteerlklfdryfkywdqtyhpncvnclddrcilhcanfnvlfstvfpptsfgplvrkifvdgvpfvvstgyhfrelgvvhnqdvnlhssrlsfkellvyaadpamhaasgnllldkrttcfsvaaltnnvafqtvkpgnfnkdfydfavskgffkegssvelkhfffaqdgnaaisdydyyrynlptmcdirqllfvvevvdkyfdcydggcinanqvivnnldksagfpfnkwgkarlyydsmsyedqdalfaytkrnviptitqmnlkyaisaknrartvagvsicstmtnrqfhqkllksiaatrgatvvigtskfyggwhnmlktvysdvenphlmgwdypkcdrampnmlrimaslvlarkhttccslshrfyrlanecaqvlsemvmcggslyvkpggtssgdattayansvfnicqavtanvnallstdgnkiadkyvrnlqhrlyeclyrnrdvdtdfvnefyaylrkhfsmmilsddavvcfnstyasqglvasiknfksvlyyqnnvfmseakcwtetdltkgphefcsqhtmlvkqgddyvylpypdpsrilgagcfvddivktdgtlmierfvslaidaypltkhpnqeyadvfhlylqyirklhdeltghmldmysvmltndntsrywepefyeamytphtvlqavgacvlcnsqtslrcgacirrpflcckccydhvistshklvlsvnpyvcnapgcdvtdvtqlylggmsyyckshkppisfplcangqvfglykntcvgsdnvtdfnaiatcdwtnagdyilantcterlklfaaetlkateetfklsygiatvrevlsdrelhlswevgkprpplnrnyvftgyrvtknskvqigeytfekgdygdavvyrgtttyklnvgdyfvltshtvmplsaptlvpqehyvritglyptlnisdefssnvanyqkvgmqkystlqgppgtgkshfaiglalyypsarivytacshaavdalcekalkylpidkcsriipararvecfdkfkvnstleqyvfctvnalpettadivvfdeismatnydlsvvnarlrakhyvyigdpaqlpaprtlltkgtlepeyfnsvcrlmktigpdmflgtcrrcpaeivdtvsalvydnklkahkdksaqcfkmfykgvithdvssainrpqigvvrefltrnpawrkavfispynsqnavaskilglptqtvdssqgseydyviftqttetahscnvnrfnvaitrakvgilcimsdrdlydklqftsleiprrnvatlqaenvtglfkdcskvitglhptqapthlsvdtkfkteglcvdipgipk
dmtyrrlismmgfkmnyqvngypnmfitreeairhvrawigfdvegchatreavgtnlplqlgfstgvnlvavptgyvdtpnntdfsrvsakpppgdqfkhliplmykglpwnvvrikivqmlsdtlknlsdrvvfvlwahgfeltsmkyfvkigpertcclcdrratcfstasdtyacwhhsigfdyvynpfmidvqqwgftgnlqsnhdlycqvhgnahvascdaimtrclavhecfvkrvdwtieypiigdelkinaacrkvqhmvvkaalladkfpvlhdignpkaikcvpqadvewkfydaqpcsdkaykieelfysyathsdkftdgvclfwncnvdrypansivcrfdtrvlsnlnlpgcdggslyvnkhafhtpafdksafvnlkqlpffyysdspceshgkqvvsdidyvplksatcitrcnlggavcrhhaneyrlyldaynmmisagfslwvykqfdtynlwntftrlqslenvafnvvnkghfdgqqgevpvsiinntvytkvdgvdvelfenkttlpvnvafelwakrnikpvpevkilnnlgvdiaantviwdykrdapahistigvcsmtdiakkpteticapltvffdgrvdgqvdlfrnarngvlitegsvkglqpsvgpkqaslngvtligeavktqfnyykkvdgvvqqlpetyftqsrnlqefkprsqmeidflelamdefieryklegyafehivygdfshsqlgglhlliglakrfkespfeledfipmdstvknyfitdaqtgsskcvcsvidlllddfveiiksqdlsvvskvvkvtidyteisfmlwckdghvetfypklqssqawqpgvampnlykmqrmllekcdlqnygdsatlpkgimmnvakytqlcqylntltlavpynmrvihfgagsdkgvapgtavlrqwlptgtllvdsdlndfvsdadstligdcatvhtankwdliisdmydpktknvtkendskegfftyicgfiqqklalggsvaikitehswnadlyklmghfawwtafvtnvnassseafligcnylgkpreqidgyvmhanyifwrntnpiqlssyslfdmskfplklrgtavmslkegqindmilsllskgrliirennrvvissdvlvnn'
                SearchURL: 'https://www.ncbi.nlm.nih.gov/entrez/viewer.fcgi?db=protein&id=YP_009724389'
              RetrieveURL: 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=1796318597&rettype=gp&retmode=text'

covidproAC = struct with fields:
    C: 35644
    H: 55333
    N: 9253
    O: 10496
    S: 394

ans = 35644

covidproMW = 7.9405e+05

Post a Comment

0 Comments