X-Git-Url: https://bilbo.iut-bm.univ-fcomte.fr/and/gitweb/chloroplast13.git/blobdiff_plain/38c1dcfbefcbed8161d2e34081f977f7e1222f64..66d1141218f190b9d6a8997af7d8ef784db3208e:/annotated.tex?ds=inline diff --git a/annotated.tex b/annotated.tex index 819cc33..46619ab 100644 --- a/annotated.tex +++ b/annotated.tex @@ -47,11 +47,11 @@ that stores annotated and/or unannotated chloroplast genomes. We have considered the GenBank-NCBI \cite{Sayers01012011} database as sequence database: 99~genomes of chloroplasts were retrieved. These genomes lie in the eleven type of chloroplast families and Table \ref{Tab2} -summarizes their distribution in our dataset.\\ +summarizes their distribution in our dataset. \begin{figure}[h] \centering - \includegraphics[width=0.8\textwidth]{generalView} + \includegraphics[width=0.75\textwidth]{generalView} \caption{A general overview of the annotation-based approach}\label{Fig1} \end{figure} @@ -190,9 +190,9 @@ to extract core genes, as explained in Algorithm \ref{Alg3:thirdM}. \STATE $geneList=\text{empty list}$ \STATE $common=set(dir(NCBI\_Genes)) \cap set(dir(Dogma\_Genes))$ \FOR{$\text{gene in common}$} - \STATE $gen1 \leftarrow open(NCBI\_Genes(gene)).read()$ - \STATE $gen2 \leftarrow open(Dogma\_Genes(gene)).read()$ - \STATE $score \leftarrow geneChk(gen1,gen2)$ + \STATE $g1 \leftarrow open(NCBI\_Genes(gene)).read()$ + \STATE $g2 \leftarrow open(Dogma\_Genes(gene)).read()$ + \STATE $score \leftarrow geneChk(g1,g2)$ \IF {$score > Threshold$} \STATE $geneList \leftarrow gene$ \ENDIF @@ -210,18 +210,16 @@ geneChk subroutine. 
\caption{Find the Maximum Similarity Score between two sequences} \label{Alg3:genechk} \begin{algorithmic} -\REQUIRE $gen1,gen2 \leftarrow \text{NCBI gene sequence, Dogma gene sequence}$ +\REQUIRE $g1,g2 \leftarrow \text{NCBI gene sequence, Dogma gene sequence}$ \ENSURE $\text{Maximum similarity score}$ -\STATE $Score1 \leftarrow needle(gen1,gen2)$ -\STATE $Score2 \leftarrow needle(gen1,Reverse(gen2))$ -\STATE $Score3 \leftarrow needle(gen1,Complement(gen2))$ -\STATE $Score4 \leftarrow needle(gen1,Reverse(Complement(gen2)))$ -\RETURN $max(Score1, Score2, Score3, Score4)$ +\STATE $score1 \leftarrow needle(g1,g2)$ +\STATE $score2 \leftarrow needle(g1,Reverse(g2))$ +\STATE $score3 \leftarrow needle(g1,Complement(g2))$ +\STATE $score4 \leftarrow needle(g1,Reverse(Complement(g2)))$ +\RETURN $max(score1,score2,score3,score4)$ \end{algorithmic} \end{algorithm} -% THIS SUBSECTION MUST BE IMPROVED - \subsubsection{Intersection Core Matrix (\textit{ICM})} To extract core genes, we iteratively collect the maximum number of @@ -266,8 +264,8 @@ core genes with its two genomes parents. \caption{Extract Maximum Intersection Score} \label{Alg1:ICM} \begin{algorithmic} -\REQUIRE $L \leftarrow \text{genomes vectors}$ -\ENSURE $B1 \leftarrow Max Core Vector$ +\REQUIRE $L \leftarrow \text{genomes sets}$ +\ENSURE $B1 \leftarrow \text{Max Core set}$ \FOR{$i \leftarrow 0:len(L)-1$} \STATE $score \leftarrow 0$ \STATE $core1 \leftarrow set(GenomeList[L[i]])$ @@ -297,7 +295,7 @@ names\_Accession number)}, while an edge is labeled with the number of lost genes from a leaf genome or an intermediate core gene. Such numbers are very interesting because they give an information about the evolution: how many genes were lost between two species whether -they belong to the same familie or not. By the principle of +they belong to the same family or not. 
By the principle of classification, a small number of genes lost among species indicates that those species are close to each other and belong to the same family, while a large loss means that we have an evolutionary relationship @@ -321,7 +319,7 @@ to align these sequences with each other. \end{enumerate} \begin{figure}[H] - \centering \includegraphics[width=0.8\textwidth]{Whole_system} + \centering \includegraphics[width=0.75\textwidth]{Whole_system} \caption{Overview of the pipeline}\label{wholesystem} \end{figure}