From 10a733d0fdf845729c8b868f9f6959f5fe34f01c Mon Sep 17 00:00:00 2001 From: bassam al-kindy Date: Tue, 1 Oct 2013 11:18:57 +0200 Subject: [PATCH 1/1] an updated version of annotation.tex --- annotated.tex | 27 ++++++++++++++++++++++++++- biblio.bib | 45 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/annotated.tex b/annotated.tex index 720adc5..1e00c74 100644 --- a/annotated.tex +++ b/annotated.tex @@ -1 +1,26 @@ - sdfdfadqdqaaaaaaaaaaaaaaaaaaa +The field of genome annotation receives a lot of attention, since the ability to collect and analyze genomic data can provide strong indicators for the study of life\cite{Eisen2007}. Many genome annotation centres provide various types of annotation tools (cost-effective sequencing methods\cite{Bakke2009}) at different annotation levels. In this section, we consider a new annotation method for extracting the core genome from a large number of chloroplast genomes, as a solution to the problems of the previous method described in Section 2. This method is based on extracting gene features from well-annotated genomes. The question now is: how can we obtain a well-annotated genome? To answer this question, we need to focus on studying the accuracy of the genome's annotation (systematically\cite{Bakke2009}). The general overview of the system is illustrated in Figure 1.\\ + + +\begin{figure}[h] +\caption{A general overview of the system} + \centering + \includegraphics[width=0.5\textwidth]{generalView} +\end{figure} + +In Figure 1, we illustrate the general overview of the system. In this system, there are three main stages: \textit{Database, Gene extraction,} and \textit{Relationships}. There are many international nucleotide sequence databases, such as GenBank/NCBI in the USA (http://www.ncbi.nlm.nih.gov/genbank/),\\ EMBL-Bank/ENA/EBI in Europe (http://www.ebi.ac.uk/ena/), and DDBJ in Japan (http://www.ddbj.nig.ac.jp/). 
In our work, the database must be a reliable data source that stores annotated or unannotated chloroplast genomes. We will use the GenBank/NCBI database as our nucleotide sequence database. By ``Extract Gene Features'', we refer to our main process of extracting the information needed to find the core genome from a large set of well-annotated genomes. A good annotation tool leads us to extract good gene features. Here, gene features can be anything such as gene names, gene sequences, protein sequences, etc. To verify the results of our system, we need to organize and represent them in the form of tables, phylogenetic trees, graphs, etc., and compare them with the results of another annotation tool such as Dogma\cite{RDogma}. The aim of all this work is to see the relationships among our large population of chloroplast genomes and to find the core genome of the root ancestral node. Furthermore, in this part we can visualize the evolutionary relationships of different chloroplast organisms.\\ +The output of each stage in our system is used as the input to the next stage, and so on. In the rest of this section, Section 3.1 introduces some annotation problems with NCBI chloroplast genomes and discusses our method for extracting useful data. Section 3.2 presents our system for calculating the evolutionary core genome based on an annotation tool other than NCBI. + +\subsection{Gene Extraction Techniques from annotated NCBI genomes} +With NCBI, the idea is to use the existing NCBI annotations of chloroplast genomes to extract the core and pan genomes. The techniques used here rely on gene names and gene contents, based on some similarity considerations. + +\subsubsection{Core genome based on NCBI Gene Names} +Our simple idea for constructing the core genome is based on extracting gene names from chloroplast genomes annotated by NCBI. 
For instance, in this stage neither sequence comparison nor new annotation is performed; we just want to extract the gene names as stored in each chloroplast genome in NCBI. +By building a dictionary of the gene names of each genome during the collection process, we have to consider a number of duplications in each genome; in other words, name duplications arise from gene fragments along the chloroplast DNA sequences. To obtain the core genome, we need to reach the identical state, where each gene has only one name, without regard to gene position or orientation. To remove gene name duplications from the dictionary, we convert the list of gene names of each genome into a set of gene names. By using the concept and the definition of a set in mathematics, we remove all the duplications and reach the identical state. \\ +By using the intersection among these genomes + +\subsubsection{Extracting Core genes from NCBI annotations} + +\subsection{Dogma Annotation tool} + + +\subsubsection{Why Dogma?} diff --git a/biblio.bib b/biblio.bib index 2a4eade..c3efe04 100644 --- a/biblio.bib +++ b/biblio.bib @@ -62,4 +62,47 @@ PAGES = {1178-85}, URL = {http://www.biomedsearch.com/nih/Reconstructing-ancestor-Mycobacterium-leprae-dynamics/17623808.html}, PubMedID = {17623808}, ISSN = {1088-9051} -} \ No newline at end of file +} +@article{Eisen2007, + author = {Eisen, Jonathan A.}, + journal = {PLoS Biology}, + publisher = {Public Library of Science}, + title = {Environmental Shotgun Sequencing: Its Potential and Challenges for Studying the Hidden World of Microbes}, + year = {2007}, + month = mar, + volume = {5}, + url = {http://dx.doi.org/10.1371/journal.pbio.0050082}, + pages = {e82}, + abstract = { +

Environmental shotgun sequencing promises to reveal novel and fundamental insights into the hidden world of microbes, but the complexity of analysis required to realize this potential poses unique interdisciplinary challenges.

+ }, + number = {3}, + doi = {10.1371/journal.pbio.0050082} +} +@Article{RDogma, +AUTHOR = {Wyman, Stacia K. and Jansen, Robert K. and Boore, Jeffrey L.}, +TITLE = {Automatic annotation of organellar genomes with {DOGMA}}, +JOURNAL = {Bioinformatics}, +PUBLISHER = {Oxford University Press}, +VOLUME = {20}, +YEAR = {2004}, +NUMBER = {17}, +PAGES = {3252--3255}, +URL={http://www.biosci.utexas.edu/ib/faculty/jansen/pubs/Wyman%20et%20al.%202004.pdf}, +} +@article{Bakke2009, + author = {Bakke, Peter and Carney, Nick and DeLoache, Will and Gearing, Mary and Ingvorsen, Kjeld and Lotz, Matt and McNair, Jay and Penumetcha, Pallavi and Simpson, Samantha and Voss, Laura and Win, Max and Heyer, Laurie J. and Campbell, A. Malcolm}, + journal = {PLoS ONE}, + publisher = {Public Library of Science}, + title = {Evaluation of Three Automated Genome Annotations for {Halorhabdus utahensis}}, + year = {2009}, + month = jul, + volume = {4}, + url = {http://dx.doi.org/10.1371/journal.pone.0006291}, + pages = {e6291}, + abstract = { +

Genome annotations are accumulating rapidly and depend heavily on automated annotation systems. Many genome centers offer annotation systems but no one has compared their output in a systematic way to determine accuracy and inherent errors. Errors in the annotations are routinely deposited in databases such as NCBI and used to validate subsequent annotation errors. We submitted the genome sequence of halophilic archaeon Halorhabdus utahensis to be analyzed by three genome annotation services. We have examined the output from each service in a variety of ways in order to compare the methodology and effectiveness of the annotations, as well as to explore the genes, pathways, and physiology of the previously unannotated genome. The annotation services differ considerably in gene calls, features, and ease of use. We had to manually identify the origin of replication and the species-specific consensus ribosome-binding site. Additionally, we conducted laboratory experiments to test H. utahensis growth and enzyme activity. Current annotation practices need to improve in order to more accurately reflect a genome's biological potential. We make specific recommendations that could improve the quality of microbial annotation projects.

+}, + number = {7}, + doi = {10.1371/journal.pone.0006291} +} -- 2.39.5